```text
E:.
│  archs.py
│  dataset.py
│  deform_conv_v2.py
│  train.py
│  utils.py
│  visual_net.py
│
├─grad_cam
│      2.png
│      3.png
│
├─image
│  ├─1
│  │      154.png
│  │      2.png
│  │
│  ├─2
│  │      143.png
│  │      56.png
│  │
│  └─3
│          13.png
│
├─models
│  └─ScaledMNISTNet_wDCNv2_c3-4
│          args.pkl
│          args.txt
│          log.csv
│          model.pth
│
└─__pycache__
        archs.cpython-36.pyc
        dataset.cpython-36.pyc
        deform_conv_v2.cpython-36.pyc
        utils.cpython-36.pyc
```
Main function (train.py)
```python
import os
import argparse
from collections import OrderedDict

import pandas as pd
import joblib
from tqdm import tqdm

import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim

from utils import *
from dataset import train_get_loader, test_get_loader
import archs

arch_names = archs.__dict__.keys()


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--name', default=None,
                        help='model name: (default: arch+timestamp)')
    parser.add_argument('--trainData', default=r"image",
                        help='train dataset: (default: image)')
    parser.add_argument('--testData', default=r"image",
                        help='test dataset: (default: image)')
    parser.add_argument('--batch_size', default=1, type=int,
                        help='batch size: (default: 1)')
    parser.add_argument('--arch', '-a', metavar='ARCH', default='ScaledMNISTNet',
                        choices=arch_names,
                        help='model architecture: ' +
                             ' | '.join(arch_names) +
                             ' (default: ScaledMNISTNet)')
    parser.add_argument('--deform', default=True, type=str2bool,
                        help='use deform conv')
    parser.add_argument('--modulation', default=True, type=str2bool,
                        help='use modulated deform conv')
    parser.add_argument('--min-deform-layer', default=3, type=int,
                        help='first layer that uses deform conv')
    parser.add_argument('--epochs', default=100, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('--optimizer', default='SGD',
                        choices=['Adam', 'SGD'],
                        help='optimizer: ' +
                             ' | '.join(['Adam', 'SGD']) +
                             ' (default: SGD)')
    parser.add_argument('--lr', '--learning-rate', default=1e-2, type=float,
                        metavar='LR', help='initial learning rate')
    parser.add_argument('--momentum', default=0.5, type=float,
                        help='momentum')
    parser.add_argument('--weight-decay', default=1e-4, type=float,
                        help='weight decay')
    parser.add_argument('--nesterov', default=False, type=str2bool,
                        help='nesterov')
    args = parser.parse_args()
    return args


def train(args, train_loader, model, criterion, optimizer, epoch, scheduler=None):
    losses = AverageMeter()
    scores = AverageMeter()

    model.train()

    accumulation_steps = 4
    for i, (input, target) in tqdm(enumerate(train_loader), total=len(train_loader)):
        input = input.cuda()
        target = target.cuda()

        output = model(input)
        loss = criterion(output, target)
        # scale the loss so the accumulated gradient matches a larger batch
        loss = loss / accumulation_steps
        loss.backward()

        acc = accuracy(output, target)[0]
        losses.update(loss.item(), input.size(0))
        scores.update(acc.item(), input.size(0))

        # compute gradient and do one optimizing step every accumulation_steps mini-batches
        if (i + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()

    log = OrderedDict([
        ('loss', losses.avg),
        ('acc', scores.avg),
    ])
    return log


def validate(args, val_loader, model, criterion):
    losses = AverageMeter()
    scores = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        for i, (input, target) in tqdm(enumerate(val_loader), total=len(val_loader)):
            input = input.cuda()
            target = target.cuda()

            output = model(input)
            loss = criterion(output, target)
            acc = accuracy(output, target)[0]

            losses.update(loss.item(), input.size(0))
            scores.update(acc.item(), input.size(0))

    log = OrderedDict([
        ('loss', losses.avg),
        ('acc', scores.avg),
    ])
    return log


def main():
    args = parse_args()

    if args.name is None:
        args.name = '%s' % args.arch
        if args.deform:
            args.name += '_wDCN'
            if args.modulation:
                args.name += 'v2'
            args.name += '_c%d-4' % args.min_deform_layer

    if not os.path.exists('models/%s' % args.name):
        os.makedirs('models/%s' % args.name)

    print('Config -----')
    for arg in vars(args):
        print('%s: %s' % (arg, getattr(args, arg)))
    print('------------')

    with open('models/%s/args.txt' % args.name, 'w') as f:
        for arg in vars(args):
            print('%s: %s' % (arg, getattr(args, arg)), file=f)

    joblib.dump(args, 'models/%s/args.pkl' % args.name)

    criterion = nn.CrossEntropyLoss().cuda()
    cudnn.benchmark = True

    train_set = train_get_loader(args.trainData, args.batch_size)
    test_set = test_get_loader(args.testData, args.batch_size)
    num_classes = 3

    # create model
    model = archs.__dict__[args.arch](args.deform, args.min_deform_layer, args.modulation, num_classes)
    model = model.cuda()
    print(model)

    if args.optimizer == 'Adam':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
    elif args.optimizer == 'SGD':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr,
                              momentum=args.momentum, weight_decay=args.weight_decay, nesterov=args.nesterov)

    log = pd.DataFrame(index=[], columns=[
        'epoch', 'lr', 'loss', 'acc', 'val_loss', 'val_acc'
    ])

    best_acc = 0
    for epoch in range(args.epochs):
        print('Epoch [%d/%d]' % (epoch, args.epochs))

        # train for one epoch
        train_log = train(args, train_set, model, criterion, optimizer, epoch)
        # evaluate on validation set
        val_log = validate(args, test_set, model, criterion)

        print('loss %.4f - acc %.4f - val_loss %.4f - val_acc %.4f'
              % (train_log['loss'], train_log['acc'], val_log['loss'], val_log['acc']))

        tmp = pd.Series([
            epoch,
            args.lr,  # log the actual learning rate, not a hard-coded value
            train_log['loss'],
            train_log['acc'],
            val_log['loss'],
            val_log['acc'],
        ], index=['epoch', 'lr', 'loss', 'acc', 'val_loss', 'val_acc'])

        # DataFrame.append was removed in pandas 2.x; concat is the equivalent
        log = pd.concat([log, tmp.to_frame().T], ignore_index=True)
        log.to_csv('models/%s/log.csv' % args.name, index=False)

        if train_log['acc'] >= best_acc:
            torch.save(model.state_dict(), 'models/%s/model.pth' % args.name)
            best_acc = train_log['acc']
            print("=> saved best model")

    print("best train acc: %f" % best_acc)


if __name__ == '__main__':
    main()
```
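Note the gradient-accumulation pattern in `train()`: the loss is divided by `accumulation_steps` and `optimizer.step()` runs only every fourth mini-batch, so with `batch_size=1` each update approximates an effective batch of 4. A minimal, self-contained sketch of the same pattern (toy model and data, hypothetical sizes):

```python
import torch
from torch import nn, optim

# toy setup: illustrates the pattern, not the real training loop
model = nn.Linear(10, 3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-2)

accumulation_steps = 4
for i in range(8):  # eight mini-batches of size 1
    x = torch.randn(1, 10)
    y = torch.randint(0, 3, (1,))
    loss = criterion(model(x), y) / accumulation_steps  # scale so the summed
    loss.backward()                                     # gradient matches batch 4
    if (i + 1) % accumulation_steps == 0:
        optimizer.step()        # one update per 4 accumulated mini-batches
        optimizer.zero_grad()
```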
Data loading (dataset.py)
```python
import os

import numpy as np
import imgaug.augmenters as iaa
from PIL import Image
from torch.utils import data
from torchvision import transforms

# apply the given augmenter to 50% of the images
sometimes = lambda aug: iaa.Sometimes(0.5, aug)

seq = iaa.Sequential([
    iaa.Fliplr(0.5),  # horizontally flip 50% of the images
    iaa.Flipud(0.5),  # vertically flip 50% of the images
    # pick one arithmetic operation
    sometimes(iaa.OneOf([
        iaa.Multiply(mul=(0.8, 1.2), per_channel=False),   # multiply pixel values
        iaa.Add(value=(-20, 20), per_channel=False),       # add/subtract pixel values
        iaa.Cutout(size=0.2),                              # cut out a region covering 20% of the image, gray fill
        iaa.Dropout(p=(0.0, 0.5), per_channel=False)       # randomly drop pixels (set to black)
    ])),
    # pick one geometric transform
    sometimes(iaa.OneOf([
        iaa.Affine(
            scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},                # scale to 80%-120%
            translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},  # translate by ±20%
            rotate=(-5, 5),      # rotate by ±5 degrees
            shear=(-5, 5),       # shear by ±5 degrees (rectangle -> parallelogram)
            order=[0, 1],        # nearest-neighbour or bilinear interpolation
            cval=(0, 255),       # fill value for newly created pixels
            mode="edge",         # how to fill areas outside the image
            fit_output=False,    # do not enlarge the canvas to keep lost edges
        ),
        iaa.PiecewiseAffine(scale=(0, 0.04)),                    # local affine distortions
        iaa.ElasticTransformation(alpha=(0, 40), sigma=(4, 8)),  # move pixels via displacement fields
        iaa.PerspectiveTransform(scale=(0, 0.06))                # random four-point perspective transform
    ])),
    # pick one blur method
    sometimes(iaa.OneOf([
        iaa.GaussianBlur(sigma=2.0),               # gaussian blur
        iaa.AverageBlur(k=(2, 7)),                 # mean blur
        iaa.MedianBlur(k=(3, 11)),                 # median blur
        iaa.MotionBlur(k=(3, 7), angle=(0, 360))   # motion blur
    ])),
    # pick one edge enhancement
    sometimes(iaa.OneOf([
        iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),
        iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)),
        iaa.EdgeDetect(alpha=(0, 0.75)),
        iaa.DirectedEdgeDetect(
            alpha=(0, 0.7), direction=(0.0, 1.0)
        )  # enhance edges in a specific direction
    ])),
    # pick one contrast enhancement
    sometimes(iaa.OneOf([
        iaa.HistogramEqualization(),                 # histogram equalization
        iaa.GammaContrast(gamma=(0.7, 1.7)),         # change contrast with a gamma function
        iaa.SigmoidContrast(gain=(5, 6)),            # change contrast with a sigmoid function
        iaa.AllChannelsCLAHE(clip_limit=(0.1, 8))    # contrast-limited adaptive histogram equalization
    ])),
],
    random_order=True  # apply the operations in random order
)


class ImageDataset(data.Dataset):
    """Custom Dataset compatible with the prebuilt DataLoader."""

    def __init__(self, ImgDir, transform, augment=True):
        self.image_paths = []
        for root, dirs, files in os.walk(ImgDir):
            for fs in files:
                if fs.endswith(".png"):
                    self.image_paths.append(os.path.join(root, fs))
        self.image_paths.sort()
        self.transform = transform
        self.augment = augment

    def __getitem__(self, index):
        """Reads an image from a file, preprocesses it and returns it."""
        image_path = self.image_paths[index]
        image = np.array(Image.open(image_path).convert("RGB").resize((256, 256)))
        if self.augment:
            image = seq.augment_image(image)
        image = image.copy()
        # the class label is the parent directory name (1/2/3), made zero-based
        label = int(os.path.basename(os.path.dirname(image_path))) - 1
        if self.transform is not None:
            image = self.transform(image)
        return image, label

    def __len__(self):
        """Returns the total number of image files."""
        return len(self.image_paths)


def train_get_loader(ImgDir, batch_size):
    """Builds and returns a DataLoader for training (with augmentation)."""
    transform = transforms.Compose([transforms.ToTensor()])
    dataset = ImageDataset(ImgDir, transform, augment=True)
    data_loader = data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True,
                                  drop_last=True, num_workers=0)
    return data_loader


def test_get_loader(ImgDir, batch_size):
    """Builds and returns a DataLoader for evaluation (no augmentation or shuffling)."""
    transform = transforms.Compose([transforms.ToTensor()])
    dataset = ImageDataset(ImgDir, transform, augment=False)
    data_loader = data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False,
                                  drop_last=False, num_workers=0)
    return data_loader
```
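Since every training sample passes through `seq` once per epoch, it is worth eyeballing what the pipeline actually produces. A minimal preview sketch (assumes a sample file at image/1/2.png, matching the directory tree above):

```python
# Quick visual check of the augmentation pipeline defined in dataset.py
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from dataset import seq  # the iaa.Sequential defined above

img = np.array(Image.open("image/1/2.png").convert("RGB").resize((256, 256)))
aug = seq.augment_image(img)  # same call used in ImageDataset.__getitem__

fig, axes = plt.subplots(1, 2)
axes[0].imshow(img); axes[0].set_title("original");  axes[0].axis("off")
axes[1].imshow(aug); axes[1].set_title("augmented"); axes[1].axis("off")
plt.show()
```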
Model architecture (archs.py)
```python
# -*- coding: utf-8 -*-
from torch import nn

from deform_conv_v2 import *


class ScaledMNISTNet(nn.Module):
    def __init__(self, deform, min_deform_layer, modulation, num_classes):
        super().__init__()

        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d((2, 2))
        self.avg_pool = nn.AdaptiveAvgPool2d(1)

        features = []
        inplanes = 3
        outplanes = 32
        # six conv blocks, 32 -> 1024 channels, deformable from layer min_deform_layer on
        for i in range(6):
            if deform and min_deform_layer <= i + 1:
                features.append(DeformConv2d(inplanes, outplanes, 3, padding=1, bias=False, modulation=modulation))
            else:
                features.append(nn.Conv2d(inplanes, outplanes, 3, padding=1, bias=False))
            features.append(nn.BatchNorm2d(outplanes))
            features.append(self.relu)
            if i < 5:
                features.append(self.pool)
            inplanes = outplanes
            outplanes *= 2
        self.features = nn.Sequential(*features)

        self.fc = nn.Linear(1024, num_classes)

    def forward(self, input):
        x = self.features(input)
        x = self.avg_pool(x)
        x = x.view(x.shape[0], -1)
        output = self.fc(x)
        return output
```
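As a sanity check, the head matches the feature extractor: six blocks double the channels from 32 to 1024 while five max-pools shrink a 256×256 input to 8×8, and the adaptive average pool reduces that to the 1024-vector the fc layer expects. A quick shape check (a sketch; `deform=False` so it runs without a GPU):

```python
import torch
from archs import ScaledMNISTNet

net = ScaledMNISTNet(deform=False, min_deform_layer=3, modulation=False, num_classes=3)
x = torch.randn(1, 3, 256, 256)   # same size the dataset resizes to
print(net(x).shape)               # torch.Size([1, 3])
```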
Deformable convolution module (deform_conv_v2.py)
```python
import torch
from torch import nn


class DeformConv2d(nn.Module):
    def __init__(self, inc, outc, kernel_size=3, padding=1, stride=1, bias=None, modulation=False):
        """
        Args:
            modulation (bool, optional): If True, use modulated deformable convolution (Deformable ConvNets v2).
        """
        super(DeformConv2d, self).__init__()
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride
        self.zero_padding = nn.ZeroPad2d(padding)
        # the "real" convolution, applied to the resampled feature map (hence stride=kernel_size)
        self.conv = nn.Conv2d(inc, outc, kernel_size=kernel_size, stride=kernel_size, bias=bias)

        # predicts the 2N offsets (x and y for each of the N kernel positions)
        self.p_conv = nn.Conv2d(inc, 2*kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride)
        nn.init.constant_(self.p_conv.weight, 0)
        self.p_conv.register_backward_hook(self._set_lr)

        self.modulation = modulation
        if modulation:
            # predicts the N modulation scalars (the DCNv2 addition)
            self.m_conv = nn.Conv2d(inc, kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride)
            nn.init.constant_(self.m_conv.weight, 0)
            self.m_conv.register_backward_hook(self._set_lr)

    @staticmethod
    def _set_lr(module, grad_input, grad_output):
        # run the offset/modulation branches at 0.1x the base learning rate;
        # the hook must *return* the scaled grad_input (the published version
        # built generator expressions and discarded them, which had no effect)
        return tuple(g * 0.1 if g is not None else None for g in grad_input)

    def forward(self, x):
        offset = self.p_conv(x)
        if self.modulation:
            m = torch.sigmoid(self.m_conv(x))

        dtype = offset.data.type()
        ks = self.kernel_size
        N = offset.size(1) // 2

        if self.padding:
            x = self.zero_padding(x)

        # (b, 2N, h, w)
        p = self._get_p(offset, dtype)

        # (b, h, w, 2N)
        p = p.contiguous().permute(0, 2, 3, 1)
        q_lt = p.detach().floor()
        q_rb = q_lt + 1

        q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2)-1), torch.clamp(q_lt[..., N:], 0, x.size(3)-1)], dim=-1).long()
        q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2)-1), torch.clamp(q_rb[..., N:], 0, x.size(3)-1)], dim=-1).long()
        q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], dim=-1)
        q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], dim=-1)

        # clip p
        p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2)-1), torch.clamp(p[..., N:], 0, x.size(3)-1)], dim=-1)

        # bilinear kernel (b, h, w, N)
        g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:]))
        g_rb = (1 - (q_rb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_rb[..., N:].type_as(p) - p[..., N:]))
        g_lb = (1 + (q_lb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_lb[..., N:].type_as(p) - p[..., N:]))
        g_rt = (1 - (q_rt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_rt[..., N:].type_as(p) - p[..., N:]))

        # sampled values at the four neighbours, (b, c, h, w, N)
        x_q_lt = self._get_x_q(x, q_lt, N)
        x_q_rb = self._get_x_q(x, q_rb, N)
        x_q_lb = self._get_x_q(x, q_lb, N)
        x_q_rt = self._get_x_q(x, q_rt, N)

        # bilinear interpolation, (b, c, h, w, N)
        x_offset = g_lt.unsqueeze(dim=1) * x_q_lt + \
                   g_rb.unsqueeze(dim=1) * x_q_rb + \
                   g_lb.unsqueeze(dim=1) * x_q_lb + \
                   g_rt.unsqueeze(dim=1) * x_q_rt

        # modulation
        if self.modulation:
            m = m.contiguous().permute(0, 2, 3, 1)
            m = m.unsqueeze(dim=1)
            m = torch.cat([m for _ in range(x_offset.size(1))], dim=1)
            x_offset *= m

        x_offset = self._reshape_x_offset(x_offset, ks)
        out = self.conv(x_offset)

        return out

    def _get_p_n(self, N, dtype):
        p_n_x, p_n_y = torch.meshgrid(
            torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1),
            torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1))
        # (1, 2N, 1, 1)
        p_n = torch.cat([torch.flatten(p_n_x), torch.flatten(p_n_y)], 0)
        p_n = p_n.view(1, 2*N, 1, 1).type(dtype)
        return p_n

    def _get_p_0(self, h, w, N, dtype):
        p_0_x, p_0_y = torch.meshgrid(
            torch.arange(1, h*self.stride+1, self.stride),
            torch.arange(1, w*self.stride+1, self.stride))
        p_0_x = torch.flatten(p_0_x).view(1, 1, h, w).repeat(1, N, 1, 1)
        p_0_y = torch.flatten(p_0_y).view(1, 1, h, w).repeat(1, N, 1, 1)
        p_0 = torch.cat([p_0_x, p_0_y], 1).type(dtype)
        return p_0

    def _get_p(self, offset, dtype):
        N, h, w = offset.size(1)//2, offset.size(2), offset.size(3)
        # (1, 2N, 1, 1)
        p_n = self._get_p_n(N, dtype)
        # (1, 2N, h, w)
        p_0 = self._get_p_0(h, w, N, dtype)
        # sampling position = regular grid + kernel offsets + learned offsets
        p = p_0 + p_n + offset
        return p

    def _get_x_q(self, x, q, N):
        b, h, w, _ = q.size()
        padded_w = x.size(3)
        c = x.size(1)
        # (b, c, h*w)
        x = x.contiguous().view(b, c, -1)

        # (b, h, w, N)
        index = q[..., :N]*padded_w + q[..., N:]  # offset_x*w + offset_y
        # (b, c, h*w*N)
        index = index.contiguous().unsqueeze(dim=1).expand(-1, c, -1, -1, -1).contiguous().view(b, c, -1)

        x_offset = x.gather(dim=-1, index=index).contiguous().view(b, c, h, w, N)
        return x_offset

    @staticmethod
    def _reshape_x_offset(x_offset, ks):
        b, c, h, w, N = x_offset.size()
        x_offset = torch.cat([x_offset[..., s:s+ks].contiguous().view(b, c, h, w*ks) for s in range(0, N, ks)], dim=-1)
        x_offset = x_offset.contiguous().view(b, c, h*ks, w*ks)
        return x_offset
```
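Because `p_conv` is zero-initialized, the predicted offsets start at zero, so the layer initially samples the regular 3×3 grid; training then learns where to move the sampling locations. A quick CPU shape check of the module (a sketch):

```python
import torch
from deform_conv_v2 import DeformConv2d

dconv = DeformConv2d(3, 8, kernel_size=3, padding=1, modulation=True)
x = torch.randn(2, 3, 32, 32)
print(dconv(x).shape)   # torch.Size([2, 8, 32, 32]) -- spatial size is preserved
```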
Utility functions (utils.py)
```python
import argparse

import torch


def str2bool(v):
    if v.lower() in ['true', '1']:
        return True
    elif v.lower() in ['false', '0']:
        return False
    else:
        # a bare assert on a non-empty string always passes; raise instead
        raise argparse.ArgumentTypeError('Boolean value expected.')


def count_params(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape instead of view: the slice is non-contiguous in newer torch
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
```
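For example, with the default `topk=(1,)` the function returns a one-element list holding the top-1 accuracy as a percentage:

```python
import torch
from utils import accuracy

logits = torch.tensor([[2.0, 0.1, 0.3],    # predicts class 0 (correct)
                       [0.2, 1.5, 0.1]])   # predicts class 1 (target is 2)
target = torch.tensor([0, 2])
print(accuracy(logits, target))            # [tensor([50.])] -- one of two correct
```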
Visualization functions (visual_net.py)
```python
from PIL import Image
import torch
from torch import nn
import torchvision.transforms as transforms
import numpy as np
import cv2
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

from deform_conv_v2 import *


# mirrors the network definition in archs.py
class ScaledMNISTNet(nn.Module):
    def __init__(self, deform, min_deform_layer, modulation, num_classes):
        super(ScaledMNISTNet, self).__init__()

        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d((2, 2))
        self.avg_pool = nn.AdaptiveAvgPool2d(1)

        features = []
        inplanes = 3
        outplanes = 32
        for i in range(6):
            if deform and min_deform_layer <= i + 1:
                features.append(DeformConv2d(inplanes, outplanes, 3, padding=1, bias=False, modulation=modulation))
            else:
                features.append(nn.Conv2d(inplanes, outplanes, 3, padding=1, bias=False))
            features.append(nn.BatchNorm2d(outplanes))
            features.append(self.relu)
            if i < 5:
                features.append(self.pool)
            inplanes = outplanes
            outplanes *= 2
        self.features = nn.Sequential(*features)

        self.fc = nn.Linear(1024, num_classes)

    def forward(self, input):
        x = self.features(input)
        x = self.avg_pool(x)
        x = x.view(x.shape[0], -1)
        output = self.fc(x)
        return output


def kernel_inv_map(vis_attr, target_point, map_h, map_w):
    pos_shift = [vis_attr['dilation'] * 0 - vis_attr['pad'],
                 vis_attr['dilation'] * 1 - vis_attr['pad'],
                 vis_attr['dilation'] * 2 - vis_attr['pad']]
    source_point = []
    for idx in range(vis_attr['filter_size']**2):
        cur_source_point = np.array([target_point[0] + pos_shift[idx // 3],
                                     target_point[1] + pos_shift[idx % 3]])
        if cur_source_point[0] < 0 or cur_source_point[1] < 0 \
                or cur_source_point[0] > map_h - 1 or cur_source_point[1] > map_w - 1:
            continue
        source_point.append(cur_source_point.astype('f'))
    return source_point


def offset_inv_map(source_points, offset):
    for idx, _ in enumerate(source_points):
        source_points[idx][0] += offset[2*idx]
        source_points[idx][1] += offset[2*idx + 1]
    return source_points


def get_bottom_position(vis_attr, top_points, all_offset):
    map_h = all_offset[0].shape[2]
    map_w = all_offset[0].shape[3]

    for level in range(vis_attr['plot_level']):
        source_points = []
        for idx, cur_top_point in enumerate(top_points):
            cur_top_point = np.round(cur_top_point)
            if cur_top_point[0] < 0 or cur_top_point[1] < 0 \
                    or cur_top_point[0] > map_h - 1 or cur_top_point[1] > map_w - 1:
                continue
            cur_source_point = kernel_inv_map(vis_attr, cur_top_point, map_h, map_w)
            cur_offset = np.squeeze(all_offset[level][:, :, int(cur_top_point[0]), int(cur_top_point[1])])
            cur_source_point = offset_inv_map(cur_source_point, cur_offset)
            source_points = source_points + cur_source_point
        top_points = source_points
    return source_points


def plot_according_to_point(vis_attr, im, source_points, map_h, map_w, color=[255, 0, 0]):
    plot_area = vis_attr['plot_area']
    for idx, cur_source_point in enumerate(source_points):
        y = np.round((cur_source_point[0] + 0.5) * im.shape[0] / map_h).astype('i')
        x = np.round((cur_source_point[1] + 0.5) * im.shape[1] / map_w).astype('i')

        if x < 0 or y < 0 or x > im.shape[1]-1 or y > im.shape[0]-1:
            continue
        y = min(y, im.shape[0] - vis_attr['plot_area'] - 1)
        x = min(x, im.shape[1] - vis_attr['plot_area'] - 1)
        y = max(y, vis_attr['plot_area'])
        x = max(x, vis_attr['plot_area'])
        im[y-plot_area:y+plot_area+1, x-plot_area:x+plot_area+1, :] = np.tile(
            np.reshape(color, (1, 1, 3)), (2*plot_area+1, 2*plot_area+1, 1)
        )
    return im


def show_dconv_offset(im, all_offset, step=[2, 2], filter_size=3,
                      dilation=2, pad=2, plot_area=2, plot_level=2):
    vis_attr = {'filter_size': filter_size, 'dilation': dilation, 'pad': pad,
                'plot_area': plot_area, 'plot_level': plot_level}

    map_h = all_offset[0].shape[2]
    map_w = all_offset[0].shape[3]

    step_h = step[0]
    step_w = step[1]
    start_h = step_h // 2
    start_w = step_w // 2
    plt.figure()
    for im_h in range(start_h, map_h, step_h):
        for im_w in range(start_w, map_w, step_w):
            target_point = np.array([im_h, im_w])
            source_y = np.round(target_point[0] * im.shape[0] / map_h)
            source_x = np.round(target_point[1] * im.shape[1] / map_w)
            if source_y < plot_area or source_x < plot_area \
                    or source_y >= im.shape[0] - plot_area or source_x >= im.shape[1] - plot_area:
                continue

            cur_im = np.copy(im)
            source_points = get_bottom_position(vis_attr, [target_point], all_offset)
            cur_im = plot_according_to_point(vis_attr, cur_im, source_points, map_h, map_w)
            cur_im[int(source_y-plot_area):int(source_y+plot_area+1),
                   int(source_x-plot_area):int(source_x+plot_area+1), :] = \
                np.tile(np.reshape([0, 255, 0], (1, 1, 3)), (2*plot_area+1, 2*plot_area+1, 1))

            plt.axis("off")
            plt.imshow(cur_im)
            plt.show(block=False)
            plt.pause(0.01)
            plt.clf()


def draw_CAM(model, img_path, save_path, resize=256, isSave=True, isShow=False):
    # load and preprocess the image
    img = Image.open(img_path).convert('RGB')
    loader = transforms.Compose([transforms.Resize(size=(resize, resize)), transforms.ToTensor()])
    img = loader(img).unsqueeze(0)  # unsqueeze(0) adds a batch dimension

    # get the model's feature maps and scores
    model.eval()  # evaluation mode: batch norm and dropout behave deterministically
    feature = model.features(img)
    output = model.fc(model.avg_pool(feature).view(1, -1))

    # score of the highest-scoring class
    pred = torch.argmax(output).item()
    pred_class = output[:, pred]

    # record the gradient of the feature maps
    def hook_grad(grad):
        global feature_grad
        feature_grad = grad
    feature.register_hook(hook_grad)

    # backpropagate to obtain the gradients
    pred_class.backward()
    grads = feature_grad
    # global-average-pool the gradients to one weight per channel
    pooled_grads = torch.nn.functional.adaptive_avg_pool2d(grads, (1, 1))

    # batch size is 1 here, so drop the batch dimension
    pooled_grads = pooled_grads[0]
    features = feature[0].detach()  # detach so the in-place weighting below is safe

    # weight each channel by its pooled gradient
    for i in range(len(features)):
        features[i, ...] *= pooled_grads[i, ...]

    # build the heatmap: channel-wise mean, ReLU, normalize to [0, 1]
    heatmap = features.numpy()
    heatmap = np.mean(heatmap, axis=0)
    heatmap = np.maximum(heatmap, 0)
    heatmap /= np.max(heatmap)

    # visualize the raw heatmap
    if isShow:
        plt.matshow(heatmap)
        plt.show()

    img = Image.open(img_path).convert('RGB')
    img = np.array(img)
    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))  # resize the heatmap to the original image size
    heatmap = np.uint8(255 * heatmap)  # scale to 0-255 before applying the colormap
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)  # colorize the heatmap
    superimposed_img = heatmap * 0.2 + img  # 0.2 is the heatmap intensity factor

    # save the result to disk
    if isSave:
        superimposed_img = np.array(superimposed_img).astype(np.uint8)
        superimposed_img = Image.fromarray(superimposed_img)
        superimposed_img.save(save_path)
    # display the result
    if isShow:
        plt.imshow(superimposed_img)


def draw_red(model, img_path, save_path, resize=512, isSave=True, isShow=False):
    # load and preprocess the image
    img0 = Image.open(img_path).convert('RGB')
    loader = transforms.Compose([transforms.Resize(size=(resize, resize)), transforms.ToTensor()])
    img = loader(img0).unsqueeze(0)  # unsqueeze(0) adds a batch dimension

    # intermediate activations at different depths of the feature extractor
    model.eval()
    with torch.no_grad():  # gradients are not needed, and numpy conversion requires grad-free tensors
        feature1 = model.features[:19](img)
        feature2 = model.features[:15](img)
        feature3 = model.features[:11](img)
    show_dconv_offset(np.array(img0), [feature1, feature2])


model = ScaledMNISTNet(True, 3, True, 3)
print(model)
model.load_state_dict(torch.load(r'.\models\ScaledMNISTNet_wDCNv2_c3-4\model.pth',
                                 map_location='cpu'),  # weights were trained on GPU
                      strict=False)
draw_red(model, r'.\grad_cam\2.png', r'.\grad_cam\3.png', isSave=True, isShow=True)
draw_CAM(model, r'.\grad_cam\2.png', r'.\grad_cam\3.png', isSave=True, isShow=True)
```
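For reference, `draw_CAM` follows the standard Grad-CAM recipe: each channel weight is the global-average-pooled gradient of the predicted class score with respect to that channel's feature map, and the map is the rectified weighted combination (the code takes a channel-wise mean rather than a sum, which only rescales the map and is cancelled by the final normalization):

$$\alpha_k = \frac{1}{Z}\sum_{i}\sum_{j}\frac{\partial y^{c}}{\partial A^{k}_{ij}}, \qquad L_{\text{Grad-CAM}} = \mathrm{ReLU}\Big(\sum_{k}\alpha_k A^{k}\Big)$$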
Output directories
image: where the datasets are stored.
models: where trained models and training logs are stored.
grad_cam: where the visualization images are stored.