深度学习J6周 ResNeXt-50实战解析

本周任务:

1.阅读ResNeXt论文,了解作者的构建思路

2.对比之前介绍的ResNet50V2、DenseNet算法

3.复现ResNeXt-50算法

一、模型结构

ResNeXt是何恺明团队于2017年在CVPR会议上提出的新型图像分类网络。它是ResNet的升级版,在ResNet的基础上引入了cardinality(基数)的概念。

在论文中,作者指出了当时普遍存在的一个问题:要提高模型的准确率,往往采取加深网络或者加宽网络的方法,但这样做会同时增加网络设计的难度和计算开销,为了一点精度的提升往往要付出很大的代价。因此,需要一种在不额外增加计算代价的情况下提升网络精度的方法。

左边--ResNet:输入的具有256个通道的特征先经过1*1卷积压缩到64个通道,之后用3*3的卷积核处理特征,最后经1*1卷积扩大通道数,与原特征做残差连接后输出。

右边--ResNeXt:输入的具有256个通道的特征被分为32个组,每组被压缩到4个通道后再进行处理,32个组的结果相加后,与原特征做残差连接后输出。cardinality指的是一个block中所具有的相同分支的数目。

二、分组卷积

1.ResNeXt采用分组卷积:将特征图分为不同的组,再对每组特征图分别进行卷积,有效降低计算量。

2.分组卷积中,每个卷积核只处理部分通道,如下图,红色卷积核只处理红色通道,绿色卷积核只处理绿色通道,黄色卷积核只处理黄色通道。此时,每个卷积核有2个通道,每个卷积核生成一张特征图。

三、代码

学习于深度学习第J6周:ResNeXt-50实战解析_resnext50-CSDN博客

1.前期准备

python 复制代码
#配置GPU
import os, PIL, random, pathlib
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision
from torchvision import transforms, datasets
import torch.nn.functional as F

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)

# Load the dataset location: every sub-directory of data_dir is one class.
data_dir = './data/'
data_dir = pathlib.Path(data_dir)

# Keep directories only (stray files such as .DS_Store are not classes) and
# sort by name so the list order is deterministic across platforms.
data_paths = sorted((path for path in data_dir.glob('*') if path.is_dir()),
                    key=lambda path: path.name)
# Path.name is separator-agnostic. Splitting str(path) on "\\" only worked with
# Windows path separators (IndexError elsewhere) and picked the wrong component
# whenever data_dir was nested more than one level deep.
classeNames = [path.name for path in data_paths]
print(classeNames)

# Every entry exactly one level below a class directory is counted as an image.
image_count = len(list(data_dir.glob('*/*')))
print("图片总数为:", image_count)

# Data preprocessing + dataset split
# Channel-wise standardisation shared by both pipelines. The original note says
# these mean/std values were estimated by random sampling from a dataset
# (they coincide with the commonly used ImageNet statistics).
_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])

# Training pipeline: resize -> tensor in [0, 1] -> standardise.
# Augmentation is currently disabled; a step such as
# transforms.RandomHorizontalFlip() would go right after the resize.
_train_steps = [
    transforms.Resize([224, 224]),  # bring every image to one common size
    transforms.ToTensor(),  # PIL Image / numpy.ndarray -> tensor scaled to [0, 1]
    _normalize,  # standardising the inputs helps the model converge
]
train_transforms = transforms.Compose(_train_steps)

# Evaluation pipeline: the same deterministic steps.
_test_steps = [
    transforms.Resize([224, 224]),  # bring every image to one common size
    transforms.ToTensor(),  # PIL Image / numpy.ndarray -> tensor scaled to [0, 1]
    _normalize,  # standardising the inputs helps the model converge
]
test_transform = transforms.Compose(_test_steps)

# Training view of the image folder. Reuse data_dir instead of repeating the
# path literal so both always point at the same directory.
total_data = datasets.ImageFolder(str(data_dir), transform=train_transforms)
print(total_data.class_to_idx)

# Random 80 / 20 split into training and held-out samples.
train_size = int(0.8 * len(total_data))
test_size = len(total_data) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(total_data, [train_size, test_size])

# The held-out indices are re-pointed at a second view of the same folder that
# uses test_transform. Previously test_transform was never used, so enabling
# augmentation in train_transforms would silently have augmented the test
# images as well.
eval_data = datasets.ImageFolder(str(data_dir), transform=test_transform)
test_dataset = torch.utils.data.Subset(eval_data, test_dataset.indices)

batch_size = 32
train_dl = torch.utils.data.DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=0)
# Evaluation does not benefit from shuffling; a fixed order keeps runs comparable.
test_dl = torch.utils.data.DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=0)
# Peek at a single batch to confirm tensor shapes and the label dtype.
for X, y in test_dl:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    break

结果:

2.模型

python 复制代码
class BN_Conv2d(nn.Module):
    """
    Conv2d -> BatchNorm2d -> ReLU.

    The convolution and the batch norm are stored together in ``self.seq``;
    the ReLU is applied functionally in ``forward``. All constructor arguments
    are forwarded to ``nn.Conv2d``; ``out_channels`` also sizes the batch norm.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation=1, groups=1, bias=False):
        super().__init__()
        conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )
        norm = nn.BatchNorm2d(out_channels)
        # Kept under the attribute name "seq" so parameter names stay
        # seq.0.* (conv) and seq.1.* (batch norm).
        self.seq = nn.Sequential(conv, norm)

    def forward(self, x):
        normalized = self.seq(x)
        return F.relu(normalized)
 
class ResNeXt_Block(nn.Module):
    """
    Residual bottleneck block whose 3x3 convolution is a grouped convolution.

    Main path: 1x1 conv to ``cardinality * group_depth`` channels, a 3x3
    grouped conv (``cardinality`` groups, carrying ``stride``), then a 1x1 conv
    to twice that width followed by batch norm. The shortcut is always a
    strided 1x1 conv + batch norm projecting the input to the same shape.
    The two paths are added and passed through ReLU.
    """

    def __init__(self, in_chnls, cardinality, group_depth, stride):
        super().__init__()
        width = cardinality * group_depth  # channels inside the bottleneck
        out_chnls = width * 2  # channels leaving the block
        self.group_chnls = width
        self.conv1 = BN_Conv2d(in_chnls, width, 1, stride=1, padding=0)
        self.conv2 = BN_Conv2d(width, width, 3, stride=stride, padding=1, groups=cardinality)
        # No bias argument is passed here, so nn.Conv2d's default applies.
        self.conv3 = nn.Conv2d(width, out_chnls, 1, stride=1, padding=0)
        self.bn = nn.BatchNorm2d(out_chnls)
        self.short_cut = nn.Sequential(
            nn.Conv2d(in_chnls, out_chnls, 1, stride, 0, bias=False),
            nn.BatchNorm2d(out_chnls),
        )

    def forward(self, x):
        main = self.conv2(self.conv1(x))
        main = self.bn(self.conv3(main))
        residual = self.short_cut(x)
        return F.relu(main + residual)
 
class ResNeXt(nn.Module):
    """
    ResNeXt builder.

    Stem: 7x7 stride-2 BN_Conv2d to 64 channels followed by a 3x3 stride-2
    max pool. Then four stages of ResNeXt_Block (first-block strides 1, 2, 2,
    2), global average pooling and a linear classifier.

    Args:
        layers: number of blocks in each of the four stages, e.g. [3, 4, 6, 3].
        cardinality: number of groups used by every block's grouped conv.
        group_depth: channels per group in the first stage; doubled at each
            following stage.
        num_classes: number of outputs of the final linear layer.

    ``forward`` returns raw, unnormalised class scores (logits) of shape
    (N, num_classes). Apply ``F.softmax(logits, dim=1)`` yourself if
    probabilities are needed; ``argmax`` is unaffected.
    """

    def __init__(self, layers, cardinality, group_depth, num_classes) -> None:
        super(ResNeXt, self).__init__()
        self.cardinality = cardinality
        # Running count of channels produced so far; updated by ___make_layers.
        self.channels = 64
        self.conv1 = BN_Conv2d(3, self.channels, 7, stride=2, padding=3)
        d1 = group_depth
        self.conv2 = self.___make_layers(d1, layers[0], stride=1)
        d2 = d1 * 2
        self.conv3 = self.___make_layers(d2, layers[1], stride=2)
        d3 = d2 * 2
        self.conv4 = self.___make_layers(d3, layers[2], stride=2)
        d4 = d3 * 2
        self.conv5 = self.___make_layers(d4, layers[3], stride=2)
        # After the last stage self.channels is the width of the pooled feature.
        self.fc = nn.Linear(self.channels, num_classes)

    def ___make_layers(self, d, blocks, stride):
        """Build one stage of ``blocks`` blocks with group depth ``d``.

        Only the first block uses ``stride``; the remaining ones use stride 1.
        ``self.channels`` is advanced to the stage's output width.
        """
        block_strides = [stride] + [1] * (blocks - 1)
        stage = []
        for block_stride in block_strides:
            stage.append(ResNeXt_Block(self.channels, self.cardinality, d, block_stride))
            self.channels = self.cardinality * d * 2
        return nn.Sequential(*stage)

    def forward(self, x):
        out = self.conv1(x)
        out = F.max_pool2d(out, 3, 2, 1)
        out = self.conv2(out)
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.conv5(out)
        # Global average pooling. Equivalent to the former fixed 7x7 pooling
        # for 224x224 inputs, but also valid for other input resolutions.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        # Return logits: nn.CrossEntropyLoss applies log-softmax internally, so
        # a softmax here would be applied twice and flatten the gradients.
        return self.fc(out)
python 复制代码
# Definitions are complete; build the network and try it out.
# ResNeXt-50 (32x4d): stage depths [3, 4, 6, 3], cardinality 32, group depth 4.
# The output size follows the dataset instead of a hard-coded 4, so the model
# matches whatever class folders ImageFolder found.
num_classes = len(total_data.class_to_idx)
model = ResNeXt([3, 4, 6, 3], 32, 4, num_classes)
model.to(device)

# Print the layer-by-layer summary and parameter count for a 3x224x224 input.
import torchsummary as summary
summary.summary(model, (3, 224, 224))

结果:

3.训练运行

python 复制代码
 
# 训练循环
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Each batch is moved to the module-level ``device``, pushed through
    ``model``, scored with ``loss_fn`` and used for one ``optimizer`` step.

    Returns:
        (accuracy, mean_loss): correct predictions divided by
        ``len(dataloader.dataset)``, and the summed batch losses divided by
        the number of batches.
    """
    sample_count = len(dataloader.dataset)  # size of the training set
    batch_count = len(dataloader)  # number of batches

    correct_total = 0
    loss_total = 0

    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass and loss against the ground-truth labels.
        outputs = model(inputs)
        batch_loss = loss_fn(outputs, labels)

        # Backward pass: clear old gradients, backpropagate, update weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Accumulate the number of hits and the batch loss.
        hits = (outputs.argmax(1) == labels).type(torch.float)
        correct_total += hits.sum().item()
        loss_total += batch_loss.item()

    return correct_total / sample_count, loss_total / batch_count
 
 
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` without computing gradients.

    Batches are moved to the module-level ``device`` before the forward pass.

    Returns:
        (accuracy, mean_loss): correct predictions divided by
        ``len(dataloader.dataset)``, and the summed batch losses divided by
        the number of batches.
    """
    sample_count = len(dataloader.dataset)  # size of the test set
    batch_count = len(dataloader)  # number of batches
    loss_total = 0
    correct_total = 0

    # No parameter updates happen here, so gradient tracking is switched off
    # to save memory and computation.
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            batch_loss = loss_fn(outputs, labels)

            loss_total += batch_loss.item()
            hits = (outputs.argmax(1) == labels).type(torch.float)
            correct_total += hits.sum().item()

    return correct_total / sample_count, loss_total / batch_count
python 复制代码
 
import copy

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.CrossEntropyLoss()  # loss function

epochs = 10

# Per-epoch history, used later for plotting.
train_loss = []
train_acc = []
test_loss = []
test_acc = []

best_acc = 0  # best test accuracy so far; decides which model is kept
# Defined up front so saving below cannot hit an unbound name when the test
# accuracy never rises above 0; in that case the live model is saved.
best_model = model

for epoch in range(epochs):
    model.train()
    epoch_train_acc, epoch_train_loss = train(train_dl, model, loss_fn, optimizer)

    model.eval()
    epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)

    # Snapshot the model whenever the test accuracy improves. A deep copy is
    # required because `model` keeps changing in later epochs.
    if epoch_test_acc > best_acc:
        best_acc = epoch_test_acc
        best_model = copy.deepcopy(model)

    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    test_acc.append(epoch_test_acc)
    test_loss.append(epoch_test_loss)

    # Read the current learning rate back from the optimizer.
    lr = optimizer.state_dict()['param_groups'][0]['lr']

    template = ('Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:.3f}, Test_acc:{:.1f}%, Test_loss:{:.3f}, Lr:{:.2E}')
    print(template.format(epoch + 1, epoch_train_acc * 100, epoch_train_loss,
                          epoch_test_acc * 100, epoch_test_loss, lr))

# Save the best model's weights to disk. The original wrote model.state_dict(),
# i.e. the weights of the last epoch, not of the best epoch.
PATH = './best_model.pth'  # file name of the saved parameters
torch.save(best_model.state_dict(), PATH)

print('Done')

结果:

4.打印训练图

python 复制代码
import matplotlib.pyplot as plt
# Silence warnings
import warnings

warnings.filterwarnings("ignore")  # ignore warning messages
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],  # font able to render Chinese labels
    'axes.unicode_minus': False,  # render the minus sign correctly
    'figure.dpi': 100,  # resolution
})

epochs_range = range(epochs)

plt.figure(figsize=(12, 3))

# One panel per metric: (train series, train label, test series, test label,
# legend location, title).
panels = [
    (train_acc, 'Training Accuracy', test_acc, 'Test Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (train_loss, 'Training Loss', test_loss, 'Test Loss',
     'upper right', 'Training and Validation Loss'),
]
for position, (train_series, train_label, test_series, test_label,
               legend_loc, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_series, label=train_label)
    plt.plot(epochs_range, test_series, label=test_label)
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()

四、总结

1.读论文原文要花很长时间,但有讲义,就会快速知道论文的创新点是什么。

2.实验的流程已经很熟悉,现在就在慢慢学每一步的具体内容,争取下次能自己写出。

相关推荐
Coffeeee6 分钟前
帮你快速理解AI Agent之我想招个Android实习生
android·人工智能·agent
新新技术迷12 分钟前
AI聊天自动跟随滚动,附回到底部按钮
人工智能
先锋部队12 分钟前
用Web Worker解析AI返回的大文本不卡UI
人工智能
把你拉进白名单16 分钟前
8.OpenClaw源码解析——三层洋葱重试
人工智能·llm·agent
用户6324150317819 分钟前
拖文档进AI对话框解析,前端要处理哪些脏活
人工智能
姗姗来迟了25 分钟前
AI回答里的引用来源卡片,前端怎么做
人工智能
用户71062077334026 分钟前
Codex-端口配置错误排查案例(stream disconnected before completion)
人工智能
IT_陈寒1 小时前
JavaScript的默认参数挖坑实录,我掉进去了
前端·人工智能·后端
米小虾2 小时前
多Agent系统编排详解:从架构设计到代码实现
人工智能·agent
米小虾2 小时前
多Agent系统的编排:架构、协议与企业级应用
人工智能·agent