1. Residual Block Implementation

Variable meanings:
x: the feature map fed into the residual block (the input tensor)
out: the result of the convolution layers, activation functions, etc.; this is the residual part F(x)
identity: the identity-mapping part, the x in the formula
2. Core Logic of the Residual Connection

# Residual formula: H(x) = F(x) + x
# Sketch of the implementation:
out = self.conv1(x)   # out starts building F(x)
out = self.bn1(out)
out = self.relu(out)
# ... more layers ...
out = self.conv2(out)
out = self.bn2(out)   # out is now F(x)

identity = x          # the identity mapping x
if dimensions_do_not_match:          # pseudocode: adjust the dimensions if needed
    identity = self.downsample(x)    # a 1x1 convolution adjusts x's dimensions
out += identity       # H(x) = F(x) + x
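To make the addition itself concrete, here is a minimal self-contained sketch; the two random tensors merely stand in for F(x) and x, and the shapes are illustrative:

import torch

x = torch.randn(2, 64, 32, 32)    # stands in for the input x
f_x = torch.randn(2, 64, 32, 32)  # stands in for F(x); same shape as x
h_x = f_x + x                     # element-wise addition: H(x) = F(x) + x
print(h_x.shape)                  # torch.Size([2, 64, 32, 32])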
3. Key Points

1) Is identity just x?
Yes. When the dimensions match, identity = x uses the input directly.
When the dimensions do not match, identity = self.downsample(x) is the adjusted x.

2) Is out the same as H(x)?
Before out += identity executes, out is F(x), the residual part.
After out += identity executes, out becomes H(x), the final output.

3) What the dimension adjustment does
# Example: the input channel count differs from the output channel count
# Suppose x.shape   = [batch, 64, 32, 32]
# and     out.shape = [batch, 128, 32, 32]  # the dimensions need adjusting
# Adjust x's dimensions with a 1x1 convolution:
if self.downsample is not None:    # downsample is typically a 1x1 convolution
    identity = self.downsample(x)  # now identity.shape = [batch, 128, 32, 32]
out += identity                    # the addition now works
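A runnable sketch of that adjustment, assuming the shapes above (downsample here is built by hand, and f_x stands in for F(x)):

import torch
import torch.nn as nn

x = torch.randn(2, 64, 32, 32)     # [batch, 64, 32, 32]
f_x = torch.randn(2, 128, 32, 32)  # pretend F(x) already has 128 channels
downsample = nn.Sequential(        # 1x1 conv + BN adjusts the channel count
    nn.Conv2d(64, 128, kernel_size=1, bias=False),
    nn.BatchNorm2d(128),
)
identity = downsample(x)           # [2, 128, 32, 32]
out = f_x + identity               # shapes match, so H(x) = F(x) + x works
print(out.shape)                   # torch.Size([2, 128, 32, 32])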
4. Putting It All Together

# 1) identity is x (possibly passed through downsample)
# 2) out is F(x) before the addition and H(x) after it
# 3) together they implement residual learning: H(x) = F(x) + x

Full Residual Network Example
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary  # prints a model-structure summary

# Basic residual block: BasicBlock
class BasicBlock(nn.Module):
    expansion = 1  # channel-expansion factor; the basic block does not expand

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        """
        Args:
            in_channels: number of input channels
            out_channels: number of output channels
            stride: stride, defaults to 1; values > 1 downsample the feature map
            downsample: module that adjusts the identity mapping's dimensions, defaults to None
        """
        super(BasicBlock, self).__init__()  # initialize the parent nn.Module
        # First 3x3 convolution; may change the channel count and spatial size
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)  # bias=False because BN follows
        # First batch-normalization layer
        self.bn1 = nn.BatchNorm2d(out_channels)
        # ReLU activation; inplace=True operates in place to save memory
        self.relu = nn.ReLU(inplace=True)
        # Second 3x3 convolution; keeps channels and spatial size (stride=1)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        # Second batch-normalization layer
        self.bn2 = nn.BatchNorm2d(out_channels)
        # downsample module: used when the identity mapping's dimensions must change
        self.downsample = downsample
        # keep the stride around
        self.stride = stride

    def forward(self, x):
        identity = x  # save the input x as the identity-mapping part
        # Main path: the F(x) part
        # First conv + BN + ReLU
        out = self.conv1(x)    # 3x3 convolution
        out = self.bn1(out)    # batch normalization
        out = self.relu(out)   # ReLU activation
        # Second conv + BN; note: no ReLU here, it comes after the residual add
        out = self.conv2(out)  # 3x3 convolution
        out = self.bn2(out)    # batch normalization
        # Adjust the identity mapping's dimensions if needed
        if self.downsample is not None:
            identity = self.downsample(x)  # 1x1 convolution adjusts the dimensions
        # Residual connection: H(x) = F(x) + x
        out += identity        # F(x) + identity
        out = self.relu(out)   # ReLU after the residual add
        return out             # return H(x)
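As a quick sanity check of the block (a minimal sketch; the shapes are illustrative): when the channel counts match and stride=1, no downsample is needed and the shape is preserved.

block = BasicBlock(in_channels=64, out_channels=64)  # dims match, so downsample stays None
x = torch.randn(2, 64, 32, 32)
print(block(x).shape)  # torch.Size([2, 64, 32, 32]): same shape in and out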
# Bottleneck residual block
class Bottleneck(nn.Module):
    expansion = 4  # channel-expansion factor; the bottleneck expands 4x at the end

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        # Stage 1: 1x1 convolution reduces the channel count
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # Stage 2: 3x3 convolution; may change the spatial size
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # Stage 3: 1x1 convolution expands the channels 4x
        self.conv3 = nn.Conv2d(out_channels, out_channels * self.expansion,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x  # save the identity mapping
        # Main path: three convolution layers
        out = self.conv1(x)    # 1x1 conv, reduce channels
        out = self.bn1(out)    # BN
        out = self.relu(out)   # ReLU
        out = self.conv2(out)  # 3x3 conv
        out = self.bn2(out)    # BN
        out = self.relu(out)   # ReLU
        out = self.conv3(out)  # 1x1 conv, expand channels
        out = self.bn3(out)    # BN
        # Adjust the identity mapping's dimensions if needed
        if self.downsample is not None:
            identity = self.downsample(x)
        # Residual connection
        out += identity
        out = self.relu(out)
        return out
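The channel math of the bottleneck, shown on illustrative values: with in_channels=256 and out_channels=64, the path goes 256 -> 64 -> 64 -> 256, and since 64 * expansion equals the input channel count, no downsample is needed here.

bottleneck = Bottleneck(in_channels=256, out_channels=64)  # 256 -> 64 -> 64 -> 256
x = torch.randn(2, 256, 56, 56)
print(bottleneck(x).shape)  # torch.Size([2, 256, 56, 56]); 64 * expansion == 256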
# ResNet main model
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1000):
        """
        Args:
            block: residual block type, BasicBlock or Bottleneck
            layers: list with the block count of each of the 4 stages,
                    e.g. [2, 2, 2, 2] means 2 blocks per stage
            num_classes: number of classes for the classification task
        """
        super(ResNet, self).__init__()
        self.in_channels = 64  # initial channel count
        # Initial convolution layer; processes the input image
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        # Max-pooling layer; downsamples further
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Build the 4 residual stages
        # layer1: 64 channels, keeps the spatial size
        self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
        # layer2: 128 channels, downsamples (halves the spatial size)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        # layer3: 256 channels, downsamples
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        # layer4: 512 channels, downsamples
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # Classification head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # adaptive average pooling down to 1x1
        # Fully connected layer; input dim is 512 * expansion, output is the class count
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        # Initialize the weights
        self._initialize_weights()

    def _make_layer(self, block, out_channels, blocks, stride=1):
        """Build one residual stage consisting of several consecutive residual blocks."""
        downsample = None
        # Decide whether the identity mapping's dimensions need adjusting; two cases:
        # 1) stride != 1: downsampling changes the spatial size
        # 2) channel mismatch: block.expansion != 1 or the channel count changes
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                # 1x1 convolution adjusts the dimensions
                nn.Conv2d(self.in_channels, out_channels * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion)
            )
        layers = []  # holds every residual block of this stage
        # Add the first residual block; it may downsample
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        # Update the current channel count
        self.in_channels = out_channels * block.expansion
        # Add the remaining blocks; they keep the spatial size and channel count
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))  # stride=1, downsample=None
        # Turn the list into a Sequential module
        return nn.Sequential(*layers)

    def _initialize_weights(self):
        # Initialize the model weights
        for m in self.modules():  # iterate over all submodules
            if isinstance(m, nn.Conv2d):
                # Kaiming initialization for conv layers; suits ReLU activations
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                # BN weight initialized to 1, bias to 0
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # Initial layers
        x = self.conv1(x)    # 7x7 convolution
        x = self.bn1(x)      # BN
        x = self.relu(x)     # ReLU
        x = self.maxpool(x)  # max pooling
        # The 4 residual stages
        x = self.layer1(x)   # stage 1
        x = self.layer2(x)   # stage 2
        x = self.layer3(x)   # stage 3
        x = self.layer4(x)   # stage 4
        # Classification head
        x = self.avgpool(x)      # global average pooling
        x = torch.flatten(x, 1)  # flatten to a vector
        x = self.fc(x)           # fully connected classification layer
        return x
# Predefined ResNet variants (factory functions)
def resnet18(num_classes=1000):
    """ResNet-18: uses BasicBlock, block counts [2, 2, 2, 2]."""
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes)

def resnet34(num_classes=1000):
    """ResNet-34: uses BasicBlock, block counts [3, 4, 6, 3]."""
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes)

def resnet50(num_classes=1000):
    """ResNet-50: uses Bottleneck, block counts [3, 4, 6, 3]."""
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes)

def resnet101(num_classes=1000):
    """ResNet-101: uses Bottleneck, block counts [3, 4, 23, 3]."""
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes)

def resnet152(num_classes=1000):
    """ResNet-152: uses Bottleneck, block counts [3, 8, 36, 3]."""
    return ResNet(Bottleneck, [3, 8, 36, 3], num_classes)
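To compare the variants, a small sketch that counts learnable parameters (summing p.numel() over model.parameters()); for these architectures the totals should come out around 11.7M for ResNet-18 and 25.6M for ResNet-50:

for name, factory in [("resnet18", resnet18), ("resnet50", resnet50)]:
    model = factory(num_classes=1000)
    n_params = sum(p.numel() for p in model.parameters())
    print(f"{name}: {n_params / 1e6:.1f}M parameters")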
# Simplified ResNet for small-image datasets such as CIFAR-10
class SimpleResNet(nn.Module):
    def __init__(self, num_classes=10):
        super(SimpleResNet, self).__init__()
        # Initial layer; smaller kernel and stride than the standard ResNet
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # 3 residual stages
        self.layer1 = self._make_layer(16, 16, 2, stride=1)  # 16 -> 16, 2 blocks
        self.layer2 = self._make_layer(16, 32, 2, stride=2)  # 16 -> 32, 2 blocks, downsampled
        self.layer3 = self._make_layer(32, 64, 2, stride=2)  # 32 -> 64, 2 blocks, downsampled
        # Classification head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def _make_layer(self, in_channels, out_channels, num_blocks, stride):
        # Build a simplified residual stage
        downsample = None
        if stride != 1 or in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        layers = []
        # The first block may downsample
        layers.append(BasicBlock(in_channels, out_channels, stride, downsample))
        # The remaining blocks
        for _ in range(1, num_blocks):
            layers.append(BasicBlock(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        # 1) Convolutional stem
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        # 2) Residual stages; each layer may contain several residual blocks
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        # 3) Classification head
        x = self.avgpool(x)      # global average pooling
        x = torch.flatten(x, 1)  # keep the batch dim, flatten the rest
        x = self.fc(x)           # fully connected classification layer
        return x
# Usage example and test code
if __name__ == '__main__':
    # Device: prefer the GPU, fall back to the CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # 1. Print the simplified ResNet structure
    print("=" * 50)
    print("1. SimpleResNet (CIFAR-10 size)")
    print("=" * 50)
    model_simple = SimpleResNet(num_classes=10).to(device)  # create the model and move it to the device
    # Print a structure summary; CIFAR input size (pass the device so this also works on CPU)
    summary(model_simple, (3, 32, 32), device=str(device))

    # 2. Print the standard ResNet structure (ImageNet scale)
    print("\n" + "=" * 50)
    print("2. ResNet-18 (ImageNet size)")
    print("=" * 50)
    model_resnet18 = resnet18(num_classes=1000).to(device)
    summary(model_resnet18, (3, 224, 224), device=str(device))  # standard ImageNet input size, 224 x 224

    # 3. Print the ResNet-50 structure
    print("\n" + "=" * 50)
    print("3. ResNet-50")
    print("=" * 50)
    model_resnet50 = resnet50(num_classes=1000).to(device)
    summary(model_resnet50, (3, 224, 224), device=str(device))

    # 4. Test the forward pass
    print("\n" + "=" * 50)
    print("Forward-pass test")
    print("=" * 50)
    # Create test data: 2 RGB images of size 32x32
    batch_size = 2
    test_input = torch.randn(batch_size, 3, 32, 32).to(device)
    # Run an inference test with the simplified model
    model_simple.eval()  # evaluation mode; disables dropout and other train-only layers
    with torch.no_grad():  # disable gradient tracking to save memory and compute
        output = model_simple(test_input)
    print(f"Input shape: {test_input.shape}")
    print(f"Output shape: {output.shape}")       # should be [2, 10]
    print(f"First 5 outputs: {output[0, :5]}")   # scores of the first image's first 5 classes

    # 5. Demonstrate the dimension-adjustment mechanism
    print("\n" + "=" * 50)
    print("Residual-connection dimension-adjustment example")
    print("=" * 50)
    # Create a residual block that needs a dimension adjustment:
    # 64 input channels, 128 output channels, stride=2 halves the spatial size
    block_with_downsample = BasicBlock(
        in_channels=64,
        out_channels=128,  # increased output channel count
        stride=2,
        downsample=nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=1, stride=2, bias=False),
            nn.BatchNorm2d(128)
        )
    ).to(device)
    # Create a test input: 2 feature maps with 64 channels, 32x32
    test_input = torch.randn(2, 64, 32, 32).to(device)
    # Forward pass
    output = block_with_downsample(test_input)
    # Verify that downsample was used
    print(f"Input shape: {test_input.shape}")   # [2, 64, 32, 32]
    print(f"Output shape: {output.shape}")      # [2, 128, 16, 16]: channels doubled, spatial size halved