Day54打卡 @浙大疏锦行

知识点回顾:

  1. 传统计算机视觉发展史:LeNet-->AlexNet-->VGGNet-->InceptionNet-->ResNet
  2. inception模块和网络
python 复制代码
import torch
import torch.nn as nn

class Inception(nn.Module):
    """Inception block: four parallel branches fused by channel concatenation.

    Every branch keeps the spatial size of its input (1x1 convolutions use no
    padding, the 3x3 convolution uses padding 1, the 5x5 convolution uses
    padding 2, and the 3x3 max pool uses stride 1 with padding 1), so the
    branch outputs can be concatenated along the channel dimension.

    With the default widths the output has 64 + 128 + 32 + 32 = 256 channels.
    """

    def __init__(
        self,
        in_channels,
        *,
        ch1x1=64,
        ch3x3_reduce=96,
        ch3x3=128,
        ch5x5_reduce=16,
        ch5x5=32,
        pool_proj=32,
    ):
        """Build the four branches.

        Args:
            in_channels: Number of channels of the input feature map.
            ch1x1: Output channels of the 1x1 branch.
            ch3x3_reduce: Channels after the 1x1 reduction in the 3x3 branch.
            ch3x3: Output channels of the 3x3 branch.
            ch5x5_reduce: Channels after the 1x1 reduction in the 5x5 branch.
            ch5x5: Output channels of the 5x5 branch.
            pool_proj: Output channels of the 1x1 projection after pooling.
        """
        super().__init__()

        # 1x1 branch: a pointwise convolution followed by a non-linearity.
        self.branch1x1 = nn.Sequential(
            nn.Conv2d(in_channels, ch1x1, kernel_size=1),
            nn.ReLU(),
        )

        # 3x3 branch: reduce channels with a 1x1 convolution first so the
        # 3x3 convolution operates on fewer input channels.
        self.branch3x3 = nn.Sequential(
            nn.Conv2d(in_channels, ch3x3_reduce, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(ch3x3_reduce, ch3x3, kernel_size=3, padding=1),
            nn.ReLU(),
        )

        # 5x5 branch: same reduce-then-convolve pattern with a larger kernel
        # (larger receptive field).
        self.branch5x5 = nn.Sequential(
            nn.Conv2d(in_channels, ch5x5_reduce, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(ch5x5_reduce, ch5x5, kernel_size=5, padding=2),
            nn.ReLU(),
        )

        # Pooling branch: size-preserving 3x3 max pool, then a 1x1 projection.
        self.branch_pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, pool_proj, kernel_size=1),
            nn.ReLU(),
        )

        # Channel count of the concatenated output, so callers do not have to
        # re-derive it from the branch widths.
        self.out_channels = ch1x1 + ch3x3 + ch5x5 + pool_proj

    def forward(self, x):
        """Apply all four branches to ``x`` and concatenate along dim 1.

        Args:
            x: Input of shape [batch_size, in_channels, height, width].

        Returns:
            Tensor of shape [batch_size, out_channels, height, width]
            (256 channels with the default widths).
        """
        # The branches are independent of each other; each one reads the same x.
        outputs = [
            self.branch1x1(x),
            self.branch3x3(x),
            self.branch5x5(x),
            self.branch_pool(x),
        ]
        return torch.cat(outputs, dim=1)
上述模块变化为[B, C, H, W]-->[B, 256, H, W]

# Smoke test: push one random batch through a single Inception block and
# print the input and output shapes.
model = Inception(in_channels=64)
# Named input_tensor (not `input`) so the builtin input() is not shadowed.
input_tensor = torch.randn(32, 64, 28, 28)
output = model(input_tensor)
print(f"输入形状: {input_tensor.shape}")
print(f"输出形状: {output.shape}")
python 复制代码
class InceptionNet(nn.Module):
    """Small classifier: conv stem, two Inception blocks, pooling, linear head.

    Args:
        num_classes: Size of the final linear layer's output (default 10).
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Stem: 7x7 stride-2 convolution on 3-channel input, ReLU, then a
        # 3x3 stride-2 max pool.
        stem_layers = [
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ]
        self.conv1 = nn.Sequential(*stem_layers)

        # The first block consumes the stem's 64 channels; the second is
        # constructed for 256 input channels.
        self.inception1 = Inception(64)
        self.inception2 = Inception(256)

        # Collapse each channel to a single value, then classify.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return the output of the final linear layer for input batch ``x``."""
        features = self.inception2(self.inception1(self.conv1(x)))
        pooled = self.avgpool(features)
        flat = torch.flatten(pooled, 1)  # keep dim 0, flatten the rest
        return self.fc(flat)
# Instantiate the network with its default arguments.
model = InceptionNet()
# Random tensor standing in for image data: batch of 1, 3 channels, 224x224.
input_tensor = torch.randn((1, 3, 224, 224))
# Run the forward pass and print the shape of the result.
output = model(input_tensor)
print(output.shape)
  3. 特征融合方法阶段性总结:逐元素相加、逐元素相乘、concat通道数增加等
  4. 感受野与卷积核变体:深入理解不同模块和类的设计初衷
python 复制代码
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Preprocessing: convert each image to a tensor, then normalize every one of
# the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 training split (downloaded into ./data when requested), served in
# shuffled batches of 128.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = DataLoader(trainset, batch_size=128, shuffle=True)

# CIFAR-10 test split, same preprocessing, iterated in fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = DataLoader(testset, batch_size=128, shuffle=False)

# CNN that contains one dilated (atrous) convolution layer.
class SimpleCNNWithDilation(nn.Module):
    """Three-convolution CNN with a dilated middle layer, sized for 3x32x32 input.

    The fully connected head is built for a 64 x 8 x 8 feature map, which is
    what two 2x2 poolings of a 32x32 input produce. Inputs of a different
    spatial size raise a shape error in ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # Layer 1: ordinary 3x3 convolution (padding 1 keeps the spatial size).
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        # Layer 2: 3x3 convolution with dilation 2. The kernel taps span a
        # 5x5 window, and padding 2 keeps the spatial size.
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=2, dilation=2)
        # Layer 3: ordinary 3x3 convolution again.
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)

        self.pool = nn.MaxPool2d(2, 2)  # halves height and width
        self.relu = nn.ReLU()

        # Head: 32x32 -> 16x16 -> 8x8 after the two poolings, with 64 channels.
        self.fc1 = nn.Linear(64 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Compute class scores.

        Args:
            x: Input of shape [batch, 3, 32, 32].

        Returns:
            Tensor of shape [batch, 10].
        """
        x = self.conv1(x)  # [batch, 16, 32, 32]
        x = self.relu(x)
        x = self.pool(x)   # [batch, 16, 16, 16]

        x = self.conv2(x)  # [batch, 32, 16, 16] (dilation 2 + padding 2 keep size)
        x = self.relu(x)
        x = self.pool(x)   # [batch, 32, 8, 8]

        x = self.conv3(x)  # [batch, 64, 8, 8]
        x = self.relu(x)

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 64 * 8 * 8), this never folds a wrong spatial size into the
        # batch dimension; a mismatch surfaces as an error in fc1 instead.
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Use the GPU when CUDA is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Model (moved to the chosen device), cross-entropy loss, and an Adam
# optimizer over all model parameters.
model = SimpleCNNWithDilation().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training routine
def train(epoch):
    """Run one pass over ``trainloader``, updating the module-level ``model``.

    Prints the mean loss of every completed window of 100 batches.

    Args:
        epoch: Zero-based epoch index; shown as ``epoch + 1`` in the log line.
    """
    model.train()
    window_loss = 0.0
    for step, batch in enumerate(trainloader, start=1):
        inputs = batch[0].to(device)
        labels = batch[1].to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        window_loss += loss.item()
        if step % 100 != 0:
            continue
        # A full window of 100 batches is complete: report and reset.
        print(f'Epoch: {epoch + 1}, Batch: {step}, Loss: {window_loss / 100:.3f}')
        window_loss = 0.0

# Evaluation routine
def test():
    """Evaluate the module-level ``model`` on ``testloader`` and print accuracy."""
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for batch in testloader:
            images = batch[0].to(device)
            labels = batch[1].to(device)
            logits = model(images)
            # torch.max over dim 1 returns (values, indices); the indices are
            # the predicted class ids.
            predicted = torch.max(logits.data, 1)[1]
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()
    accuracy = 100 * hits / seen
    print(f'Accuracy on test set: {accuracy:.2f}%')

# Train/evaluate cycle: a short 5-epoch run as an example, with a test-set
# evaluation after every training epoch.
num_epochs = 5
for epoch in range(num_epochs):
    train(epoch)
    test()

@浙大疏锦行

相关推荐
Johny_Zhao2 小时前
CentOS Stream 8 高可用 Kuboard 部署方案
linux·网络·python·网络安全·docker·信息安全·kubernetes·云计算·shell·yum源·系统运维·kuboard
站大爷IP3 小时前
精通einsum():多维数组操作的瑞士军刀
python
站大爷IP4 小时前
Python与MongoDB的亲密接触:从入门到实战的代码指南
python
Roc-xb4 小时前
/etc/profile.d/conda.sh: No such file or directory : numeric argument required
python·ubuntu·conda
世由心生5 小时前
[从0到1]环境准备--anaconda与pycharm的安装
ide·python·pycharm
猛犸MAMMOTH6 小时前
Python打卡第54天
pytorch·python·深度学习
梓羽玩Python6 小时前
12K+ Star的离线语音神器!50MB模型秒杀云端API,隐私零成本,20+语种支持!
人工智能·python·github
成都犀牛6 小时前
LangGraph 深度学习笔记:构建真实世界的智能代理
人工智能·pytorch·笔记·python·深度学习
終不似少年遊*7 小时前
【数据可视化】Pyecharts-家乡地图
python·信息可视化·数据挖掘·数据分析·数据可视化·pyecharts
仟濹7 小时前
「Matplotlib 入门指南」 Python 数据可视化分析【数据分析全栈攻略:爬虫+处理+可视化+报告】
python·信息可视化·数据分析·matplotlib