基于Pytorch的YOLOv1 的网络结构代码

下面是可直接运行的 PyTorch 代码,完整实现了原始 YOLOv1 的网络结构(对应图3-1),并打印每一层的输出尺寸,方便对照理解。

Python 代码如下(需要已安装 PyTorch):
import torch
import torch.nn as nn
import torch.nn.functional as F

class YOLOv1(nn.Module):
    """Original YOLOv1 backbone + detection head (Redmon et al., CVPR 2016).

    Input:  tensor of shape ``(B, 3, 448, 448)``.
    Output: tensor of shape ``(B, S, S, num_bboxes * 5 + num_classes)``,
            i.e. ``(B, 7, 7, 30)`` with the defaults (S=7, 2 boxes, 20 classes).

    Args:
        num_classes: number of object classes (20 for Pascal VOC).
        num_bboxes: bounding boxes predicted per grid cell (5 values each:
            x, y, w, h, confidence).
        grid_size: side length S of the output grid. 7 reproduces the paper;
            kept as a parameter so the head generalizes to other input sizes
            (the backbone downsamples 448 -> 7 by a factor of 64).
    """

    def __init__(self, num_classes: int = 20, num_bboxes: int = 2,
                 grid_size: int = 7):
        super().__init__()
        self.num_classes = num_classes
        self.num_bboxes = num_bboxes
        self.grid_size = grid_size

        self.layer1 = nn.Sequential(
            # Conv1: 7x7x64-s-2
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.1),
            # MaxPool: 2x2-s-2
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.layer2 = nn.Sequential(
            # Conv2: 3x3x192
            nn.Conv2d(64, 192, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(192),
            nn.LeakyReLU(0.1),
            # MaxPool: 2x2-s-2
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.layer3 = nn.Sequential(
            # Conv3: 1x1x128 -> 3x3x256 -> 1x1x256 -> 3x3x512
            nn.Conv2d(192, 128, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.1),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.1),
            nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.1),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.1),
            # MaxPool: 2x2-s-2
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.layer4 = nn.Sequential(
            # Conv4: (1x1x256 -> 3x3x512) x4
            # Each repetition starts and ends at 512 channels, so the same
            # in/out channel counts work for all four repetitions.
            *[
                nn.Sequential(
                    nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0, bias=False),
                    nn.BatchNorm2d(256),
                    nn.LeakyReLU(0.1),
                    nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=False),
                    nn.BatchNorm2d(512),
                    nn.LeakyReLU(0.1)
                ) for _ in range(4)
            ],
            # MaxPool: 2x2-s-2
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.layer5 = nn.Sequential(
            # Conv5: (1x1x512 -> 3x3x1024) x2
            # BUG FIX: the second repetition receives 1024 channels (output of
            # the preceding 3x3 conv), not 512. The original code used
            # Conv2d(512, ...) for both repetitions, which raises a channel
            # mismatch RuntimeError on the second one.
            *[
                nn.Sequential(
                    nn.Conv2d(512 if i == 0 else 1024, 512,
                              kernel_size=1, stride=1, padding=0, bias=False),
                    nn.BatchNorm2d(512),
                    nn.LeakyReLU(0.1),
                    nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1, bias=False),
                    nn.BatchNorm2d(1024),
                    nn.LeakyReLU(0.1)
                ) for i in range(2)
            ],
            # Conv: 3x3x1024-s-2 (downsamples 14x14 -> 7x7)
            nn.Conv2d(1024, 1024, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(0.1)
        )

        self.layer6 = nn.Sequential(
            # Conv6: 3x3x1024 x2
            nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(0.1),
            nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(0.1)
        )

        # Fully-connected detection head: S*S*1024 -> 4096 -> S*S*(B*5 + C)
        s = self.grid_size
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(s * s * 1024, 4096),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.5),
            nn.Linear(4096, s * s * (num_bboxes * 5 + num_classes))
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run a forward pass, printing each stage's output shape.

        The shape comments assume the canonical input (B, 3, 448, 448).
        """
        print(f"输入尺寸: {x.shape}")  # [B, 3, 448, 448]

        x = self.layer1(x)
        print(f"Layer1 输出: {x.shape}")  # [B, 64, 112, 112]

        x = self.layer2(x)
        print(f"Layer2 输出: {x.shape}")  # [B, 192, 56, 56]

        x = self.layer3(x)
        print(f"Layer3 输出: {x.shape}")  # [B, 512, 28, 28]

        x = self.layer4(x)
        print(f"Layer4 输出: {x.shape}")  # [B, 512, 14, 14]

        x = self.layer5(x)
        print(f"Layer5 输出: {x.shape}")  # [B, 1024, 7, 7]

        x = self.layer6(x)
        print(f"Layer6 输出: {x.shape}")  # [B, 1024, 7, 7]

        x = self.fc_layers(x)
        # Reshape to [B, S, S, B*5 + C], e.g. [B, 7, 7, 30]
        x = x.view(-1, self.grid_size, self.grid_size,
                   self.num_bboxes * 5 + self.num_classes)
        print(f"最终输出: {x.shape}")  # [B, 7, 7, 30]

        return x

def _demo() -> None:
    """Build the network and push one dummy batch of 448x448 images through it."""
    net = YOLOv1(num_classes=20, num_bboxes=2)
    dummy = torch.randn(2, 3, 448, 448)  # [B, 3, 448, 448]
    net(dummy)


if __name__ == "__main__":
    _demo()

运行结果说明

运行代码后,你会看到如下输出,清晰展示了原始 YOLOv1 的尺寸变化:

(控制台输出)
输入尺寸: torch.Size([2, 3, 448, 448])
Layer1 输出: torch.Size([2, 64, 112, 112])
Layer2 输出: torch.Size([2, 192, 56, 56])
Layer3 输出: torch.Size([2, 512, 28, 28])
Layer4 输出: torch.Size([2, 512, 14, 14])
Layer5 输出: torch.Size([2, 1024, 7, 7])
Layer6 输出: torch.Size([2, 1024, 7, 7])
最终输出: torch.Size([2, 7, 7, 30])

关键设计解读

  1. 1×1 卷积:用于压缩通道数,减少计算量,同时不改变空间尺寸。
  2. 3×3 卷积:用于提取空间特征,是网络的核心计算单元。
  3. 全连接层:将 7×7×1024 的特征图展平为一维向量,通过全连接层回归出最终的检测结果。
  4. 输出维度为 7×7×30,其中 7×7 是网格数,30 = 2×5 + 20,代表每个网格预测 2 个边界框(每个框 5 个参数:x、y、w、h、置信度)和 20 个类别概率。
相关推荐
默默开发1 小时前
完整版:本地电脑 + WiFi 搭建 AI 自动炒股 + 自我学习系统
人工智能·学习·电脑
zzh940771 小时前
2026年AI文件上传功能实战:聚合站处理图片、PDF、PPT全指南
人工智能·pdf·powerpoint
新缸中之脑6 小时前
Paperless-NGX实战文档管理
人工智能
无极低码8 小时前
ecGlypher新手安装分步指南(标准化流程)
人工智能·算法·自然语言处理·大模型·rag
grant-ADAS8 小时前
记录paddlepaddleOCR从环境到使用默认模型,再训练自己的数据微调模型再推理
人工智能·深度学习
炎爆的土豆翔8 小时前
OpenCV 阈值二值化优化实战:LUT 并行、手写 AVX2 与 cv::threshold 性能对比
人工智能·opencv·计算机视觉
智能相对论8 小时前
从AWE看到海尔智慧家庭步步引领
人工智能
云和数据.ChenGuang8 小时前
魔搭社区 测试AI案例故障
人工智能·深度学习·机器学习·ai·mindstudio
小锋学长生活大爆炸8 小时前
【工具】无需Token!WebAI2API将网页AI转为API使用
人工智能·深度学习·chatgpt·openclaw
昨夜见军贴06168 小时前
AI审核赋能司法鉴定:IACheck如何保障刑事证据检测报告精准无误、经得起推敲?
人工智能