DAY43

一、方案整体设计

1. 数据集选择

选用 Kaggle 经典的 Dogs vs. Cats 数据集(https://www.kaggle.com/c/dogs-vs-cats/data),该数据集包含 25000 张带标签的猫狗图像,适合 CNN 二分类任务,且数据规模适中,训练成本低。

2. 技术栈

  • 深度学习框架:PyTorch
  • 数据处理:torchvision, PIL, NumPy, OpenCV(cv2,用于热力图的缩放、着色与叠加)
  • Grad-CAM 实现:自定义梯度加权类激活映射
  • 模块化拆分:按功能拆分为 4 个独立文件,便于维护和复用

二、基础版(单文件完整实现)

先提供单文件版本,方便快速验证效果,再进行模块化拆分。

python 复制代码
# main.py (单文件完整版本)
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import numpy as np
import cv2

# ---------------------- 1. 数据集定义 ----------------------
class CatDogDataset(Dataset):
    """Dataset of ``.jpg`` images in one directory, labelled from the file name.

    A file whose name contains ``'dog'`` gets label 1; every other ``.jpg``
    file gets label 0 (the expected naming is ``cat.0.jpg`` / ``dog.0.jpg``).

    Args:
        data_dir: Directory that is scanned (non-recursively) for ``.jpg`` files.
        transform: Optional callable applied to the RGB ``PIL.Image`` before it
            is returned from ``__getitem__``.

    Attributes set by ``__init__``: ``images`` (list of file paths) and
    ``labels`` (list of 0/1 ints), both in sorted file-name order.
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sort so that the
        # index -> sample mapping is the same on every run and platform.
        jpg_names = sorted(
            name for name in os.listdir(data_dir) if name.endswith('.jpg')
        )
        self.images = [os.path.join(data_dir, name) for name in jpg_names]
        self.labels = [1 if 'dog' in name else 0 for name in jpg_names]  # dog=1, cat=0

    def __len__(self):
        """Return the number of ``.jpg`` files found in ``data_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is loaded as RGB and passed through ``transform`` when one
        was supplied.
        """
        img_path = self.images[idx]
        label = self.labels[idx]
        # The context manager closes the underlying file as soon as the RGB
        # copy has been made.
        with Image.open(img_path) as raw:
            image = raw.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# ---------------------- 2. CNN模型定义 ----------------------
class SimpleCNN(nn.Module):
    """Small CNN that maps a 3-channel image to two class logits.

    ``features`` is three repetitions of Conv2d(3x3, padding=1) -> ReLU ->
    MaxPool2d(2, 2) with 32, 64 and 128 output channels. ``classifier``
    flattens the result and applies Linear(128*28*28, 512) -> ReLU ->
    Linear(512, 2). The hard-coded 28x28 matches a 224x224 input after the
    three 2x poolings.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: same layer order and indices as a hand-written
        # Sequential, so state_dict keys stay features.0 / .3 / .6.
        stages = []
        in_channels = 3
        for out_channels in (32, 64, 128):
            stages.extend([
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_channels = out_channels
        self.features = nn.Sequential(*stages)

        # Classification head.
        flat_dim = 128 * 28 * 28
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 2),
        )

    def forward(self, x):
        """Run the feature extractor and the classifier; return the logits."""
        return self.classifier(self.features(x))

    def get_last_conv_layer(self):
        """Return the module used as the Grad-CAM target.

        This is ``self.features[-2]``, i.e. the ReLU that directly follows the
        final Conv2d (the rectified output of the last convolution), not the
        Conv2d module itself.
        """
        return self.features[len(self.features) - 2]

# ---------------------- 3. Grad-CAM实现 ----------------------
class GradCAM:
    """Gradient-weighted class activation mapping for one target layer.

    Hooks are registered on ``target_layer`` at construction time to capture
    its forward output and the gradient flowing into that output. Call
    ``remove_hooks()`` when the object is no longer needed so the hooks do not
    stay attached to the model.

    Args:
        model: The network to explain; called as ``model(input_tensor)``.
        target_layer: A sub-module of ``model`` whose output has shape
            (batch, channels, height, width).
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None
        # Keep the handles so the hooks can be detached again.
        # register_full_backward_hook replaces the deprecated
        # register_backward_hook.
        self._hook_handles = [
            target_layer.register_forward_hook(self.save_activation),
            target_layer.register_full_backward_hook(self.save_gradient),
        ]

    def save_activation(self, module, input, output):
        """Forward hook: remember the layer output (detached from the graph)."""
        self.activations = output.detach()

    def save_gradient(self, module, grad_input, grad_output):
        """Backward hook: remember the gradient w.r.t. the layer output."""
        self.gradients = grad_output[0].detach()

    def remove_hooks(self):
        """Detach the forward/backward hooks registered by ``__init__``."""
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles = []

    def generate_cam(self, input_tensor, target_class=None):
        """Compute the Grad-CAM heat map for the first sample of a batch.

        Args:
            input_tensor: Model input of shape (batch, channels, H, W). Only
                sample 0 is explained.
            target_class: Class index to explain. Defaults to the class with
                the highest score for sample 0.

        Returns:
            A float32 ``numpy.ndarray`` of shape (H, W) with values in [0, 1].
            When the map is constant (e.g. all activations are zero) an
            all-zero map is returned instead of NaNs.

        Raises:
            RuntimeError: If the hooks captured nothing, which happens when
                ``target_layer`` is not executed by ``model``.
        """
        self.activations = None
        self.gradients = None

        # Gradients are required even if the caller is inside torch.no_grad().
        with torch.enable_grad():
            output = self.model(input_tensor)
            if target_class is None:
                target_class = torch.argmax(output[0]).item()
            self.model.zero_grad()
            # Equivalent to backpropagating a one-hot vector for sample 0.
            output[0, target_class].backward()

        if self.activations is None or self.gradients is None:
            raise RuntimeError(
                'GradCAM hooks captured no data; is target_layer part of the model?'
            )

        grads = self.gradients[0].float()      # (channels, h, w)
        acts = self.activations[0].float()     # (channels, h, w)
        # Channel importance = spatial mean of the gradients.
        weights = grads.mean(dim=(1, 2))
        cam = torch.relu((weights[:, None, None] * acts).sum(dim=0))

        # Bilinear up-sampling to the input resolution.
        cam = torch.nn.functional.interpolate(
            cam[None, None],
            size=tuple(input_tensor.shape[2:]),
            mode='bilinear',
            align_corners=False,
        )[0, 0]

        cam = cam - cam.min()
        peak = cam.max()
        if peak > 0:  # avoid 0/0 -> NaN for a flat map
            cam = cam / peak
        return cam.cpu().numpy().astype(np.float32)

# ---------------------- 4. 训练函数 ----------------------
def train_model(model, train_loader, criterion, optimizer, device, epochs=5):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    After every epoch the mean batch loss and the classification accuracy are
    printed.

    Args:
        model: Network to optimise; put into training mode by this function.
        train_loader: Re-iterable yielding ``(inputs, labels)`` batches. It
            does not need to support ``len()``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.
        epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` object, after training.

    Raises:
        ValueError: If ``train_loader`` yields no samples in an epoch.
    """
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0  # counted here so unsized iterables work too
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
            # Predicted class = index of the largest logit.
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        if num_batches == 0 or total == 0:
            # Explicit error instead of a ZeroDivisionError below.
            raise ValueError('train_loader yielded no samples')
        epoch_loss = running_loss / num_batches
        epoch_acc = 100 * correct / total
        print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')
    return model

# ---------------------- 5. 主函数 ----------------------
if __name__ == '__main__':
    # Script entry point: train the classifier, save the weights, then write a
    # Grad-CAM overlay for one test image.

    # Configuration
    data_dir = './train'  # replace with your dataset path
    batch_size = 32
    epochs = 5
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Preprocessing: fixed 224x224 input, then per-channel normalisation
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Load the dataset
    dataset = CatDogDataset(data_dir, transform=transform)
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)

    # Model, loss function and optimizer
    model = SimpleCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train and save
    print("开始训练...")
    trained_model = train_model(model, train_loader, criterion, optimizer, device, epochs)
    torch.save(trained_model.state_dict(), 'cat_dog_cnn.pth')
    print("模型训练完成并保存!")

    # Grad-CAM visualisation
    print("生成Grad-CAM可视化...")
    # train_model leaves the network in training mode; switch to inference
    # mode before explaining a prediction.
    trained_model.eval()

    # Load the test image
    test_img_path = './test/dog.12345.jpg'  # replace with your test image path
    test_img = Image.open(test_img_path).convert('RGB')
    input_tensor = transform(test_img).unsqueeze(0).to(device)

    # Set up Grad-CAM on the model's designated target layer
    target_layer = trained_model.get_last_conv_layer()
    grad_cam = GradCAM(trained_model, target_layer)

    # Heat map at network-input resolution (224x224)
    cam = grad_cam.generate_cam(input_tensor)

    # Overlay on the original image. The original keeps its own resolution, so
    # the heat map must be resized to it first; cv2.addWeighted requires both
    # arrays to have the same size.
    img = np.array(test_img)
    cam = np.nan_to_num(cam)  # a flat map may contain NaN after normalisation
    cam = cv2.resize(cam, (img.shape[1], img.shape[0]))
    cam = np.clip(cam, 0.0, 1.0)
    heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    result = cv2.addWeighted(img, 0.5, heatmap, 0.5, 0)
    # Save the result (OpenCV writes BGR)
    cv2.imwrite('grad_cam_result.jpg', cv2.cvtColor(result, cv2.COLOR_RGB2BGR))
    print("Grad-CAM可视化完成,结果已保存!")

@浙大疏锦行

相关推荐
weixin_4597539440 分钟前
golang如何实现Trace上下文传播_golang Trace上下文传播实现思路
jvm·数据库·python
weixin_444012931 小时前
PHP 中逻辑或(||)运算符的正确使用与条件逻辑重构指南
jvm·数据库·python
iAm_Ike7 小时前
Go 中自定义类型与基础类型间的显式类型转换详解
jvm·数据库·python
iuvtsrt7 小时前
Golang怎么实现方法集与接口的匹配_Golang如何理解值类型和指针类型实现接口的区别【详解】
jvm·数据库·python
旦莫8 小时前
AI驱动的纯视觉自动化测试:知识库里应该积累什么知识内容
人工智能·python·测试开发·pytest·ai测试
知识领航员9 小时前
蘑兔AI音乐深度实测:功能拆解、实测表现与适用场景
java·c语言·c++·人工智能·python·算法·github
如何原谅奋力过但无声10 小时前
【灵神高频面试题合集06-08】反转链表、快慢指针(环形链表/重排链表)、前后指针(删除链表/链表去重)
数据结构·python·算法·leetcode·链表
deephub10 小时前
2026 RAG 选型指南:Vector、Graph、Vectorless 该怎么挑
人工智能·python·大语言模型·rag
狐狐生风12 小时前
使用 UV 创建并运行 Python 项目(完整步骤)
python·uv
噜噜噜阿鲁~12 小时前
python学习笔记 | 9.2、模块-安装第三方模块
笔记·python·学习