DAY43

一、方案整体设计

1. 数据集选择

选用 Kaggle 经典的 Dogs vs. Cats 数据集(https://www.kaggle.com/c/dogs-vs-cats/data),该数据集包含 25000 张带标签的猫狗图像,适合 CNN 二分类任务,且数据规模适中,训练成本低。

2. 技术栈

  • 深度学习框架:PyTorch
  • 数据处理:torchvision, PIL
  • Grad-CAM 实现:自定义梯度加权类激活映射
  • 模块化拆分:按功能拆分为 4 个独立文件,便于维护和复用

二、基础版(单文件完整实现)

先提供单文件版本,方便快速验证效果,再进行模块化拆分。

python 复制代码
# main.py (单文件完整版本)
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import numpy as np
import cv2

# ---------------------- 1. 数据集定义 ----------------------
class CatDogDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.images = []
        self.labels = []
        # 读取数据:文件名包含标签(cat.0.jpg / dog.0.jpg)
        for img_name in os.listdir(data_dir):
            if img_name.endswith('.jpg'):
                label = 1 if 'dog' in img_name else 0  # dog=1, cat=0
                self.images.append(os.path.join(data_dir, img_name))
                self.labels.append(label)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

# ---------------------- 2. CNN模型定义 ----------------------
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        # 特征提取层
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # 分类层
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 2)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

    # 用于Grad-CAM:获取最后一个卷积层的输出
    def get_last_conv_layer(self):
        return self.features[-2]

# ---------------------- 3. Grad-CAM实现 ----------------------
class GradCAM:
    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None
        # 注册钩子
        target_layer.register_forward_hook(self.save_activation)
        target_layer.register_backward_hook(self.save_gradient)

    def save_activation(self, module, input, output):
        self.activations = output

    def save_gradient(self, module, grad_input, grad_output):
        self.gradients = grad_output[0]

    def generate_cam(self, input_tensor, target_class=None):
        # 前向传播
        output = self.model(input_tensor)
        if target_class is None:
            target_class = torch.argmax(output, dim=1).item()
        # 反向传播
        self.model.zero_grad()
        one_hot = torch.zeros_like(output)
        one_hot[0][target_class] = 1
        output.backward(gradient=one_hot, retain_graph=True)
        # 计算权重
        gradients = self.gradients.cpu().data.numpy()[0]
        activations = self.activations.cpu().data.numpy()[0]
        weights = np.mean(gradients, axis=(1, 2))
        # 生成CAM
        cam = np.zeros(activations.shape[1:], dtype=np.float32)
        for i, w in enumerate(weights):
            cam += w * activations[i]
        cam = np.maximum(cam, 0)
        cam = cv2.resize(cam, (input_tensor.shape[3], input_tensor.shape[2]))
        cam = cam - np.min(cam)
        cam = cam / np.max(cam)
        return cam

# ---------------------- 4. 训练函数 ----------------------
def train_model(model, train_loader, criterion, optimizer, device, epochs=5):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        epoch_loss = running_loss / len(train_loader)
        epoch_acc = 100 * correct / total
        print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')
    return model

# ---------------------- 5. 主函数 ----------------------
if __name__ == '__main__':
    # 配置参数
    data_dir = './train'  # 替换为你的数据集路径
    batch_size = 32
    epochs = 5
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # 数据预处理
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # 加载数据集
    dataset = CatDogDataset(data_dir, transform=transform)
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)

    # 初始化模型、损失函数、优化器
    model = SimpleCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # 训练模型
    print("开始训练...")
    trained_model = train_model(model, train_loader, criterion, optimizer, device, epochs)
    torch.save(trained_model.state_dict(), 'cat_dog_cnn.pth')
    print("模型训练完成并保存!")

    # Grad-CAM可视化
    print("生成Grad-CAM可视化...")
    # 加载测试图像
    test_img_path = './test/dog.12345.jpg'  # 替换为你的测试图像路径
    test_img = Image.open(test_img_path).convert('RGB')
    input_tensor = transform(test_img).unsqueeze(0).to(device)

    # 初始化Grad-CAM
    target_layer = trained_model.get_last_conv_layer()
    grad_cam = GradCAM(trained_model, target_layer)

    # 生成CAM
    cam = grad_cam.generate_cam(input_tensor)
    # 叠加到原图
    img = np.array(test_img)
    heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    result = cv2.addWeighted(img, 0.5, heatmap, 0.5, 0)
    # 保存结果
    cv2.imwrite('grad_cam_result.jpg', cv2.cvtColor(result, cv2.COLOR_RGB2BGR))
    print("Grad-CAM可视化完成,结果已保存!")

@浙大疏锦行

相关推荐
zzzzzz3106 小时前
当产品经理说这个很简单:我用Python自动化处理奇葩需求的实战指南
python·pycharm·产品经理
雪隐7 小时前
个人电脑玩AI-06让5060 Ti给你打工——不光能画画,Qwen3-TTS还能学人说话,连我老板都信了!
人工智能·后端·python
兵慌码乱18 小时前
面向桌面端的资产管理系统分层架构设计与核心模块实现
python·系统架构·sqlite·pyqt5·数据库设计·桌面应用开发·mvc架构
hboot20 小时前
AI工程师第三课 - 机器学习基础
python·scikit-learn·kaggle
顾林海1 天前
Agent入门阶段-编程基础-Python:流程控制
python·agent·ai编程
呱呱复呱呱1 天前
Django CBV 源码解读:一个请求是怎么找到你的 get() 方法的
python·django
曲幽1 天前
刚部署的 LibreTranslate 频频翻车?我掏出了 20 年前的 StarDict 词典,用 FastAPI 搭了个本地词典翻译 API
python·fastapi·web·translate·goldendict·libretranslate·stardict·pystardict
荣码1 天前
用Streamlit给AI应用套个界面,10行代码出Web页面
java·python
兵慌码乱2 天前
基于Python+PyQt5+SQLite的药房管理系统实现:事务一致性与界面解耦全流程解析
python·sqlite·信号与槽·pyqt5·数据库设计·桌面应用开发·事务处理
金銀銅鐵2 天前
[Python] 体验用欧几里得算法计算最大公约数的过程
python·数学