DAY43

一、方案整体设计

1. 数据集选择

选用 Kaggle 经典的 Dogs vs. Cats 数据集(https://www.kaggle.com/c/dogs-vs-cats/data),该数据集包含 25000 张带标签的猫狗图像,适合 CNN 二分类任务,且数据规模适中,训练成本低。

2. 技术栈

  • 深度学习框架:PyTorch
  • 数据处理与可视化:torchvision、PIL、NumPy、OpenCV(cv2,用于热力图着色与叠加)
  • Grad-CAM 实现:自定义梯度加权类激活映射
  • 模块化拆分:按功能拆分为 4 个独立文件,便于维护和复用

二、基础版(单文件完整实现)

先提供单文件版本,方便快速验证效果,再进行模块化拆分。

完整代码如下(Python):
# main.py (单文件完整版本)
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import numpy as np
import cv2

# ---------------------- 1. 数据集定义 ----------------------
class CatDogDataset(Dataset):
    """Cat/dog image dataset read from a flat directory of ``.jpg`` files.

    The label is taken from the file name: names starting with ``dog``
    (e.g. ``dog.0.jpg``) are labelled 1, every other ``.jpg`` file
    (e.g. ``cat.0.jpg``) is labelled 0.

    Args:
        data_dir: Directory that directly contains the image files.
        transform: Optional callable applied to the loaded RGB PIL image.

    Attributes:
        images: Full paths of the discovered images, in sorted file-name order.
        labels: Integer labels aligned with ``images`` (dog=1, cat=0).
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.images = []
        self.labels = []
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping reproducible across runs and machines.
        for img_name in sorted(os.listdir(data_dir)):
            if not img_name.endswith('.jpg'):
                continue
            # Match the class prefix instead of a substring so that a name
            # like "cat.hotdog.jpg" is not mislabelled as a dog.
            label = 1 if img_name.startswith('dog') else 0  # dog=1, cat=0
            self.images.append(os.path.join(data_dir, img_name))
            self.labels.append(label)

    def __len__(self):
        """Return the number of images found in ``data_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        img_path = self.images[idx]
        label = self.labels[idx]
        # The context manager closes the underlying file as soon as the
        # pixel data has been converted into a standalone RGB image.
        with Image.open(img_path) as raw:
            image = raw.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# ---------------------- 2. CNN模型定义 ----------------------
class SimpleCNN(nn.Module):
    """Three-stage convolutional classifier producing two logits.

    ``features`` applies three Conv-ReLU-MaxPool stages (3 -> 32 -> 64 -> 128
    channels, each pool halving the spatial size). The result is brought to a
    fixed 28x28 grid and fed to a two-layer fully connected ``classifier``.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor.
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # The first Linear layer expects 128 * 28 * 28 inputs, which only
        # matches 224x224 images. Adaptive pooling to 28x28 is the identity
        # for that size and lets other resolutions through instead of
        # crashing. It has no parameters, so state_dict keys are unchanged.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((28, 28))
        # Classification head.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 2)
        )

    def forward(self, x):
        """Return logits of shape ``(batch, 2)`` for an image batch ``x``."""
        x = self.features(x)
        x = self.adaptive_pool(x)
        return self.classifier(x)

    def get_last_conv_layer(self):
        """Return the layer to hook for Grad-CAM.

        This is ``features[-2]``, i.e. the ReLU that directly follows the
        last convolution, so hooks on it observe the rectified feature maps
        of the final convolutional stage (before the last max-pool).
        """
        return self.features[-2]

# ---------------------- 3. Grad-CAM实现 ----------------------
class GradCAM:
    """Gradient-weighted class activation mapping for a single layer.

    Hooks are attached to ``target_layer`` on construction: a forward hook
    records the layer output and a backward hook records the gradient of the
    class score with respect to that output.

    Args:
        model: The network to explain. For layers such as dropout or batch
            norm the caller should switch it to ``eval()`` first.
        target_layer: A sub-module of ``model`` whose output is a 4-D
            feature map ``(batch, channels, h, w)``.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None
        # Keep the handles so the hooks can be detached again; otherwise
        # every GradCAM instance leaves permanent hooks on the model.
        # register_full_backward_hook replaces the deprecated
        # register_backward_hook.
        self._hook_handles = [
            target_layer.register_forward_hook(self.save_activation),
            target_layer.register_full_backward_hook(self.save_gradient),
        ]

    def save_activation(self, module, input, output):
        """Forward hook: store the layer output, detached from the graph."""
        self.activations = output.detach()

    def save_gradient(self, module, grad_input, grad_output):
        """Backward hook: store the gradient w.r.t. the layer output."""
        self.gradients = grad_output[0].detach()

    def remove_hooks(self):
        """Detach the hooks that this instance registered on the layer."""
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles = []

    def generate_cam(self, input_tensor, target_class=None):
        """Compute the Grad-CAM heat map for one image.

        Args:
            input_tensor: Image batch of shape ``(1, C, H, W)``; only the
                first sample is used.
            target_class: Class index to explain. Defaults to the class
                with the highest score for ``input_tensor``.

        Returns:
            A ``float32`` NumPy array of shape ``(H, W)`` scaled to
            ``[0, 1]``. If the map has no positive response it is all
            zeros rather than NaN.

        Raises:
            RuntimeError: If the hooks did not capture activations and
                gradients (e.g. after ``remove_hooks()``).
        """
        # Forward pass (fills self.activations via the forward hook).
        output = self.model(input_tensor)
        if target_class is None:
            target_class = torch.argmax(output, dim=1).item()

        # Back-propagate only the score of the selected class.
        self.model.zero_grad()
        one_hot = torch.zeros_like(output)
        one_hot[0][target_class] = 1
        output.backward(gradient=one_hot)

        if self.gradients is None or self.activations is None:
            raise RuntimeError(
                'Grad-CAM hooks captured no data; is target_layer part of '
                'the model and are the hooks still registered?'
            )

        gradients = self.gradients[0]      # (channels, h, w)
        activations = self.activations[0]  # (channels, h, w)

        # Channel weights: spatial mean of the gradients.
        weights = gradients.mean(dim=(1, 2))
        # Weighted sum over channels; negative evidence is clipped.
        cam = torch.relu((weights[:, None, None] * activations).sum(dim=0))

        # Bilinear upsampling to the input resolution (H, W).
        cam = nn.functional.interpolate(
            cam[None, None],
            size=tuple(input_tensor.shape[2:]),
            mode='bilinear',
            align_corners=False,
        )[0, 0]

        # Min-max normalisation; skip the division for a flat map, which
        # would otherwise turn the whole result into NaN.
        cam = cam - cam.min()
        peak = cam.max()
        if peak > 0:
            cam = cam / peak
        return cam.cpu().numpy().astype(np.float32)

# ---------------------- 4. 训练函数 ----------------------
def train_model(model, train_loader, criterion, optimizer, device, epochs=5):
    """Train ``model`` in place and print loss/accuracy after every epoch.

    Args:
        model: Network producing class logits of shape ``(batch, classes)``.
        train_loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        criterion: Loss callable ``criterion(outputs, labels)``. The reported
            epoch loss assumes it returns the mean over the batch (the
            default reduction of ``nn.CrossEntropyLoss``).
        optimizer: Optimizer bound to the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.
        epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` object after training.

    Raises:
        ZeroDivisionError: If ``train_loader`` yields no samples.
    """
    for epoch in range(epochs):
        model.train()
        loss_sum = 0.0
        correct = 0
        total = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            n_samples = labels.size(0)
            # Weight each batch mean by its size so a smaller final batch
            # does not distort the epoch average.
            loss_sum += loss.item() * n_samples
            predicted = outputs.detach().argmax(dim=1)
            total += n_samples
            correct += (predicted == labels).sum().item()
        epoch_loss = loss_sum / total
        epoch_acc = 100 * correct / total
        print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')
    return model

# ---------------------- 5. 主函数 ----------------------
def main():
    """Train the cat/dog CNN, save its weights and write a Grad-CAM overlay.

    Reads training images from ``./train``, saves the weights to
    ``cat_dog_cnn.pth`` and writes the overlay for one test image to
    ``grad_cam_result.jpg``.
    """
    # Configuration.
    data_dir = './train'  # replace with the path to your dataset
    batch_size = 32
    epochs = 5
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Preprocessing.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Dataset and loader.
    dataset = CatDogDataset(data_dir, transform=transform)
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)

    # Model, loss and optimizer.
    model = SimpleCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training.
    print("开始训练...")
    trained_model = train_model(model, train_loader, criterion, optimizer, device, epochs)
    torch.save(trained_model.state_dict(), 'cat_dog_cnn.pth')
    print("模型训练完成并保存!")

    # Grad-CAM visualisation.
    print("生成Grad-CAM可视化...")
    # Explanations should be computed in inference mode.
    trained_model.eval()
    # Load the test image.
    test_img_path = './test/dog.12345.jpg'  # replace with the path to your test image
    test_img = Image.open(test_img_path).convert('RGB')
    input_tensor = transform(test_img).unsqueeze(0).to(device)

    # Set up Grad-CAM on the layer exposed by the model.
    target_layer = trained_model.get_last_conv_layer()
    grad_cam = GradCAM(trained_model, target_layer)

    # Compute the class activation map (at network input resolution).
    cam = grad_cam.generate_cam(input_tensor)

    # Overlay on the original image. The CAM has the 224x224 network input
    # size while the photo keeps its original resolution, and
    # cv2.addWeighted requires both arrays to have the same shape, so the
    # CAM is resized to the photo first (cv2 takes the size as (w, h)).
    img = np.array(test_img)
    cam = cv2.resize(cam, (img.shape[1], img.shape[0]))
    heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    result = cv2.addWeighted(img, 0.5, heatmap, 0.5, 0)
    # Save the result (OpenCV writes BGR).
    cv2.imwrite('grad_cam_result.jpg', cv2.cvtColor(result, cv2.COLOR_RGB2BGR))
    print("Grad-CAM可视化完成,结果已保存!")


if __name__ == '__main__':
    main()

@浙大疏锦行

相关推荐
zero15972 小时前
Python 8天极速入门笔记(大模型工程师专用):第七篇-文件操作 + 异常处理,大模型实战落地关键
python·ai编程·大模型编程语言
T0uken2 小时前
【Python】uvpacker:跨平台打包 Windows 应用
开发语言·python
Li emily2 小时前
解决了用美股历史数据api分析价格波动的困扰
数据库·人工智能·python
Xpower 172 小时前
PHM念叨叨系列--工业场景大模型幻觉治理
人工智能·python·语言模型
请数据别和我作队2 小时前
基于 DeepSeek API 的 ASR 文本纠错脚本实战:Python 多线程批量处理 JSONL 语音转写数据
开发语言·经验分享·python·自然语言处理·nlp
Circ.3 小时前
文本相似性对比python代码
开发语言·python·相似度
Ahtacca3 小时前
基于决策树算法的动物分类实验:Mac环境复现指南
python·算法·决策树·机器学习·ai·分类
萌>__<新3 小时前
AI聊天助手-测试报告
人工智能·python
sg_knight3 小时前
设计模式实战:观察者模式(Observer)
python·观察者模式·设计模式