DAY43

一、方案整体设计

1. 数据集选择

选用 Kaggle 经典的 Dogs vs. Cats 数据集(https://www.kaggle.com/c/dogs-vs-cats/data),该数据集包含 25000 张带标签的猫狗图像,适合 CNN 二分类任务,且数据规模适中,训练成本低。

2. 技术栈

  • 深度学习框架:PyTorch
  • 数据处理:torchvision, PIL
  • Grad-CAM 实现:自定义梯度加权类激活映射
  • 模块化拆分:按功能拆分为 4 个独立文件,便于维护和复用

二、基础版(单文件完整实现)

先提供单文件版本,方便快速验证效果,再进行模块化拆分。

Python 代码如下:
# main.py (单文件完整版本)
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import numpy as np
import cv2

# ---------------------- 1. 数据集定义 ----------------------
class CatDogDataset(Dataset):
    """Image dataset that derives cat/dog labels from file names.

    Every ``.jpg`` file located directly inside ``data_dir`` becomes one
    sample. A file whose name contains ``dog`` (case-insensitive) is labelled
    1; every other file is labelled 0. The expected naming scheme is
    ``cat.0.jpg`` / ``dog.0.jpg``.

    Args:
        data_dir: Directory that holds the image files.
        transform: Optional callable applied to each loaded RGB PIL image.
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.images = []  # full paths, sorted by file name
        self.labels = []  # parallel to self.images: dog=1, cat=0
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping reproducible across runs and machines.
        for img_name in sorted(os.listdir(data_dir)):
            lowered = img_name.lower()
            # Compare case-insensitively so "DOG.1.JPG" is neither dropped
            # nor mislabelled as a cat.
            if lowered.endswith('.jpg'):
                label = 1 if 'dog' in lowered else 0  # dog=1, cat=0
                self.images.append(os.path.join(data_dir, img_name))
                self.labels.append(label)

    def __len__(self):
        """Return the number of image files that were discovered."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is converted to RGB and, when a transform was supplied,
        passed through it before being returned.
        """
        img_path = self.images[idx]
        label = self.labels[idx]
        # convert() returns an independent image, so the underlying file can
        # be closed as soon as the conversion is done.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

# ---------------------- 2. CNN模型定义 ----------------------
class SimpleCNN(nn.Module):
    """Three-stage convolutional classifier for square RGB images.

    Each stage is Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, 2), so the
    spatial size is halved three times (floor division by 8 overall) while the
    channel count grows 3 -> 32 -> 64 -> 128. The flattened features feed a
    two-layer fully connected head.

    Args:
        num_classes: Number of output logits. Defaults to 2 (cat / dog).
        input_size: Side length of the square input images. Defaults to 224,
            which gives the 128 * 28 * 28 flattened feature size.

    Raises:
        ValueError: If ``input_size`` is smaller than 8, because the three
            pooling layers would leave no spatial positions.
    """

    def __init__(self, num_classes=2, input_size=224):
        super().__init__()
        if input_size < 8:
            raise ValueError(
                f'input_size must be at least 8, got {input_size}'
            )
        # Feature extractor
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # The padded 3x3 convolutions keep the spatial size; only the three
        # 2x2 poolings shrink it, each by floor division by 2.
        feature_side = input_size // 8
        # Classification head
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * feature_side * feature_side, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return the class logits for the image batch ``x``."""
        x = self.features(x)
        x = self.classifier(x)
        return x

    def get_last_conv_layer(self):
        """Return the layer used as the Grad-CAM target.

        This is ``self.features[-2]``: the ReLU that directly follows the
        last Conv2d, so hooks placed on it observe the rectified output of
        the final convolution stage (before the last max-pool).
        """
        return self.features[-2]

# ---------------------- 3. Grad-CAM实现 ----------------------
class GradCAM:
    """Gradient-weighted class activation mapping for one target layer.

    Hooks on ``target_layer`` capture its forward output and the gradient of
    the selected class score with respect to that output. ``generate_cam``
    combines both into a heat map scaled to the range [0, 1].

    Args:
        model: Network that is run and back-propagated through.
        target_layer: Sub-module of ``model`` whose output is visualised.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None
        # register_backward_hook is deprecated; use the full variant when this
        # PyTorch version offers it and fall back to the legacy one otherwise.
        register_backward = getattr(
            target_layer,
            'register_full_backward_hook',
            target_layer.register_backward_hook,
        )
        # Keep the handles so the hooks can be detached again.
        self._hook_handles = [
            target_layer.register_forward_hook(self.save_activation),
            register_backward(self.save_gradient),
        ]

    def remove_hooks(self):
        """Detach the hooks this object registered on the target layer."""
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles = []

    def save_activation(self, module, input, output):
        """Forward hook: remember the target layer's output."""
        # Detached so the stored tensor does not keep the autograd graph alive.
        self.activations = output.detach()

    def save_gradient(self, module, grad_input, grad_output):
        """Backward hook: remember the gradient w.r.t. the layer output."""
        self.gradients = grad_output[0].detach()

    @staticmethod
    def _weighted_activation_map(gradients, activations):
        """Return the rectified, gradient-weighted sum over channels.

        Both arguments are (channels, height, width) arrays. Each channel's
        weight is the spatial mean of its gradient.
        """
        weights = np.mean(gradients, axis=(1, 2))
        cam = np.tensordot(weights, activations, axes=1).astype(np.float32)
        return np.maximum(cam, 0)

    @staticmethod
    def _normalize(cam):
        """Shift ``cam`` to start at 0 and scale its maximum to 1.

        A constant map is returned as all zeros instead of dividing by zero,
        which would fill the map with NaN.
        """
        cam = cam - np.min(cam)
        peak = np.max(cam)
        if peak > 0:
            cam = cam / peak
        return cam

    def generate_cam(self, input_tensor, target_class=None):
        """Compute the Grad-CAM heat map for the first sample of a batch.

        Args:
            input_tensor: Image batch of shape (N, C, H, W); only sample 0 is
                visualised.
            target_class: Class index to explain. Defaults to the class with
                the highest score for sample 0.

        Returns:
            A float array of shape (H, W) with values in [0, 1].

        Raises:
            RuntimeError: If the hooks captured no activation or gradient,
                e.g. because the target layer is not used by the model.
        """
        # Drop values from a previous call so stale data is never reused.
        self.gradients = None
        self.activations = None

        # Forward pass
        output = self.model(input_tensor)
        if target_class is None:
            target_class = torch.argmax(output[0]).item()

        # Backward pass of the selected class score only
        self.model.zero_grad()
        one_hot = torch.zeros_like(output)
        one_hot[0][target_class] = 1
        # Each call runs its own forward pass, so the graph need not be kept.
        output.backward(gradient=one_hot)

        if self.gradients is None or self.activations is None:
            raise RuntimeError(
                'Grad-CAM hooks captured no data; check that target_layer '
                'is part of the model and that hooks were not removed.'
            )

        gradients = self.gradients.cpu().numpy()[0]
        activations = self.activations.cpu().numpy()[0]
        cam = self._weighted_activation_map(gradients, activations)
        # cv2.resize expects (width, height).
        cam = cv2.resize(cam, (input_tensor.shape[3], input_tensor.shape[2]))
        return self._normalize(cam)

# ---------------------- 4. 训练函数 ----------------------
def train_model(model, train_loader, criterion, optimizer, device, epochs=5):
    """Train ``model`` in place and print per-epoch loss and accuracy.

    Args:
        model: Network to optimise; switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimiser that updates the model's parameters.
        device: Device every batch is moved to before the forward pass.
        epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` object after training.
    """
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

        # An empty loader would otherwise divide by zero below.
        if num_batches == 0 or total == 0:
            print(f'Epoch {epoch+1}/{epochs}, no training batches; skipped')
            continue

        epoch_loss = running_loss / num_batches  # mean of per-batch losses
        epoch_acc = 100 * correct / total
        print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')
    return model

# ---------------------- 5. 主函数 ----------------------
# Script entry point: train the classifier on the images in data_dir, save
# the weights, then write a Grad-CAM overlay for one test image.
if __name__ == '__main__':
    # Configuration
    data_dir = './train'  # replace with the path to your dataset
    batch_size = 32
    epochs = 5
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Preprocessing: fixed 224x224 input, normalised per channel
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Load the dataset
    dataset = CatDogDataset(data_dir, transform=transform)
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)

    # Model, loss function and optimiser
    model = SimpleCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train and save the weights
    print("开始训练...")
    trained_model = train_model(model, train_loader, criterion, optimizer, device, epochs)
    torch.save(trained_model.state_dict(), 'cat_dog_cnn.pth')
    print("模型训练完成并保存!")

    # Grad-CAM visualisation
    print("生成Grad-CAM可视化...")
    # Inference should not run in training mode; gradients still flow in
    # eval mode, which is all Grad-CAM needs.
    trained_model.eval()

    # Load the test image
    test_img_path = './test/dog.12345.jpg'  # replace with the path to your test image
    test_img = Image.open(test_img_path).convert('RGB')
    input_tensor = transform(test_img).unsqueeze(0).to(device)

    # Set up Grad-CAM on the model's target layer
    target_layer = trained_model.get_last_conv_layer()
    grad_cam = GradCAM(trained_model, target_layer)

    # Compute the class activation map (same size as the network input)
    cam = grad_cam.generate_cam(input_tensor)

    # Overlay on the original image. cv2.addWeighted requires both arrays to
    # have the same size, so the map is first resized from the network input
    # size to the original image size; cv2.resize expects (width, height).
    img = np.array(test_img)
    cam = cv2.resize(cam, (img.shape[1], img.shape[0]))
    # Replace any NaN (possible when the map is all zeros) before the uint8
    # conversion, and clamp to the valid range.
    cam = np.clip(np.nan_to_num(cam), 0.0, 1.0)
    heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    result = cv2.addWeighted(img, 0.5, heatmap, 0.5, 0)

    # Save the result (OpenCV writes images in BGR channel order)
    cv2.imwrite('grad_cam_result.jpg', cv2.cvtColor(result, cv2.COLOR_RGB2BGR))
    print("Grad-CAM可视化完成,结果已保存!")

@浙大疏锦行

相关推荐
Cloud_Shy6181 分钟前
解读《Effective Python 3rd Edition》:从练气到老魔(第二章 Item 13 - 16)
c语言·开发语言·网络·笔记·python·编辑器
E_ICEBLUE7 分钟前
Python 办公自动化:如何轻松调整 Word/PDF 页面大小为 Legal 尺寸
python·pdf·word
战斗强7 分钟前
RTX 5090 Grounded-SAM-2 实时 RTSP 跟踪部署指南
python·conda
Wonderful U8 分钟前
Python+Django实战 | 智能图书管理系统:从传统人工到全自动数字化管理
python·信息可视化·django
耶夫斯计10 分钟前
【xy_healthy_assistant:LLM+Mem0 +skills个性化助手开发手记】
人工智能·python·健康医疗
csdn_aspnet10 分钟前
Python 霍尔分区算法(Hoare's Partition Algorithm)
开发语言·python·算法
❀͜͡傀儡师12 分钟前
告别脚手架:用 JBang 打通 Java、Kotlin、Python 的脚本化开发
java·python·kotlin·jbang
nashane13 分钟前
HarmonyOS 6学习:应用无响应(AppFreeze)故障排查与性能优化指南
人工智能·pytorch·python
Rain50915 分钟前
GitLab-Runner + AI 代码审查服务 + 远程大模型 全套部署运维实战
linux·运维·人工智能·python·ci/cd·gitlab·ai编程
学计算机的计算基15 分钟前
MySQL 锁体系全解:从 MDL 到间隙锁,一次讲透
java·数据库·笔记·python·mysql