DAY43打卡

@浙大疏锦行
在 Kaggle 上找到一个图像数据集,用 CNN 网络进行训练,并用 Grad-CAM 做可视化

进阶:将代码拆分成多个文件

python 复制代码
fruit_cnn_project/
├─ data/                # 存放数据集(需手动创建,后续放入图片)
│  ├─ train/            # 训练集图像
│  └─ test/             # 测试集图像(下方代码从 data/test 加载)
├─ models/              # 模型定义
│  └─ cnn_model.py      # CNN网络结构
├─ utils/               # 工具函数
│  ├─ dataset_utils.py  # 数据加载与预处理
│  ├─ grad_cam.py       # Grad-CAM可视化
│  └─ train_utils.py    # 训练与评估
├─ main.py              # 主程序
└─ requirements.txt     # 依赖列表(可选)
python 复制代码
# 第一部分:导入库
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# 第二部分:数据加载与预处理
def load_data(train_dir='data/train', test_dir='data/test', batch_size=32):
    """Build the training and test data loaders.

    Both splits are read with ``datasets.ImageFolder`` and share one
    preprocessing pipeline: resize to 224x224, convert to a tensor, then
    normalize every channel with mean 0.5 and std 0.5.

    Args:
        train_dir: Root directory of the training images.
        test_dir: Root directory of the test images.
        batch_size: Number of samples per batch for both loaders.

    Returns:
        A ``(train_loader, test_loader)`` tuple. Only the training loader
        shuffles its samples.
    """
    data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # (x - 0.5) / 0.5 on each of the three channels.
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    train_dataset = datasets.ImageFolder(root=train_dir, transform=data_transform)
    test_dataset = datasets.ImageFolder(root=test_dir, transform=data_transform)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True
    )
    # Keep the test order fixed so evaluation runs are comparable.
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=batch_size, shuffle=False
    )
    return train_loader, test_loader

# 第三部分:模型定义
class SimpleCNN(nn.Module):
    """Small two-convolution classifier for 3-channel 224x224 images.

    Each convolution keeps the spatial size (3x3 kernel, padding 1) and is
    followed by ReLU and a 2x2 max-pool, so a 224x224 input reaches the
    fully connected head as a 32x56x56 feature map.

    Args:
        num_classes: Number of output logits. Defaults to 2, the value that
            was previously hard-coded.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        # 224 -> 112 -> 56 after the two pooling steps.
        self.fc1 = nn.Linear(32 * 56 * 56, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the class logits for a batch shaped (N, 3, 224, 224)."""
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten per sample. Unlike view(-1, ...), this never regroups
        # samples, so a wrong input size fails at fc1 instead of silently
        # changing the batch dimension.
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        return self.fc2(x)

# 第四部分:模型训练
# Set up the training data, the network, the loss and the optimizer.
train_loader, _ = load_data()
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10

for epoch in range(num_epochs):
    epoch_loss = 0.0
    for inputs, labels in train_loader:
        # Gradients accumulate by default, so clear them for every batch.
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    # Report the mean per-batch loss of this epoch.
    print(f'Epoch {epoch + 1}, Loss: {epoch_loss / len(train_loader)}')

# Persist only the learned weights.
torch.save(model.state_dict(), 'trained_model.pth')

# 第五部分:模型测试
# Rebuild the network and restore the weights written by the training step.
_, test_loader = load_data()
model = SimpleCNN()
# map_location='cpu' lets a checkpoint saved from a GPU load into this CPU model.
model.load_state_dict(torch.load('trained_model.pth', map_location='cpu'))
model.eval()
correct = 0
total = 0

# No gradients are needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        # Index of the largest logit per sample; no `.data` access needed
        # because autograd is already disabled here.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct / total}%')

# 第六部分:Grad-CAM可视化(修复版)
def get_activation():
    """Create a forward hook together with the dict it records into.

    Returns:
        A ``(hook, activation)`` pair. Every time ``hook`` runs it stores
        the detached layer output under ``activation['target_layer']``,
        replacing any earlier value.
    """
    activation = {}

    def hook(module, inputs, output):
        # Detach so the stored tensor is cut off from the autograd graph.
        activation.update(target_layer=output.detach())

    return hook, activation

def grad_cam(model, image, target_class_index, target_layer=None):
    """Compute a Grad-CAM heatmap for one image and one class.

    The heatmap is the ReLU of the target layer's feature maps, each
    weighted by the spatial mean of the class score's gradient with respect
    to that feature map. The result is resized to the input image and
    min-max scaled.

    Args:
        model: Network to explain. It is switched to eval mode.
        image: A single image tensor shaped (C, H, W), without batch axis.
        target_class_index: Index of the output logit to explain.
        target_layer: Module whose output is visualised; it must produce a
            4-D (N, C, H, W) tensor. Defaults to ``model.conv2``.

    Returns:
        A float32 NumPy array shaped (H, W) with values in [0, 1]. A
        constant heatmap is returned as all zeros.
    """
    if target_layer is None:
        target_layer = model.conv2

    captured = {}

    def _save_output(module, inputs, output):
        # Keep the tensor attached to the graph: its gradient is needed below.
        captured['activation'] = output

    handle = target_layer.register_forward_hook(_save_output)
    model.eval()
    try:
        # detach() yields a separate tensor object, so the caller's image
        # is not flagged as requiring grad.
        batch = image.detach().unsqueeze(0).requires_grad_(True)
        # enable_grad() keeps this working when called inside no_grad().
        with torch.enable_grad():
            output = model(batch)
            score = output[0, target_class_index]
            activation = captured['activation']
            # Gradient of the class score w.r.t. the layer's feature maps,
            # not w.r.t. the input pixels.
            gradients = torch.autograd.grad(score, activation)[0]
    finally:
        # Always unregister, otherwise hooks pile up across calls.
        handle.remove()

    activation = activation.detach()
    # One weight per channel: the gradient averaged over height and width.
    weights = gradients.mean(dim=(2, 3), keepdim=True)
    cam = F.relu((weights * activation).sum(dim=1, keepdim=True))
    cam = F.interpolate(
        cam,
        size=tuple(image.shape[-2:]),
        mode='bilinear',
        align_corners=False,
    )[0, 0]

    low = cam.min()
    span = cam.max() - low
    if span > 0:
        cam = (cam - low) / span
    else:
        # A flat map would divide by zero; report it as "no evidence".
        cam = torch.zeros_like(cam)
    return cam.cpu().numpy()

# 可视化前几张测试图片
dataiter = iter(test_loader)
# Use the builtin next(): iterators are not required to offer a .next() method.
images, labels = next(dataiter)

# Show up to the first 5 images; the batch may hold fewer than 5.
for i in range(min(5, len(images))):
    image = images[i]
    label = labels[i].item()
    cam = grad_cam(model, image, label)

    # Undo Normalize(mean=0.5, std=0.5) so imshow receives values in [0, 1]
    # instead of clipping the [-1, 1] tensor.
    display_image = (image * 0.5 + 0.5).clamp(0, 1).permute(1, 2, 0).numpy()

    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(display_image)
    plt.title(f'Original Image (Class: {label})')
    plt.axis('off')

    plt.subplot(1, 2, 2)
    plt.imshow(display_image)
    # Semi-transparent heatmap drawn over the image.
    plt.imshow(cam, cmap='jet', alpha=0.5)
    plt.title('Grad-CAM Visualization')
    plt.axis('off')

    plt.tight_layout()
    plt.show()
相关推荐
合作小小程序员小小店11 分钟前
web网页开发,在线%聚类,微博,舆情%系统,基于python,pycharm,django,nlp,kmeans,mysql
python·pycharm·kmeans·聚类·sklearn·kmean
Dan.Qiao11 分钟前
python读文件readline和readlines区别和惰性读
开发语言·python·惰性读文件
闲人编程35 分钟前
将你的旧手机变成监控摄像头(Python + OpenCV)
python·opencv·智能手机·监控·codecapsule·oasis
007php00737 分钟前
大厂深度面试相关文章:深入探讨底层原理与高性能优化
java·开发语言·git·python·面试·职场和发展·性能优化
SunnyDays10111 小时前
Python 复制和移动 Excel 工作表并保留所有格式:详解
python·复制excel工作表·移动excel工作表·重新排列excel工作表
不会编程的小寒1 小时前
C++初始继承,继承中构造、析构顺序
开发语言·python
Mos_x2 小时前
关于我们的python日记本
开发语言·python
十重幻想2 小时前
reshape的共享内存
python
Juchecar2 小时前
设计模式不是Java专属,其他语言的使用方法
java·python·设计模式
scala舔狗汪3 小时前
双层跳动爱心❤️❤️💕💕
python