李沐动手学深度学习:树叶分类竞赛

视频地址:30 第二部分完结竞赛:图片分类【动手学深度学习v2】

竞赛地址:https://www.kaggle.com/competitions/classify-leaves

python 复制代码
!nvidia-smi   # show GPU information
# !lscpu        # show CPU information
# !free -h      # show memory (RAM) information
# !python --version

My Code

几个要点:

  • 数据标准化
  • 数据增强
  • 标签编码
  • ResNet50
python 复制代码
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms

# Open one sample image from the competition data to inspect it
image_path = '/kaggle/input/classify-leaves/images/6.jpg'
image = Image.open(image_path)

# Display the untouched picture, with the axes hidden
plt.imshow(image)
plt.title("Original Image")
plt.axis('off')
plt.show()

# Convert the PIL image to a tensor so its layout can be examined
tensor_transform = transforms.ToTensor()
image_tensor = tensor_transform(image)

# Report the tensor shape and its leading (channel) dimension
tensor_shape = image_tensor.shape
print(f"Tensor shape (C, H, W): {tensor_shape}")
print(f"Number of channels: {tensor_shape[0]}")
复制代码
Tensor shape (C, H, W): torch.Size([3, 224, 224])
Number of channels: 3
python 复制代码
import os
import pandas as pd
import matplotlib.pyplot as plt
from torchvision import transforms, models, datasets
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
import random
import time

# 路径和文件名
# Paths and file names of the competition inputs
train_csv = '/kaggle/input/classify-leaves/train.csv'
test_csv = '/kaggle/input/classify-leaves/test.csv'
image_folder = '/kaggle/input/classify-leaves/'
# Read the CSV tables
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)

# Hold out 20% of the labelled rows as a validation split (fixed seed for reproducibility)
train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=42)
# Build the label -> index mapping from the FULL labelled table, not only the 80% split:
# a class that happens to land entirely in the validation split would otherwise raise
# KeyError when it is encoded. Sorting makes the mapping independent of row order.
label_to_index = {label: idx for idx, label in enumerate(sorted(train_df['label'].unique()))}

# Custom Dataset
class LeafDataset(Dataset):
    """Dataset that loads images listed in a DataFrame.

    Column 0 of ``dataframe`` is joined onto ``image_dir`` to form the image
    path; column 1 (read only when ``is_test`` is False) is the label, which
    is encoded to an integer through ``label_to_index``.

    Args:
        dataframe: table with the image path in column 0 and, for labelled
            data, the label in column 1.
        image_dir: directory the image paths are joined onto.
        label_to_index: mapping from label value to integer class index.
        transform: optional callable applied to the loaded PIL image.
        is_test: when True, ``__getitem__`` returns only the image.
    """

    def __init__(self, dataframe, image_dir, label_to_index, transform=None, is_test=False):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform
        self.is_test = is_test
        # Always stored (previously missing on test datasets) so the attribute
        # exists regardless of mode.
        self.label_to_index = label_to_index

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``image`` (test mode) or ``(image, encoded_label)`` for row ``idx``."""
        img_name = os.path.join(self.image_dir, self.dataframe.iloc[idx, 0])
        # Close the file handle deterministically and force 3 channels, so a
        # grayscale / RGBA / palette file cannot break a 3-channel Normalize.
        with Image.open(img_name) as img:
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        if self.is_test:
            return image

        # Labels must be encoded to integer class indices
        label = self.dataframe.iloc[idx, 1]
        return image, self.label_to_index[label]
python 复制代码
# A transform consisting of ToTensor() only, used to measure per-channel mean and std
simple_transform = transforms.ToTensor()

# DataLoader over the training split (no shuffling needed for statistics)
train_dataset0 = LeafDataset(train_data, image_folder, label_to_index, transform=simple_transform)
train_loader0 = DataLoader(train_dataset0, batch_size=256, shuffle=False)

# Running totals
mean = 0.0
std = 0.0
nb_samples = 0

# Accumulate per-image, per-channel statistics batch by batch.
# Note: the std reported here is the average of the per-image stds.
for images, _ in train_loader0:
    batch_samples = images.size(0)  # number of images in this batch
    num_channels = images.size(1)
    images = images.view(batch_samples, num_channels, -1)  # flatten H and W into one axis
    mean = mean + images.mean(dim=2).sum(dim=0)  # add up each image's channel means
    std = std + images.std(dim=2).sum(dim=0)  # add up each image's channel stds
    nb_samples = nb_samples + batch_samples

mean = mean / nb_samples
std = std / nb_samples

print(f'Mean: {mean}')
print(f'Std: {std}')

# Recorded output:
# Mean: tensor([0.7581, 0.7782, 0.7592])
# Std: tensor([0.1576, 0.1500, 0.1827])
python 复制代码
mean = [0.7581, 0.7782, 0.7592]
std = [0.1576, 0.1500, 0.1827]
# Training transform: resize, random augmentation, then normalize
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomRotation(45),  # random rotation, angle drawn from [-45, 45] degrees
    transforms.RandomHorizontalFlip(p=0.5),  # horizontal flip with probability 0.5
    transforms.RandomVerticalFlip(p=0.5),  # vertical flip with probability 0.5
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Evaluation transform: same resize and normalization but NO random augmentation,
# so validation accuracy is repeatable and test predictions are made on the
# unmodified images rather than on randomly rotated / flipped ones.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Build the training, validation and test datasets
train_dataset = LeafDataset(train_data, image_folder, label_to_index, transform=transform)
val_dataset = LeafDataset(val_data, image_folder, label_to_index, transform=eval_transform)
test_dataset = LeafDataset(test_df, image_folder, label_to_index, transform=eval_transform, is_test=True)

# Only the training loader shuffles; test order must match test_df row order
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# ResNet-50 initialised with torchvision's default pretrained weights
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

# Swap the final fully connected layer for one sized to this task:
# input width taken from the existing head, output width = number of distinct labels
num_ftrs = model.fc.in_features
num_classes = len(train_df['label'].unique())
model.fc = nn.Linear(in_features=num_ftrs, out_features=num_classes)


def evaluate_accuracy(data_iter, net, device=None):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: iterable yielding ``(X, y)`` batches of inputs and integer
            class labels.
        net: the model to evaluate.
        device: device the batches are moved to; defaults to the device of the
            model's first parameter.

    Returns:
        Fraction of correctly classified samples as a float, or ``0.0`` when
        ``data_iter`` yields no samples.

    The model is switched to eval mode for the duration of the call and then
    restored to whatever mode it was in before (even if evaluation raises),
    instead of being forced into training mode.
    """
    if device is None:
        device = next(net.parameters()).device
    was_training = net.training
    acc_sum, n = 0.0, 0
    net.eval()  # evaluation mode
    try:
        with torch.no_grad():
            for X, y in data_iter:
                X, y = X.to(device), y.to(device)
                y_hat = net(X)
                acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
                n += y.size(0)
    finally:
        net.train(was_training)  # restore the caller's mode
    # Guard against an empty iterator instead of dividing by zero
    return acc_sum / n if n else 0.0

def train(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    """Train ``net`` with cross-entropy loss and report per-epoch metrics.

    Args:
        net: model to train; it is moved to ``device``.
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of ``(X, y)`` batches scored after every epoch
            via ``evaluate_accuracy``.
        batch_size: unused inside the function; kept so existing callers
            keep working.
        optimizer: optimizer already bound to ``net``'s parameters.
        device: device the model and batches are moved to.
        num_epochs: number of passes over ``train_iter``.

    Returns:
        dict with the per-epoch lists ``'train_acc'``, ``'val_acc'`` and
        ``'train_loss'`` (the same series passed to ``plot_acc``).
    """
    net = net.to(device)
    print("training on ", device)
    loss = nn.CrossEntropyLoss()
    accuracy_test = []
    accuracy_train = []
    train_loss = []
    for epoch in range(num_epochs):
        # Set training mode explicitly rather than relying on the evaluation
        # helper having left the model in that mode.
        net.train()
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        epoch_loss = train_l_sum / batch_count  # mean of the per-batch losses
        epoch_acc = train_acc_sum / n
        accuracy_train.append(epoch_acc)
        accuracy_test.append(test_acc)
        train_loss.append(epoch_loss)
        print('epoch %d, loss %.4f, train acc %.3f, val acc %.3f, time %.1f sec'
              % (epoch + 1, epoch_loss, epoch_acc, test_acc, time.time() - start))
    plot_acc(accuracy_train, accuracy_test, train_loss)
    # Hand the history back so callers can inspect it (previously discarded)
    return {'train_acc': accuracy_train, 'val_acc': accuracy_test, 'train_loss': train_loss}
        
def plot_acc(accuracy_train, accuracy_test, train_loss):
    """Plot per-epoch train/validation accuracy and training loss on one figure.

    Accuracy curves use the left y-axis; the loss curve is drawn on a twin
    axis sharing the same x-axis, whose own y-axis is hidden.

    Args:
        accuracy_train: per-epoch training accuracy values.
        accuracy_test: per-epoch validation accuracy values.
        train_loss: per-epoch training loss values.
    """
    epochs = range(1, len(accuracy_train) + 1)
    fig, acc_axis = plt.subplots()

    # Accuracy curves on the left-hand axis
    accuracy_curves = (
        (accuracy_train, 'b-', 'Train Accuracy'),
        (accuracy_test, 'g-', 'Val Accuracy'),
    )
    for series, line_fmt, legend_name in accuracy_curves:
        acc_axis.plot(epochs, series, line_fmt, label=legend_name)
    acc_axis.set_xlabel('Epochs')
    acc_axis.set_ylabel('Accuracy', color='black')
    acc_axis.tick_params(axis='y', labelcolor='black')

    # Loss curve on a twin axis that shares the x-axis
    loss_axis = acc_axis.twinx()
    loss_axis.plot(epochs, train_loss, 'r--', label='Train Loss')
    loss_axis.set_ylabel('Loss', color='red')
    loss_axis.tick_params(axis='y', labelcolor='red')

    # Hide the right-hand y-axis
    loss_axis.get_yaxis().set_visible(False)

    # One legend for both axes, positioned in the accuracy axis' coordinates
    fig.legend(loc="center left", bbox_to_anchor=(0.67, 0.5), bbox_transform=acc_axis.transAxes)

    plt.title('Training and Test Accuracy vs Training Loss')
    plt.show()
    
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
lr = 0.001
num_epochs = 70
batch_size = 64

optimizer = optim.Adam(model.parameters(), lr=lr)
train(
    net=model,
    train_iter=train_loader,
    test_iter=val_loader,
    batch_size=batch_size,
    optimizer=optimizer,
    device=device,
    num_epochs=num_epochs,
)

# Save the model
# torch.save(model,'model.pt')

# Invert the label -> index mapping so predicted indices can be turned back into labels
index_to_label = {index: name for name, index in train_dataset.label_to_index.items()}

# Run inference over the test loader
model.eval()
predictions = []
with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        predicted = torch.max(outputs, dim=1).indices  # index of the highest-scoring class per row
        predictions.extend(predicted.cpu().numpy())

# Map predicted indices back to the original label values
test_df['label'] = list(map(index_to_label.__getitem__, predictions))

# Write the submission file
test_df.to_csv('submission6.csv', index=False)

输出太长了,这里省略……

python 复制代码
# test_df
python 复制代码
!nvidia-smi   # show GPU information

提交到网站后,得到 94% 的准确率,运行时间大约需要四个小时。

相关推荐
Lee川28 分钟前
RAG 实战:从一篇掘金文章出发,拆解检索增强生成的全链路
前端·人工智能·后端
码农小旋风32 分钟前
Codex小白入门使用教程
人工智能·chatgpt·claude
Lee川41 分钟前
MCP 高德地图实战:当 AI 学会使用工具,一个协议如何重塑大模型的行动边界
前端·人工智能·后端
凌杰1 小时前
AI 学习笔记:Agent 的应用演示
人工智能
程序员cxuan1 小时前
Codex 把我家烂网给优化后,我 TM 直接原地起飞了。
人工智能·后端·程序员
IT_陈寒1 小时前
Redis批量删除踩了坑,原来DEL命令不是万能的
前端·人工智能·后端
xinhuanjieyi1 小时前
gpt-sovits测试语音克隆
人工智能·gpt
星辰AI1 小时前
Transformers 架构核心原理:从注意力机制到 GPT
人工智能·ai·语言模型
沪漂阿龙1 小时前
Hermes Agent Sessions 架构详解:AI 如何跨平台延续任务、找回历史、持续推进工作
人工智能·架构
500841 小时前
昇腾 CANN 的五层架构,到底分了哪五层
java·人工智能·分布式·架构·ocr·wpf