李沐动手学深度学习:树叶分类竞赛

视频地址:30 第二部分完结竞赛:图片分类【动手学深度学习v2】

竞赛地址:https://www.kaggle.com/competitions/classify-leaves

python 复制代码
!nvidia-smi   # 查看 GPU 信息
# !lscpu        # 查看 CPU 信息
# !free -h      # 查看内存(RAM) 信息
# !python --version

My Code

几个要点:

  • 数据标准化
  • 数据增强
  • 标签编码
  • ResNet50
python 复制代码
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms

# 加载图片
image_path = '/kaggle/input/classify-leaves/images/6.jpg'
image = Image.open(image_path)

# 显示原始图片
plt.imshow(image)
plt.title("Original Image")
plt.axis('off')
plt.show()

# 将图片转换为Tensor并查看形状
tensor_transform = transforms.ToTensor()
image_tensor = tensor_transform(image)

# 查看转换后的通道数和形状
print(f"Tensor shape (C, H, W): {image_tensor.shape}")
print(f"Number of channels: {image_tensor.shape[0]}")
Tensor shape (C, H, W): torch.Size([3, 224, 224])
Number of channels: 3
python 复制代码
import os
import pandas as pd
import matplotlib.pyplot as plt
from torchvision import transforms, models, datasets
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
import random
import time

# 路径和文件名
train_csv = '/kaggle/input/classify-leaves/train.csv'
test_csv = '/kaggle/input/classify-leaves/test.csv'
image_folder = '/kaggle/input/classify-leaves/'
# 读取数据
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)

# 将训练数据划分为训练集和验证集
train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=42)
label_to_index = {label: idx for idx, label in enumerate(train_data['label'].unique())}

# 自定义Dataset
class LeafDataset(Dataset):
    def __init__(self, dataframe, image_dir, label_to_index, transform=None, is_test=False):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform
        self.is_test = is_test
        if not is_test:
            self.label_to_index = label_to_index

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_dir, self.dataframe.iloc[idx, 0])
        image = Image.open(img_name)
        
        if self.transform:
            image = self.transform(image)
        
        if self.is_test:
            return image
        else:
            label = self.dataframe.iloc[idx, 1]
            # 需要对标签进行编码
            label = self.label_to_index[label]
            return image, label
python 复制代码
# 自定义一个仅包含ToTensor()的transform,用于计算各个通道的均值和标准差
simple_transform = transforms.ToTensor()

# 创建训练集的 DataLoader
train_dataset0 = LeafDataset(train_data, image_folder, label_to_index, transform=simple_transform)
train_loader0 = DataLoader(train_dataset0, batch_size=256, shuffle=False)

# 初始化累加器
mean = 0.0
std = 0.0
nb_samples = 0

# 计算均值和标准差
for images, _ in train_loader0:
    batch_samples = images.size(0)  # 当前批次的图片数量
    images = images.view(batch_samples, images.size(1), -1)  # 将图片展开为二维
    mean += images.mean(2).sum(0)  # 累加每个通道的均值
    std += images.std(2).sum(0)  # 累加每个通道的标准差
    nb_samples += batch_samples

mean /= nb_samples
std /= nb_samples

print(f'Mean: {mean}')
print(f'Std: {std}')

# Mean: tensor([0.7581, 0.7782, 0.7592])
# Std: tensor([0.1576, 0.1500, 0.1827])
python 复制代码
mean = [0.7581, 0.7782, 0.7592]
std = [0.1576, 0.1500, 0.1827]
# 图像变换
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomRotation(45),#随机旋转,-45到45度之间随机选
    transforms.RandomHorizontalFlip(p=0.5),#随机水平翻转 选择一个概率概率
    transforms.RandomVerticalFlip(p=0.5),#随机垂直翻转
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# 加载训练集、验证集和测试集
train_dataset = LeafDataset(train_data, image_folder, label_to_index, transform=transform)
val_dataset = LeafDataset(val_data, image_folder, label_to_index, transform=transform)
test_dataset = LeafDataset(test_df, image_folder, label_to_index, transform=transform, is_test=True)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# ResNet模型
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

# 修改最后的全连接层,适应分类任务
num_ftrs = model.fc.in_features # 获取全连接层的输入特征数
model.fc = nn.Linear(num_ftrs, len(train_df['label'].unique()))


def evaluate_accuracy(data_iter, net, device = None):
    if device is None:
        device = next(net.parameters()).device
    acc_sum, n = 0.0, 0
    net.eval()  # 进入评估模式
    with torch.no_grad():
        for X, y in data_iter:
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
            n += y.size(0)
    net.train()  # 恢复训练模式
    return acc_sum / n

def train(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    net = net.to(device)
    print("training on ", device)
    loss = nn.CrossEntropyLoss()
    accuracy_test=[]
    accuracy_train=[]
    train_loss = []
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        accuracy_train.append(train_acc_sum / n)
        accuracy_test.append(test_acc)
        train_loss.append(train_l_sum / batch_count)
        print('epoch %d, loss %.4f, train acc %.3f, val acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))
    plot_acc(accuracy_train,accuracy_test,train_loss)
        
def plot_acc(accuracy_train, accuracy_test, train_loss):
    epochs = range(1, len(accuracy_train) + 1)
    
    fig, ax1 = plt.subplots()

    # 绘制训练准确率和测试准确率,使用左坐标轴
    ax1.plot(epochs, accuracy_train, 'b-', label='Train Accuracy')
    ax1.plot(epochs, accuracy_test, 'g-', label='Val Accuracy')
    ax1.set_xlabel('Epochs')
    ax1.set_ylabel('Accuracy', color='black')
    ax1.tick_params(axis='y', labelcolor='black')

    # 创建一个共享x轴的右坐标轴,用于绘制损失
    ax2 = ax1.twinx()
    ax2.plot(epochs, train_loss, 'r--', label='Train Loss')
    ax2.set_ylabel('Loss', color='red')
    ax2.tick_params(axis='y', labelcolor='red')
    
    # 隐藏右坐标轴标签
    ax2.get_yaxis().set_visible(False)

    # 添加图例
    fig.legend(loc="center left", bbox_to_anchor=(0.67, 0.5), bbox_transform=ax1.transAxes)
    
    plt.title('Training and Test Accuracy vs Training Loss')
    plt.show()
    
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
lr, num_epochs = 0.001, 70
batch_size = 64
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, train_loader, val_loader, batch_size, optimizer, device, num_epochs)

# 保存模型
# torch.save(model,'model.pt')

# 创建标签到索引的映射
index_to_label = {idx: label for label, idx in train_dataset.label_to_index.items()}

# 预测
model.eval()
predictions = []
with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())

# 将预测结果转换为原始标签
test_df['label'] = [index_to_label[pred] for pred in predictions]

# 将预测结果保存到 submission.csv
test_df.to_csv('submission6.csv', index=False)

太长了,这里省略。。。。。

python 复制代码
# test_df
python 复制代码
!nvidia-smi   # 查看 GPU 信息

提交到网站,有94%的准确率,运行时间要大约四个小时。

相关推荐
阡之尘埃1 小时前
Python数据分析案例61——信贷风控评分卡模型(A卡)(scorecardpy 全面解析)
人工智能·python·机器学习·数据分析·智能风控·信贷风控
孙同学要努力3 小时前
全连接神经网络案例——手写数字识别
人工智能·深度学习·神经网络
Eric.Lee20213 小时前
yolo v5 开源项目
人工智能·yolo·目标检测·计算机视觉
其实吧34 小时前
基于Matlab的图像融合研究设计
人工智能·计算机视觉·matlab
丕羽4 小时前
【Pytorch】基本语法
人工智能·pytorch·python
ctrey_4 小时前
2024-11-1 学习人工智能的Day20 openCV(2)
人工智能·opencv·学习
SongYuLong的博客4 小时前
Air780E基于LuatOS编程开发
人工智能
Jina AI4 小时前
RAG 系统的分块难题:小型语言模型如何找到最佳断点?
人工智能·语言模型·自然语言处理
-派神-5 小时前
大语言模型(LLM)量化基础知识(一)
人工智能·语言模型·自然语言处理
johnny_hhh5 小时前
AI大模型重塑软件开发流程:定义、应用场景、优势、挑战及未来展望
人工智能