李沐动手学深度学习:树叶分类竞赛

视频地址:30 第二部分完结竞赛:图片分类【动手学深度学习v2】

竞赛地址:https://www.kaggle.com/competitions/classify-leaves

python 复制代码
!nvidia-smi   # 查看 GPU 信息
# !lscpu        # 查看 CPU 信息
# !free -h      # 查看内存(RAM) 信息
# !python --version

My Code

几个要点:

  • 数据标准化
  • 数据增强
  • 标签编码
  • ResNet50
python 复制代码
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms

# 加载图片
image_path = '/kaggle/input/classify-leaves/images/6.jpg'
image = Image.open(image_path)

# 显示原始图片
plt.imshow(image)
plt.title("Original Image")
plt.axis('off')
plt.show()

# 将图片转换为Tensor并查看形状
tensor_transform = transforms.ToTensor()
image_tensor = tensor_transform(image)

# 查看转换后的通道数和形状
print(f"Tensor shape (C, H, W): {image_tensor.shape}")
print(f"Number of channels: {image_tensor.shape[0]}")
复制代码
Tensor shape (C, H, W): torch.Size([3, 224, 224])
Number of channels: 3
python 复制代码
import os
import pandas as pd
import matplotlib.pyplot as plt
from torchvision import transforms, models, datasets
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
import random
import time

# 路径和文件名
train_csv = '/kaggle/input/classify-leaves/train.csv'
test_csv = '/kaggle/input/classify-leaves/test.csv'
image_folder = '/kaggle/input/classify-leaves/'
# 读取数据
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)

# 将训练数据划分为训练集和验证集
train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=42)
label_to_index = {label: idx for idx, label in enumerate(train_data['label'].unique())}

# 自定义Dataset
class LeafDataset(Dataset):
    def __init__(self, dataframe, image_dir, label_to_index, transform=None, is_test=False):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform
        self.is_test = is_test
        if not is_test:
            self.label_to_index = label_to_index

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_dir, self.dataframe.iloc[idx, 0])
        image = Image.open(img_name)
        
        if self.transform:
            image = self.transform(image)
        
        if self.is_test:
            return image
        else:
            label = self.dataframe.iloc[idx, 1]
            # 需要对标签进行编码
            label = self.label_to_index[label]
            return image, label
python 复制代码
# 自定义一个仅包含ToTensor()的transform,用于计算各个通道的均值和标准差
simple_transform = transforms.ToTensor()

# 创建训练集的 DataLoader
train_dataset0 = LeafDataset(train_data, image_folder, label_to_index, transform=simple_transform)
train_loader0 = DataLoader(train_dataset0, batch_size=256, shuffle=False)

# 初始化累加器
mean = 0.0
std = 0.0
nb_samples = 0

# 计算均值和标准差
for images, _ in train_loader0:
    batch_samples = images.size(0)  # 当前批次的图片数量
    images = images.view(batch_samples, images.size(1), -1)  # 将图片展开为二维
    mean += images.mean(2).sum(0)  # 累加每个通道的均值
    std += images.std(2).sum(0)  # 累加每个通道的标准差
    nb_samples += batch_samples

mean /= nb_samples
std /= nb_samples

print(f'Mean: {mean}')
print(f'Std: {std}')

# Mean: tensor([0.7581, 0.7782, 0.7592])
# Std: tensor([0.1576, 0.1500, 0.1827])
python 复制代码
mean = [0.7581, 0.7782, 0.7592]
std = [0.1576, 0.1500, 0.1827]
# 图像变换
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomRotation(45),#随机旋转,-45到45度之间随机选
    transforms.RandomHorizontalFlip(p=0.5),#随机水平翻转 选择一个概率概率
    transforms.RandomVerticalFlip(p=0.5),#随机垂直翻转
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# 加载训练集、验证集和测试集
train_dataset = LeafDataset(train_data, image_folder, label_to_index, transform=transform)
val_dataset = LeafDataset(val_data, image_folder, label_to_index, transform=transform)
test_dataset = LeafDataset(test_df, image_folder, label_to_index, transform=transform, is_test=True)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# ResNet模型
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

# 修改最后的全连接层,适应分类任务
num_ftrs = model.fc.in_features # 获取全连接层的输入特征数
model.fc = nn.Linear(num_ftrs, len(train_df['label'].unique()))


def evaluate_accuracy(data_iter, net, device = None):
    if device is None:
        device = next(net.parameters()).device
    acc_sum, n = 0.0, 0
    net.eval()  # 进入评估模式
    with torch.no_grad():
        for X, y in data_iter:
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
            n += y.size(0)
    net.train()  # 恢复训练模式
    return acc_sum / n

def train(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    net = net.to(device)
    print("training on ", device)
    loss = nn.CrossEntropyLoss()
    accuracy_test=[]
    accuracy_train=[]
    train_loss = []
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        accuracy_train.append(train_acc_sum / n)
        accuracy_test.append(test_acc)
        train_loss.append(train_l_sum / batch_count)
        print('epoch %d, loss %.4f, train acc %.3f, val acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))
    plot_acc(accuracy_train,accuracy_test,train_loss)
        
def plot_acc(accuracy_train, accuracy_test, train_loss):
    epochs = range(1, len(accuracy_train) + 1)
    
    fig, ax1 = plt.subplots()

    # 绘制训练准确率和测试准确率,使用左坐标轴
    ax1.plot(epochs, accuracy_train, 'b-', label='Train Accuracy')
    ax1.plot(epochs, accuracy_test, 'g-', label='Val Accuracy')
    ax1.set_xlabel('Epochs')
    ax1.set_ylabel('Accuracy', color='black')
    ax1.tick_params(axis='y', labelcolor='black')

    # 创建一个共享x轴的右坐标轴,用于绘制损失
    ax2 = ax1.twinx()
    ax2.plot(epochs, train_loss, 'r--', label='Train Loss')
    ax2.set_ylabel('Loss', color='red')
    ax2.tick_params(axis='y', labelcolor='red')
    
    # 隐藏右坐标轴标签
    ax2.get_yaxis().set_visible(False)

    # 添加图例
    fig.legend(loc="center left", bbox_to_anchor=(0.67, 0.5), bbox_transform=ax1.transAxes)
    
    plt.title('Training and Test Accuracy vs Training Loss')
    plt.show()
    
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
lr, num_epochs = 0.001, 70
batch_size = 64
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, train_loader, val_loader, batch_size, optimizer, device, num_epochs)

# 保存模型
# torch.save(model,'model.pt')

# 创建标签到索引的映射
index_to_label = {idx: label for label, idx in train_dataset.label_to_index.items()}

# 预测
model.eval()
predictions = []
with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())

# 将预测结果转换为原始标签
test_df['label'] = [index_to_label[pred] for pred in predictions]

# 将预测结果保存到 submission.csv
test_df.to_csv('submission6.csv', index=False)

太长了,这里省略。。。。。

python 复制代码
# test_df
python 复制代码
!nvidia-smi   # 查看 GPU 信息

提交到网站,有94%的准确率,运行时间要大约四个小时。

相关推荐
橡晟3 小时前
深度学习入门:让神经网络变得“深不可测“⚡(二)
人工智能·python·深度学习·机器学习·计算机视觉
墨尘游子3 小时前
神经网络的层与块
人工智能·python·深度学习·机器学习
Leah01053 小时前
什么是神经网络,常用的神经网络,如何训练一个神经网络
人工智能·深度学习·神经网络·ai
Leah01053 小时前
机器学习、深度学习、神经网络之间的关系
深度学习·神经网络·机器学习·ai
PyAIExplorer4 小时前
图像亮度调整的简单实现
人工智能·计算机视觉
Striker_Eureka4 小时前
DiffDet4SAR——首次将扩散模型用于SAR图像目标检测,来自2024 GRSL(ESI高被引1%论文)
人工智能·目标检测
Rvelamen5 小时前
LLM-SECURITY-PROMPTS大模型提示词攻击测评基准
人工智能·python·安全
AI technophile5 小时前
OpenCV计算机视觉实战(15)——霍夫变换详解
人工智能·opencv·计算机视觉
JNU freshman6 小时前
计算机视觉 之 数字图像处理基础(一)
人工智能·计算机视觉
鹧鸪云光伏7 小时前
鹧鸪云重构光伏发电量预测的精度标准
人工智能·无人机·光伏·光伏设计·光伏模拟