J4打卡—— ResNet 和 DenseNet结合实现鸟类分类

1.检查GPU

# 1. Device check: use the CUDA GPU when available, otherwise fall back to CPU.
import os,PIL,pathlib,warnings
import torch  # fix: torch was used below but never imported in this section

warnings.filterwarnings("ignore")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

​​​​

2.查看数据

# 2. Inspect the data: configure matplotlib for Chinese labels, then count images.
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']   # render CJK glyphs in figures
plt.rcParams['axes.unicode_minus'] = False     # keep the minus sign readable

import os,PIL,pathlib

data_dir = pathlib.Path("data/第8天/bird_photos")

# one sub-directory per class; count every file one level below the root
image_count = len(list(data_dir.glob('*/*')))
print("图片总数为:", image_count)

3.划分数据集

python 代码:
# 3. Split the dataset 80/20 and build the train/test DataLoaders.
batch_size = 8
img_height = 224
img_width = 224

import torchvision.transforms as transforms
import torchvision.datasets as datasets

# fix: do not shadow the `transforms` module with the composed pipeline
data_transforms = transforms.Compose([
    transforms.Resize([img_height, img_width]),  # fix: reuse the configured size
    transforms.ToTensor(),
    # transforms.Normalize(            # ImageNet statistics (currently disabled)
    #     mean=[0.485,0.456,0.406],    # fix: 0.482 -> 0.485
    #     std=[0.229,0.224,0.225]
    # )
])
total_data = datasets.ImageFolder("data/第8天/bird_photos", transform=data_transforms)
total_data

total_data.class_to_idx  # {class_name: label_index} mapping used below

# random 80/20 train/test split
train_size = int(0.8 * len(total_data))
test_size = len(total_data) - train_size
train_data, test_data = torch.utils.data.random_split(total_data, [train_size, test_size])
train_data, test_data

batch_size = 8
train_dl = torch.utils.data.DataLoader(train_data, batch_size, shuffle=True, num_workers=1)
test_dl = torch.utils.data.DataLoader(test_data, batch_size, shuffle=True, num_workers=1)

# sanity-check one batch: X is (N, C, H, W), y is (N,)
for X, y in train_dl:
    print(X.shape)
    print(y.shape)
    break

​​​​​​​

4.创建模型

python 代码:
import torch
import torch.nn as nn
import torch.nn.functional as F

# ResNet的残差块
# ResNet residual block
class ResBlock(nn.Module):
    """Basic residual block: two 3x3 convs plus a skip connection."""

    def __init__(self, in_channels, out_channels, stride=1):
        super(ResBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # Identity shortcut by default; switch to a 1x1 projection when the
        # spatial size or channel count changes.
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        residual = self.shortcut(x)
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = out + residual  # residual connection
        return F.relu(out)

# DenseNet的密集块
# DenseNet dense block
class DenseBlock(nn.Module):
    """Dense block: every layer's output is concatenated onto its input,
    so channels grow by `growth_rate` per layer."""

    def __init__(self, in_channels, growth_rate, num_layers):
        super(DenseBlock, self).__init__()
        self.layers = nn.ModuleList(
            self._make_layer(in_channels + i * growth_rate, growth_rate)
            for i in range(num_layers)
        )

    def _make_layer(self, in_channels, growth_rate):
        # Two 3x3 convs producing `growth_rate` new feature maps.
        return nn.Sequential(
            nn.Conv2d(in_channels, growth_rate, kernel_size=3, padding=1),
            nn.BatchNorm2d(growth_rate),
            nn.ReLU(inplace=True),
            nn.Conv2d(growth_rate, growth_rate, kernel_size=3, padding=1),
            nn.BatchNorm2d(growth_rate)
        )

    def forward(self, x):
        features = x
        for layer in self.layers:
            # DenseNet: concatenate new feature maps along the channel axis
            features = torch.cat([features, layer(features)], 1)
        return features

# 混合网络模型
# Hybrid network: stem conv -> ResNet residual block -> DenseNet block -> classifier
class ResNetDenseNet(nn.Module):
    """Combine a ResNet residual block with a DenseNet dense block.

    Args:
        in_channels: number of input image channels (3 for RGB).
        num_classes: width of the classification head.
    """

    def __init__(self, in_channels=3, num_classes=1000):
        super(ResNetDenseNet, self).__init__()
        stem_channels = 64
        growth_rate = 32
        num_dense_layers = 4
        self.conv1 = nn.Conv2d(in_channels, stem_channels, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(stem_channels)
        self.layer1 = ResBlock(stem_channels, stem_channels)
        self.dense_block = DenseBlock(stem_channels, growth_rate=growth_rate, num_layers=num_dense_layers)
        # fix: derive the classifier width from the dense-block configuration
        # instead of repeating the magic numbers (was: 64 + 32 * 4)
        self.fc = nn.Linear(stem_channels + growth_rate * num_dense_layers, num_classes)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))   # stem: 7x7 conv, stride 2
        x = self.layer1(x)                    # ResNet residual block
        x = self.dense_block(x)               # DenseNet block (channel concat)
        x = F.adaptive_avg_pool2d(x, (1, 1))  # global average pooling
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

model = ResNetDenseNet().to(device)
model

5.编译及训练模型

python 代码:
def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch; return (accuracy, mean per-batch loss)."""
    n_samples = len(dataloader.dataset)
    n_batches = len(dataloader)
    running_loss = 0.0
    correct = 0.0
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    return correct / n_samples, running_loss / n_batches
def test(dataloader, model, loss_fn):
    """Evaluate the model; return (accuracy, mean per-batch loss).

    Gradients are disabled for the whole pass.
    """
    n_samples = len(dataloader.dataset)
    n_batches = len(dataloader)
    running_loss = 0.0
    correct = 0.0
    with torch.no_grad():
        for imgs, target in dataloader:
            imgs, target = imgs.to(device), target.to(device)
            target_pred = model(imgs)
            running_loss += loss_fn(target_pred, target).item()
            correct += (target_pred.argmax(1) == target).type(torch.float).sum().item()
    return correct / n_samples, running_loss / n_batches

# 5. Compile and train the model, keeping the best checkpoint and stopping
#    early when the test loss stops improving.
import copy

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.CrossEntropyLoss()  # fix: was defined twice; `import copy`/`import torch` also deduplicated

epochs = 20
train_loss = []
train_acc = []
test_loss = []
test_acc = []
best_acc = 0
best_loss = float('inf')           # lowest test loss seen so far
patience = 5                       # stop after 5 epochs without improvement
patience_counter = 0               # consecutive epochs without improvement
best_model = copy.deepcopy(model)  # fix: guarantee best_model exists before saving

for epoch in range(epochs):
    model.train()
    epoch_train_acc, epoch_train_loss = train(train_dl, model, loss_fn, optimizer)

    model.eval()
    epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)

    # fix: record metrics BEFORE the early-stop check so the epoch that
    # triggers the stop is not silently dropped from the history/plots
    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    test_acc.append(epoch_test_acc)
    test_loss.append(epoch_test_loss)

    # keep a deep copy of the best model (lowest test loss)
    if epoch_test_loss < best_loss:
        best_loss = epoch_test_loss
        best_model = copy.deepcopy(model)
        patience_counter = 0  # reset the patience counter
    else:
        patience_counter += 1  # one more epoch without improvement

    # Print the results for the current epoch
    lr = optimizer.state_dict()['param_groups'][0]['lr']
    template = ('Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:.3f}, Test_acc:{:.1f}%, Test_loss:{:.3f}, Lr:{:.2E}')
    print(template.format(epoch + 1, epoch_train_acc * 100, epoch_train_loss, 
                          epoch_test_acc * 100, epoch_test_loss, lr))

    # If patience is exceeded, stop training
    if patience_counter >= patience:
        print(f"Stopping early at epoch {epoch+1} due to no improvement in test loss for {patience} epochs.")
        break

# Save the best model
PATH = './best_model.pth'
torch.save(best_model.state_dict(), PATH)
print('Finished Training')

6.结果可视化

python 代码:
# 6. Visualize training history: accuracy and loss curves side by side.
import matplotlib.pyplot as plt
import warnings

epochs = len(train_acc)
epoch_range = range(1, epochs + 1)

plt.figure(figsize=(12, 6))

# left panel: accuracy curves
plt.subplot(1, 2, 1)
plt.plot(epoch_range, train_acc, label='Train Accuracy', color='blue')
plt.plot(epoch_range, test_acc, label='Test Accuracy', color='orange')
plt.title('Training and Test Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc='best')

# right panel: loss curves
plt.subplot(1, 2, 2)
plt.plot(epoch_range, train_loss, label='Train Loss', color='blue')
plt.plot(epoch_range, test_loss, label='Test Loss', color='orange')
plt.title('Training and Test Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(loc='best')

# render both panels
plt.tight_layout()
plt.show()

7.预测图片

python 代码:
# 7. Predict one batch of test images and display them with predicted labels.
import os,PIL,random,pathlib

data_dir = 'data/第8天/bird_photos'
data_path = pathlib.Path(data_dir)
data_paths = list(data_path.glob('*'))
# fix: use Path.name instead of str(path).split('\\')[3], which only worked
# on Windows at this exact directory depth
classNames = [path.name for path in data_paths]
classNames
# NOTE(review): ImageFolder sorts class folders alphabetically; glob order is
# not guaranteed to match — verify classNames aligns with total_data.class_to_idx.

# fix: removed stray `print(images[i].shape)` that referenced undefined names

plt.figure(figsize=(10, 5))
plt.suptitle("Photo Predictions")

model.eval()  # hoisted out of the loop: set inference mode once
for images, labels in test_dl:
    for i in range(min(8, len(images))):
        ax = plt.subplot(2, 4, i + 1)

        img_tensor = images[i]

        # CHW tensor -> HWC numpy image; rescale into [0, 1] if needed
        img_array = img_tensor.squeeze().permute(1, 2, 0).cpu().numpy()
        if img_array.min() < 0 or img_array.max() > 1:
            img_array = (img_array - img_array.min()) / (img_array.max() - img_array.min())

        plt.imshow(img_array)

        with torch.no_grad():
            predictions = model(img_tensor.unsqueeze(0).to(device))
            predicted_label = classNames[predictions.argmax(dim=1).item()]

        plt.title(predicted_label)
        plt.axis("off")
    break  # only visualize the first batch
plt.show()

​​总结:

1.创新点:

  • 结合 ResNet 和 DenseNet:充分利用两种网络的优点,提取更丰富的特征。

  • 混合特征提取:通过残差块和密集块的组合,增强特征表示能力。

  • 特征重用:通过密集连接实现特征重用,提高参数效率。

  • 全局池化:减少参数量,避免全连接层对输入尺寸的依赖。

  • 灵活性和可扩展性:模型结构灵活,可以根据任务需求进行调整。

  • 模型结构:

    python 复制代码
    import torchsummary as summary
    summary.summary(model, (3, 224, 224))

2.早停机制:

python 代码:
# NOTE(review): this snippet duplicates the training loop from section 5;
# only the comments are translated here, the code is unchanged.
import copy
import torch

# Loss function and other initializations
loss_fn = nn.CrossEntropyLoss()
epochs = 20
train_loss = []
train_acc = []
test_loss = []
test_acc = []
best_acc = 0
best_loss = float('inf')  # initialize the best loss to +infinity
patience = 5  # patience: stop after 5 consecutive epochs without improvement
patience_counter = 0  # counts consecutive epochs without improvement

# Training and testing loop
for epoch in range(epochs):
    model.train()
    epoch_train_acc, epoch_train_loss = train(train_dl, model, loss_fn, optimizer)
   
    model.eval()
    epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)

    # Check if test loss improved
    if epoch_test_loss < best_loss:
        best_loss = epoch_test_loss
        best_model = copy.deepcopy(model)
        patience_counter = 0  # reset the patience counter
    else:
        patience_counter += 1  # increment the patience counter

    # If patience is exceeded, stop training
    if patience_counter >= patience:
        print(f"Stopping early at epoch {epoch+1} due to no improvement in test loss for {patience} epochs.")
        break

    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    test_acc.append(epoch_test_acc)
    test_loss.append(epoch_test_loss)

    # Print the results for the current epoch
    lr = optimizer.state_dict()['param_groups'][0]['lr']
    template = ('Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:.3f}, Test_acc:{:.1f}%, Test_loss:{:.3f}, Lr:{:.2E}')
    print(template.format(epoch + 1, epoch_train_acc * 100, epoch_train_loss, 
                          epoch_test_acc * 100, epoch_test_loss, lr))

# Save the best model
PATH = './best_model.pth'
torch.save(best_model.state_dict(), PATH)
print('Finished Training')

3.流程总结:

1. 代码结构

代码分为以下几个部分:

  1. 检查 GPU:检查是否有可用的 GPU,并将模型和数据加载到 GPU 上。

  2. 查看数据:加载数据集并统计图片数量。

  3. 划分数据集:将数据集划分为训练集和测试集,并创建数据加载器。

  4. 创建模型:定义了一个结合 ResNet 和 DenseNet 的混合模型。

  5. 编译及训练模型:定义了训练和测试函数,并进行了模型训练。

  6. 结果可视化:绘制训练和测试的准确率和损失曲线。

  7. 预测图片:对测试集中的图片进行预测,并可视化预测结果。


2. 模型设计

模型的核心是一个结合 ResNet 和 DenseNet 的混合网络:

  • ResNet 残差块:通过残差连接解决了梯度消失问题,使得网络可以训练得更深。

  • DenseNet 密集块:通过密集连接实现了特征重用,提高了参数效率。

  • 混合模型 :先通过一个 ResNet 残差块 提取特征,然后通过一个 DenseNet 密集块 进一步提取和重用特征,最后通过全局平均池化和全连接层进行分类。


3. 数据集处理

  • 数据集被划分为训练集和测试集,比例为 80% 和 20%。

  • 数据加载器使用了 torch.utils.data.DataLoader,支持批量加载和数据打乱。

  • 数据预处理包括调整大小 (Resize) 和转换为张量 (ToTensor)。


4. 训练和测试

  • 训练函数:计算损失并进行反向传播,更新模型参数。

  • 测试函数:在测试集上评估模型性能,计算准确率和损失。

  • 早停机制:如果测试损失在连续 5 轮训练中没有下降,则提前停止训练。

  • 模型保存:保存测试损失最小的模型。


5. 结果可视化

  • 绘制了训练和测试的准确率和损失曲线,直观展示了模型的性能。

  • 准确率曲线反映了模型在训练集和测试集上的分类能力。

  • 损失曲线反映了模型在训练集和测试集上的优化情况。


6. 预测图片

  • 对测试集中的图片进行预测,并可视化预测结果。

  • 预测结果以图片标题的形式显示,便于观察模型的分类效果。

相关推荐
Felaim27 分钟前
基于模仿学习(IL)的端到端自动驾驶发展路径
人工智能·深度学习·自动驾驶
量子-Alex1 小时前
【目标检测】【PANet】Path Aggregation Network for Instance Segmentation
人工智能·目标检测·计算机视觉
lihuayong1 小时前
计算机视觉:经典数据格式(VOC、YOLO、COCO)解析与转换(附代码)
人工智能·yolo·目标检测·计算机视觉·目标跟踪·coco·数据标注
thinkMoreAndDoMore1 小时前
深度学习(3)-TensorFlow入门(常数张量和变量)
开发语言·人工智能·python
神舟之光1 小时前
动手学深度学习2025.2.23-预备知识之-线性代数
人工智能·深度学习·线性代数
wapicn991 小时前
‌挖数据平台对接DeepSeek推出一键云端部署功能:API接口驱动金融、汽车等行业智能化升级
java·人工智能·python·金融·汽车·php
不爱学习的YY酱2 小时前
MusicGPT的本地化部署与远程调用:让你的Windows电脑成为AI音乐工作站
人工智能·windows
kakaZhui2 小时前
【多模态大模型】端侧语音大模型minicpm-o:手机上的 GPT-4o 级多模态大模型
人工智能·chatgpt·aigc·llama
艾思科蓝 AiScholar2 小时前
【SPIE出版,见刊快速,EI检索稳定,浙江水利水电学院主办】2025年物理学与量子计算国际学术会议(ICPQC 2025)
图像处理·人工智能·信息可视化·自然语言处理·数据分析·力扣·量子计算
zidea2 小时前
AI 长篇文本生成面临的挑战,以及解决方案(1)
人工智能·python·deepseek