12.12深度学习_CNN_项目实战

基于CNN的AnimalTriClassifier

关于项目实现的文档说明书,三个要素:数据、模型、训练

1、项目简介

关于项目的基本介绍。

本项目实现的是对动物面部图像的分类,类别有猫、狗、野生动物三种,属于小颗粒度分类

  • 大颗粒度分类:以物种作为分类,比如飞机、青蛙、狗、猫、马、鹿等。
  • 实体颗粒度分类:具体到具体的人,比如指纹识别、人脸识别等具体的个体,具体的实体

1.1 项目名称

基于CNN的AnimalTriClassifier

1.2 项目简介

​ 本项目旨在使用卷积神经网络(CNN)进行图像分类任务。我们将使用 LeNet5(衍生) 模型来训练一个可以区分猫、狗和野生动物的分类器。项目中包括了数据预处理、模型训练、测试、验证以及单张图片推理等功能。

2、数据

公开的数据集

2.1 公开数据集

Animal Faces

2.3 数据增强

提升模型的泛化能力和鲁棒性。

python 复制代码
    # Data preprocessing and augmentation pipeline
    transform = transforms.Compose([
        # transforms.RandomVerticalFlip(),
        # transforms.RandomRotation(degrees=(0, 180)),
        # transforms.RandomHorizontalFlip(),  # random horizontal flip
        # transforms.RandomRotation(10),
        # transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Resize((64, 64)),  # 64x64 is the input size the LeNet5 layers are sized for
        transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225]) ,
        # NOTE(review): the random transforms below run after Normalize, i.e. on
        # standardized values rather than on raw pixel intensities -- confirm this is intended.
        transforms.RandomRotation(degrees=(0, 180)),
        transforms.RandomInvert(), # random color inversion
        transforms.RandomAffine(degrees=(30, 70), translate=(0.1, 0.3), scale=(0.5, 0.75)),
        ])

3. 神经网络

手写 LeNet5

python 复制代码
import torch
import torch.nn as nn
import torch.nn.functional as F

class LeNet5(nn.Module):
    """LeNet-5-style CNN for 3-channel 64x64 images.

    Three conv stages (3 -> 16 -> 32 -> 64 channels) each keep the spatial
    size with a padded 5x5 convolution and then halve it with 2x2 average
    pooling (64 -> 32 -> 16 -> 8). The resulting 64 * 8 * 8 features feed a
    120 -> 84 -> ``num_classes`` fully connected head that returns raw logits.

    Args:
        num_classes: Number of output classes.
    """

    def __init__(self, num_classes=3):
        super().__init__()
        self.layer1 = self._conv_stage(3, 16)    # 64x64 -> 32x32
        self.layer2 = self._conv_stage(16, 32)   # 32x32 -> 16x16
        self.layer3 = self._conv_stage(32, 64)   # 16x16 -> 8x8
        # Classifier head; fc1 expects the flattened 64-channel 8x8 feature map
        self.fc1 = nn.Linear(64 * 8 * 8, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one conv -> ReLU -> average-pool stage."""
        return nn.Sequential(
            # (size + 2*2 - 5) / 1 + 1 == size, so the convolution keeps H and W
            nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2),
        )

    def forward(self, x):
        features = x
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        flat = torch.flatten(features, start_dim=1)  # (batch, 64 * 8 * 8)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Create a model instance with three output classes
model = LeNet5(num_classes=3)

4. 模型训练

python 复制代码
def train():
    """Fine-tune LeNet5 on the ``train`` image folder and save the weights.

    The image folder is split 70/30 into a training and a held-out subset.
    Training resumes from ``pth_path`` when that checkpoint exists and starts
    from freshly initialised weights otherwise. The model graph, every input
    batch and the per-epoch loss/accuracy are written to TensorBoard through
    the module-level ``writer``. After the last epoch the held-out accuracy is
    printed and the weights are saved twice: as a timestamped backup in
    ``prepare_path`` and as ``last_model.pth`` in ``weight_path``.

    Raises:
        FileNotFoundError: If the training image directory does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Augmentation and preprocessing pipeline.
    # NOTE(review): the random rotation/invert/affine steps run after Normalize,
    # i.e. on standardized values -- confirm this ordering is intended.
    transform = transforms.Compose([
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.RandomRotation(degrees=(0, 180)),
        transforms.RandomInvert(),
        transforms.RandomAffine(degrees=(30, 70), translate=(0.1, 0.3), scale=(0.5, 0.75)),
    ])

    # Fail early with a clear message when the dataset is missing
    train_path = os.path.join(data_path, 'train')
    if not os.path.exists(train_path):
        raise FileNotFoundError(f"数据集路径不存在: {train_path}")

    full_dataset = ImageFolder(root=train_path, transform=transform)
    print("分类列表:", full_dataset.classes)
    print("分类和索引的对应关系:", full_dataset.class_to_idx)

    # 70/30 split into training and held-out subsets.
    # NOTE(review): both subsets share the augmenting transform above, so the
    # held-out accuracy is measured on augmented images.
    train_ratio = 0.7
    train_size = int(train_ratio * len(full_dataset))
    test_size = len(full_dataset) - train_size
    train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

    # Build the model and resume from the last checkpoint when one exists.
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    net = LeNet5(num_classes=len(full_dataset.classes)).to(device)
    if os.path.isfile(pth_path):
        state_dict = torch.load(pth_path, map_location=device)
        net.load_state_dict(state_dict)
    else:
        print(f"No checkpoint found at {pth_path}; training from scratch")
    net.train()

    # Record the network structure in TensorBoard
    writer.add_graph(net, torch.randn(1, 3, 64, 64).to(device))

    # Training configuration
    epochs = 10
    batch_size = 64
    # reduction="sum" so that dividing by the dataset size below yields a per-sample loss
    criterion = nn.CrossEntropyLoss(reduction="sum")
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    for epoch in range(epochs):
        start_time = time.time()
        accuracy = 0
        total_loss = 0

        for i, (x, y) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            yhat = net(x)
            loss = criterion(yhat, y)
            loss.backward()
            optimizer.step()

            accuracy += torch.sum(torch.argmax(yhat, dim=1) == y).item()
            total_loss += loss.item()

            # Log every input batch as an image grid; the global step counts
            # batches, so it advances by len(train_loader) per epoch.
            img_grid = vutils.make_grid(x, normalize=True, nrow=8)
            writer.add_image(f"r_m_{epoch}_{i * 1}", img_grid, epoch * len(train_loader) + i)

        print(
            f"Epoch {epoch+1}/{epochs} - Time: {time.time() - start_time:.2f}s, Accuracy: {accuracy / len(train_dataset):.4f}, Loss: {total_loss / len(train_dataset):.4f}")
        writer.add_scalar("Loss/train", total_loss / len(train_dataset), epoch)
        writer.add_scalar("Accuracy/train", accuracy / len(train_dataset), epoch)

    # Make sure buffered TensorBoard events reach disk
    writer.flush()

    # Evaluate on the held-out subset
    test_accuracy = test(net, test_loader, device)
    print(f"Test Accuracy: {test_accuracy:.4f}")

    # Timestamped file name (YYYYmmdd-HHMMSS) for the backup copy
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    pth_filename = f"model_{timestamp}.pth"
    pth_filepath = os.path.join(prepare_path, pth_filename)

    # torch.save does not create missing folders, so create them first
    os.makedirs(prepare_path, exist_ok=True)
    os.makedirs(weight_path, exist_ok=True)

    # Backup copy in the prepare folder
    torch.save(net.state_dict(), pth_filepath)
    print(f"Model saved as: {pth_filename} in prepare folder")

    # Refresh last_model.pth so the next run resumes from these weights
    last_model_path = os.path.join(weight_path, "last_model.pth")
    torch.save(net.state_dict(), last_model_path)
    print(f"Updated last_model.pth")

4.1 训练参数

轮次:epochs = 10

批次:batch_size=64

4.2 损失函数

交叉熵损失

4.3 优化器

optim.Adam()

4.4 训练过程可视化

使用tensorBoard


5. 模型验证

验证我们的模型的鲁棒性和泛化能力

python 复制代码
def inference():
    """Print the accuracy of the saved model on the images under ``vali_path``.

    Loads the weights from ``pth_path`` into a 3-class LeNet5, runs it over
    the whole image folder in batches of 8 without gradients, and prints the
    fraction of correctly classified images.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Same resize and normalization as the training pipeline, without the
    # random augmentation, so the model sees inputs on the scale it was trained on.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    full_dataset = ImageFolder(root=vali_path, transform=transform)
    print("分类列表:", full_dataset.classes)
    print("分类和索引的对应关系:", full_dataset.class_to_idx)

    # Build the network and load the saved weights onto the active device
    net = LeNet5(num_classes=3).to(device)
    state_dict = torch.load(pth_path, map_location=device)
    net.load_state_dict(state_dict)
    net.eval()

    val_loader = DataLoader(full_dataset, batch_size=8, shuffle=False)

    # Count correct predictions over the whole folder
    acc = 0
    total = 0
    with torch.no_grad():
        for x, y in tqdm(val_loader, desc="Validation"):
            x, y = x.to(device), y.to(device)
            yhat = net(x)
            acc += torch.sum(torch.argmax(yhat, dim=1) == y).item()
            total += y.size(0)

    val_accuracy = acc / total
    print(f"Validation Accuracy: {val_accuracy:.4f}")

5.1 验证过程数据化

生成csv:

5.2 指标报表

外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传

5.3 混淆矩阵

可视化

6. 模型优化

6.1 增加网络深度

让网络变得更好

python 复制代码
# 通道注意力模块
class ChannelAttentionModule(nn.Module):
    """Channel attention: re-weight each channel by a learned gate in (0, 1).

    Global max- and average-pooled channel descriptors go through one shared
    two-layer MLP; the two results are summed, squashed with a sigmoid and
    multiplied onto the input.

    Args:
        c: Number of input channels.
        r: Reduction ratio of the MLP bottleneck. The hidden width is
            ``c // r`` but never less than 1, so the module also works when
            ``c < r``.
    """

    def __init__(self, c, r=16):
        super(ChannelAttentionModule, self).__init__()
        self.maxpool = nn.AdaptiveMaxPool2d(1)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        # c // r is 0 for c < r, which would create an empty bottleneck layer
        hidden = max(1, c // r)
        self.sharedMLP = nn.Sequential(
            nn.Linear(c, hidden),
            nn.ReLU(),
            nn.Linear(hidden, c)
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Collapse H and W to get one descriptor per channel: (batch, c)
        maxpool_out = self.maxpool(x).view(x.size(0), -1)
        avgpool_out = self.avgpool(x).view(x.size(0), -1)
        max_out = self.sharedMLP(maxpool_out)
        avg_out = self.sharedMLP(avgpool_out)
        # Back to (batch, c, 1, 1) so the gate broadcasts over H and W
        out = self.sigmoid((max_out + avg_out).unsqueeze(2).unsqueeze(3))
        return x * out

# 空间注意力模块
class SpatialAttentionModule(nn.Module):
    """Spatial attention: re-weight each pixel location by a learned gate in (0, 1).

    The per-pixel maximum and mean over the channel axis are stacked into a
    2-channel map, reduced to one channel by a padded 7x7 convolution, passed
    through a sigmoid and multiplied onto the input.
    """

    def __init__(self):
        super().__init__()
        # padding=3 with a 7x7 kernel keeps the spatial size unchanged
        self.conv = nn.Conv2d(2, 1, kernel_size=7, stride=1, padding=3)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        channel_max = torch.max(x, dim=1, keepdim=True)[0]
        channel_mean = torch.mean(x, dim=1, keepdim=True)
        descriptor = torch.cat([channel_max, channel_mean], dim=1)
        gate = self.sigmoid(self.conv(descriptor))
        return x * gate

# 混合注意力模块
class CBAM(nn.Module):
    """Combined attention block: channel attention followed by spatial attention.

    Args:
        c: Number of input channels, forwarded to the channel attention module.
        r: Reduction ratio, forwarded to the channel attention module.
    """

    def __init__(self, c, r=16):
        super().__init__()
        self.cam = ChannelAttentionModule(c, r)
        self.sam = SpatialAttentionModule()

    def forward(self, x):
        channel_refined = self.cam(x)
        return self.sam(channel_refined)

6.2 继续训练

备份和保存last

python 复制代码
    # torch.save does not create missing folders, so create them first
    os.makedirs(prepare_path, exist_ok=True)
    os.makedirs(weight_path, exist_ok=True)

    # Save a timestamped backup of the weights in the prepare folder
    torch.save(net.state_dict(), pth_filepath)
    print(f"Model saved as: {pth_filename} in prepare folder")

    # Refresh last_model.pth so the next run resumes from these weights
    last_model_path = os.path.join(weight_path, "last_model.pth")
    torch.save(net.state_dict(), last_model_path)
    print(f"Updated last_model.pth")

6.3 预训练和迁移学习

让网络变得更好

python 复制代码
    # Model preparation: build LeNet5 sized to the dataset's classes, then
    # continue from the previously saved weights
    net = LeNet5(num_classes=len(full_dataset.classes)).to(device)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine
    state_dict = torch.load(pth_path, map_location=device)
    net.load_state_dict(state_dict)
    net.train()

7. 模型应用

推理工作

7.1 图片处理

opencv的操作

python 复制代码
def imgread(img_path):
    """Load one image file and turn it into a model-ready batch of size 1.

    The image is read with OpenCV, converted from BGR to RGB, resized to
    64x64, scaled to a tensor and normalized with the same mean/std that the
    training pipeline uses.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A float tensor of shape (1, 3, 64, 64).

    Raises:
        ValueError: If OpenCV cannot read the file.
    """
    imgdata = cv2.imread(img_path)
    if imgdata is None:
        raise ValueError(f"Failed to load image at path: {img_path}")
    # OpenCV delivers BGR; the model was trained on RGB images
    imgdata = cv2.cvtColor(imgdata, cv2.COLOR_BGR2RGB)
    transformdata = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        # Must match the statistics used in train(); different values shift
        # the input distribution the network sees at inference time.
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    imgdata = transformdata(imgdata)
    # Add the leading batch dimension expected by the network
    imgdata = imgdata.unsqueeze(0)
    return imgdata

7.2 模型推理

python 复制代码
def inference_one(img_path=r"D:\Desktop\cat1.png"):
    """Classify a single image with the saved model and print the result.

    Args:
        img_path: Image file to classify. Defaults to the sample path the
            script was written with.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = LeNet5(num_classes=3).to(device)
    state_dict = torch.load(pth_path, map_location=device)
    net.load_state_dict(state_dict)
    net.eval()

    # Preprocess the image into a (1, 3, 64, 64) batch on the active device
    imgdata = imgread(img_path).to(device)

    with torch.no_grad():
        out = net(imgdata)
        # The network returns logits; softmax turns them into class probabilities
        probabilities = torch.softmax(out, dim=1)
        predicted_class_idx = torch.argmax(probabilities, dim=1).item()
        # NOTE(review): assumes this order matches the class indices used in
        # training -- verify against ImageFolder's class_to_idx.
        classlabels = ['猫', '狗', '野生']
        print(f"Predicted class: {classlabels[predicted_class_idx]}")
        print(f"Probabilities: {probabilities[0].tolist()}")

7.3 类别显示

python 复制代码
Predicted class: 猫
Probabilities: [0.9819951057434082, 0.01752881519496441, 0.0004761434975080192]

8. 模型移植

使用ONNX

8.1 导出ONNX

8.2 使用ONNX推理

python 复制代码
def inference_one(img_path=r"D:\Desktop\cat1.png"):
    """Classify a single image, print the result and export the model to ONNX.

    The exported file is written to ``<current_path>/LeNet5.onnx`` and takes
    the preprocessed image batch as its example input.

    Args:
        img_path: Image file to classify and to use as the export example.
            Defaults to the sample path the script was written with.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = LeNet5(num_classes=3).to(device)
    state_dict = torch.load(pth_path, map_location=device)
    net.load_state_dict(state_dict)
    net.eval()

    # Preprocess the image into a (1, 3, 64, 64) batch on the active device
    imgdata = imgread(img_path).to(device)

    with torch.no_grad():
        out = net(imgdata)
        # The network returns logits; softmax turns them into class probabilities
        probabilities = torch.softmax(out, dim=1)
        predicted_class_idx = torch.argmax(probabilities, dim=1).item()
        # NOTE(review): assumes this order matches the class indices used in
        # training -- verify against ImageFolder's class_to_idx.
        classlabels = ['猫', '狗', '野生']
        print(f"Predicted class: {classlabels[predicted_class_idx]}")
        print(f"Probabilities: {probabilities[0].tolist()}")

    # Export the network (without the softmax above) to ONNX
    onnx_path = os.path.join(current_path, "LeNet5.onnx")
    torch.onnx.export(
        net,  # model to export
        imgdata,  # example input used for tracing
        onnx_path,  # output file
        export_params=True,  # store the trained weights in the file
        opset_version=11,  # ONNX operator set version
        do_constant_folding=True,  # fold constant sub-graphs at export time
        input_names=["input"],  # name of the graph input
        output_names=["output"],  # name of the graph output
    )
    print(f"ONNX 模型已导出到: {onnx_path}")
# Script entry point: run single-image inference, which also performs the ONNX export
if __name__ =="__main__":
    inference_one()
    # print(onnx.__version__)

推理:

python 复制代码
# Load the exported ONNX model
onnx_path = os.path.join(current_path, "LeNet5.onnx")
session = ort.InferenceSession(onnx_path)

# Look up the graph's input and output names
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name

# Read and preprocess the image, then hand it over as a NumPy array
img_path = r"D:\Desktop\cat1.png"
imgdata = imgread(img_path).numpy()

# Run the model. The exported graph is the bare network, so it returns logits.
result = session.run([output_name], {input_name: imgdata})
logits = result[0]

# Softmax over the class axis (shifted by the row maximum for numerical
# stability) so the printed values really are probabilities.
shifted = logits - logits.max(axis=1, keepdims=True)
exp_scores = np.exp(shifted)
probabilities = exp_scores / exp_scores.sum(axis=1, keepdims=True)

predicted_class_idx = np.argmax(probabilities, axis=1).item()
classlabels = ['猫', '狗', '野生']
print(f"Predicted class: {classlabels[predicted_class_idx]}")
print(f"Probabilities: {probabilities[0].tolist()}")

9. 项目总结

9.1 遇到的问题及解决办法

问题

  • 导出onnx时命名文件为"onnx.py"会与onnx库冲突,报错为:

python 复制代码
File "d:\Desktop\计算机视觉\06day\onnx.py", line 7, in <module>
  import onnx
File "d:\Desktop\计算机视觉\06day\onnx.py", line 8, in <module>
  print(onnx.__version__)
AttributeError: partially initialized module 'onnx' has no attribute '__version__' (most likely due to a circular import)

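解决办法:将脚本文件重命名为不与库同名的名字(例如 export_onnx.py),并删除同目录下残留的 __pycache__ 缓存后重新运行,这样 import onnx 才会导入真正的 onnx 库而不是脚本自身。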

9.2 收获

​ 在本轮项目中,我切身体会了深度学习的整个流程,从数据集的准备、模型的设计与训练,到模型的验证与优化,每一步都充满了挑战与收获。以下是我在项目中的主要收获:

1. 对深度学习流程的深入理解

​ 通过本次项目,我对深度学习的整个流程有了更加清晰的认识。从数据预处理、模型设计、训练调参,到模型验证与优化,每一步都需要细致的思考和调试。尤其是在数据增强和模型优化阶段,我深刻体会到了数据和模型对最终结果的影响。

2. 数据增强的重要性

​ 数据增强是提升模型泛化能力的重要手段。通过本次项目,我学会了如何使用 PyTorch 提供的各种数据增强方法(如随机旋转、随机反转、颜色抖动等),并通过 TensorBoard 可视化了增强后的数据分布。数据增强不仅能够增加数据的多样性,还能有效防止模型过拟合。

3. 模型优化的技巧

​ 在模型优化阶段,我尝试了多种方法来提升模型的性能。例如,通过增加网络深度(引入 CBAM 模块),提升了模型的表达能力;通过迁移学习,利用预训练模型的权重加速训练过程。这些优化技巧让我对模型的设计与优化有了更深的理解,但是在后续我并没有用CBAM,因为手写的LeNet5模型并不能很好的突出他的特征。

代码合集:

python 复制代码
import os
import time
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.utils as vutils  # 用于生成图像网格
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm  # 导入 tqdm
from torch.utils.tensorboard import SummaryWriter
from torchvision.datasets import ImageFolder
from LeNet5 import LeNet5

# Resolve every path relative to this file so the script works from any working directory
current_path = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(current_path, "afhq")  # dataset root; train() reads its "train" subfolder
# ImageFolder treats each direct subfolder of its root as one class, so the
# validation root must be the split folder, not the dataset root (whose
# subfolders are the splits themselves).
# NOTE(review): assumes the validation split folder is named "val" -- adjust if the local dataset differs.
vali_path = os.path.join(current_path, "afhq", "val")
weight_path = os.path.join(current_path, "runs", "weights")  # folder holding the current weights
prepare_path = os.path.join(current_path, "runs", "prepare")  # folder holding timestamped backups
pth_path = os.path.join(weight_path, "last_model.pth")  # weights of the most recent run
excel_path = os.path.join(current_path, "metrics", "metrics.xlsx")

# One TensorBoard log directory per run, named by the start timestamp
log_dir = os.path.join(current_path, "tboard", time.strftime("%Y%m%d-%H%M%S"))
writer = SummaryWriter(log_dir=log_dir)

# Print plain decimals instead of scientific notation
torch.set_printoptions(sci_mode=False)
np.set_printoptions(suppress=True)


def train():
    """Fine-tune LeNet5 on the ``train`` image folder and save the weights.

    The image folder is split 70/30 into a training and a held-out subset.
    Training resumes from ``pth_path`` when that checkpoint exists and starts
    from freshly initialised weights otherwise. The model graph, every input
    batch and the per-epoch loss/accuracy are written to TensorBoard through
    the module-level ``writer``. After the last epoch the held-out accuracy is
    printed and the weights are saved twice: as a timestamped backup in
    ``prepare_path`` and as ``last_model.pth`` in ``weight_path``.

    Raises:
        FileNotFoundError: If the training image directory does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Augmentation and preprocessing pipeline.
    # NOTE(review): the random rotation/invert/affine steps run after Normalize,
    # i.e. on standardized values -- confirm this ordering is intended.
    transform = transforms.Compose([
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.RandomRotation(degrees=(0, 180)),
        transforms.RandomInvert(),
        transforms.RandomAffine(degrees=(30, 70), translate=(0.1, 0.3), scale=(0.5, 0.75)),
    ])

    # Fail early with a clear message when the dataset is missing
    train_path = os.path.join(data_path, 'train')
    if not os.path.exists(train_path):
        raise FileNotFoundError(f"数据集路径不存在: {train_path}")

    full_dataset = ImageFolder(root=train_path, transform=transform)
    print("分类列表:", full_dataset.classes)
    print("分类和索引的对应关系:", full_dataset.class_to_idx)

    # 70/30 split into training and held-out subsets.
    # NOTE(review): both subsets share the augmenting transform above, so the
    # held-out accuracy is measured on augmented images.
    train_ratio = 0.7
    train_size = int(train_ratio * len(full_dataset))
    test_size = len(full_dataset) - train_size
    train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

    # Build the model and resume from the last checkpoint when one exists.
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    net = LeNet5(num_classes=len(full_dataset.classes)).to(device)
    if os.path.isfile(pth_path):
        state_dict = torch.load(pth_path, map_location=device)
        net.load_state_dict(state_dict)
    else:
        print(f"No checkpoint found at {pth_path}; training from scratch")
    net.train()

    # Record the network structure in TensorBoard
    writer.add_graph(net, torch.randn(1, 3, 64, 64).to(device))

    # Training configuration
    epochs = 10
    batch_size = 64
    # reduction="sum" so that dividing by the dataset size below yields a per-sample loss
    criterion = nn.CrossEntropyLoss(reduction="sum")
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    for epoch in range(epochs):
        start_time = time.time()
        accuracy = 0
        total_loss = 0

        for i, (x, y) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            yhat = net(x)
            loss = criterion(yhat, y)
            loss.backward()
            optimizer.step()

            accuracy += torch.sum(torch.argmax(yhat, dim=1) == y).item()
            total_loss += loss.item()

            # Log every input batch as an image grid; the global step counts
            # batches, so it advances by len(train_loader) per epoch.
            img_grid = vutils.make_grid(x, normalize=True, nrow=8)
            writer.add_image(f"r_m_{epoch}_{i * 1}", img_grid, epoch * len(train_loader) + i)

        print(
            f"Epoch {epoch+1}/{epochs} - Time: {time.time() - start_time:.2f}s, Accuracy: {accuracy / len(train_dataset):.4f}, Loss: {total_loss / len(train_dataset):.4f}")
        writer.add_scalar("Loss/train", total_loss / len(train_dataset), epoch)
        writer.add_scalar("Accuracy/train", accuracy / len(train_dataset), epoch)

    # Make sure buffered TensorBoard events reach disk
    writer.flush()

    # Evaluate on the held-out subset
    test_accuracy = test(net, test_loader, device)
    print(f"Test Accuracy: {test_accuracy:.4f}")

    # Timestamped file name (YYYYmmdd-HHMMSS) for the backup copy
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    pth_filename = f"model_{timestamp}.pth"
    pth_filepath = os.path.join(prepare_path, pth_filename)

    # torch.save does not create missing folders, so create them first
    os.makedirs(prepare_path, exist_ok=True)
    os.makedirs(weight_path, exist_ok=True)

    # Backup copy in the prepare folder
    torch.save(net.state_dict(), pth_filepath)
    print(f"Model saved as: {pth_filename} in prepare folder")

    # Refresh last_model.pth so the next run resumes from these weights
    last_model_path = os.path.join(weight_path, "last_model.pth")
    torch.save(net.state_dict(), last_model_path)
    print(f"Updated last_model.pth")


def test(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            _, predicted = torch.max(outputs.data, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
    return correct / total


def imgread(img_path):
    """Load one image file and turn it into a model-ready batch of size 1.

    The image is read with OpenCV, converted from BGR to RGB, resized to
    64x64, scaled to a tensor and normalized with the same mean/std that the
    training pipeline uses.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A float tensor of shape (1, 3, 64, 64).

    Raises:
        ValueError: If OpenCV cannot read the file.
    """
    imgdata = cv2.imread(img_path)
    if imgdata is None:
        raise ValueError(f"Failed to load image at path: {img_path}")
    # OpenCV delivers BGR; the model was trained on RGB images
    imgdata = cv2.cvtColor(imgdata, cv2.COLOR_BGR2RGB)
    transformdata = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        # Must match the statistics used in train(); different values shift
        # the input distribution the network sees at inference time.
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    imgdata = transformdata(imgdata)
    # Add the leading batch dimension expected by the network
    imgdata = imgdata.unsqueeze(0)
    return imgdata


def inference():
    """Print the accuracy of the saved model on the images under ``vali_path``.

    Loads the weights from ``pth_path`` into a 3-class LeNet5, runs it over
    the whole image folder in batches of 8 without gradients, and prints the
    fraction of correctly classified images.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Same resize and normalization as the training pipeline, without the
    # random augmentation, so the model sees inputs on the scale it was trained on.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    full_dataset = ImageFolder(root=vali_path, transform=transform)
    print("分类列表:", full_dataset.classes)
    print("分类和索引的对应关系:", full_dataset.class_to_idx)

    # Build the network and load the saved weights onto the active device
    net = LeNet5(num_classes=3).to(device)
    state_dict = torch.load(pth_path, map_location=device)
    net.load_state_dict(state_dict)
    net.eval()

    val_loader = DataLoader(full_dataset, batch_size=8, shuffle=False)

    # Count correct predictions over the whole folder
    acc = 0
    total = 0
    with torch.no_grad():
        for x, y in tqdm(val_loader, desc="Validation"):
            x, y = x.to(device), y.to(device)
            yhat = net(x)
            acc += torch.sum(torch.argmax(yhat, dim=1) == y).item()
            total += y.size(0)

    val_accuracy = acc / total
    print(f"Validation Accuracy: {val_accuracy:.4f}")

def inference_one(img_path=r"D:\Desktop\cat1.png"):
    """Classify a single image with the saved model and print the result.

    Args:
        img_path: Image file to classify. Defaults to the sample path the
            script was written with.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = LeNet5(num_classes=3).to(device)
    state_dict = torch.load(pth_path, map_location=device)
    net.load_state_dict(state_dict)
    net.eval()

    # Preprocess the image into a (1, 3, 64, 64) batch on the active device
    imgdata = imgread(img_path).to(device)

    with torch.no_grad():
        out = net(imgdata)
        # The network returns logits; softmax turns them into class probabilities
        probabilities = torch.softmax(out, dim=1)
        predicted_class_idx = torch.argmax(probabilities, dim=1).item()
        # NOTE(review): assumes this order matches the class indices used in
        # training -- verify against ImageFolder's class_to_idx.
        classlabels = ['猫', '狗', '野生']
        print(f"Predicted class: {classlabels[predicted_class_idx]}")
        print(f"Probabilities: {probabilities[0].tolist()}")

# Script entry point: only single-image inference is enabled; uncomment the
# other calls to train or to validate instead.
if __name__ == "__main__":
    # train()
    # inference()
    inference_one()
python 复制代码
# 测试模型
# 保存模型
# 训练过程可视化
# test.py
import os 
from torch.utils.data import DataLoader
from torchvision import  transforms
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from LeNet5_ import LeNet5
import time
from sklearn.metrics import *
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm  # 导入 tqdm
from torch.utils.tensorboard import SummaryWriter
from torchvision.datasets import ImageFolder
from LeNet5 import LeNet5

# Resolve every path relative to this file so the script works from any working directory
current_path = os.path.dirname(os.path.abspath(__file__))
# ImageFolder treats each direct subfolder of its root as one class, so the
# validation root must be the split folder rather than the dataset root.
# NOTE(review): assumes the validation split folder is named "val" -- adjust if the local dataset differs.
val_path = os.path.join(current_path, 'afhq', 'val')
csv_path = os.path.join(current_path, 'metrics')  # folder for the CSV results and reports
weight_path = os.path.join(current_path, "runs", "weights")  # folder holding the current weights
pth_path = os.path.join(weight_path, "last_model.pth")  # weights of the most recent run


# One TensorBoard log directory per run, named by the start timestamp
log_dir = os.path.join(current_path, "tboard", time.strftime("%Y%m%d-%H%M%S"))
writer = SummaryWriter(log_dir=log_dir)

# Print plain decimals instead of scientific notation
torch.set_printoptions(sci_mode=False)
np.set_printoptions(suppress=True)
#这里定义测试方法 获取准确率
def test():
    """Print the accuracy of the saved model on the images under ``val_path``.

    Loads the weights from ``pth_path`` into a 3-class LeNet5 and evaluates it
    in batches of 8 without gradients.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Resize and normalize only; no augmentation during evaluation
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Uses the module-level val_path
    full_dataset = ImageFolder(root=val_path, transform=transform)

    # Build the network and load the saved weights onto the active device
    net = LeNet5(num_classes=3).to(device)
    state_dict = torch.load(pth_path, map_location=device)
    net.load_state_dict(state_dict)
    net.eval()

    val_loader = DataLoader(full_dataset, batch_size=8, shuffle=False)
    acc = 0
    total = 0
    with torch.no_grad():
        for x, y in tqdm(val_loader, desc="Validation"):
            x, y = x.to(device), y.to(device)
            yhat = net(x)
            acc += torch.sum(torch.argmax(yhat, dim=1) == y).item()
            total += y.size(0)
    val_accuracy = acc / total
    print(f"Validation Accuracy: {val_accuracy:.4f}")
    
#混淆矩阵 然后pd.tocsv() 测试数据数据化
def test_csv():
    """Evaluate the saved model on ``val_path`` and dump per-sample results to CSV.

    Each row of ``<csv_path>/validation_results1.csv`` holds the three class
    probabilities followed by the predicted and the true class index. The
    overall accuracy is printed as well.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Same resize and normalization as training, so inputs are on the scale
    # the network was trained on
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    model = LeNet5(num_classes=3)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine
    state_dict = torch.load(pth_path, map_location=device)

    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    val_data = ImageFolder(root=val_path, transform=transform)
    val_loader = DataLoader(val_data, batch_size=64, shuffle=False)

    acc = 0
    total_csv_data = []  # one array per batch, stacked after the loop

    with torch.no_grad():
        for x, y in tqdm(val_loader, desc="Validation"):
            x, y = x.to(device), y.to(device)
            logits = model(x)
            # The network returns logits; store softmax probabilities so the
            # per-class columns really hold a probability distribution
            probs = torch.softmax(logits, dim=1)
            pred = torch.argmax(probs, dim=1)
            acc += torch.sum(pred == y).item()

            # Columns: class probabilities, predicted index, true index
            batch_csv_data = np.concatenate(
                [
                    probs.cpu().numpy(),
                    pred.unsqueeze(dim=1).cpu().numpy(),
                    y.unsqueeze(dim=1).cpu().numpy(),
                ],
                axis=1,
            )
            total_csv_data.append(batch_csv_data)

    # Stack all batches into one (num_samples, 5) array
    total_csv_data = np.vstack(total_csv_data)

    print("验证数据集的准确率:%.4f" % (acc / len(val_data)))

    # Write the CSV, creating the output folder on first use
    columns = ["猫", "狗", "野生", "y_pred", "y_true"]
    df = pd.DataFrame(total_csv_data, columns=columns)
    os.makedirs(csv_path, exist_ok=True)
    df.to_csv(os.path.join(csv_path, "validation_results1.csv"), index=False)




# 报表化
def test_report():
    """Print a metrics report from the validation CSV and save it as CSV.

    Reads ``<csv_path>/validation_results1.csv``, computes accuracy and the
    per-class precision, recall and F1 score, prints them together with the
    classification report and the confusion matrix, and writes the numbers to
    ``<csv_path>/model_report.csv``.
    """
    csv_data = pd.read_csv(os.path.join(csv_path, "validation_results1.csv"))

    # True and predicted class indices
    y_true = csv_data["y_true"].values.astype(int)
    y_pred = csv_data["y_pred"].values.astype(int)

    class_names = ["猫", "狗", "野生"]
    # Pin the label set so every metric has exactly one entry per class, even
    # when a class never occurs in the data; otherwise the arrays shrink and
    # no longer line up with the fixed row names below.
    labels = list(range(len(class_names)))

    matrix = confusion_matrix(y_true, y_pred, labels=labels)

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, labels=labels, average=None)
    recall = recall_score(y_true, y_pred, labels=labels, average=None)
    f1 = f1_score(y_true, y_pred, labels=labels, average=None)

    # Console report
    print("=" * 50)
    print("模型性能报表")
    print("=" * 50)
    print(f"准确率 (Accuracy): {accuracy:.4f}")
    print("\n分类报告:")
    print(classification_report(y_true, y_pred, labels=labels, target_names=class_names))
    print("\n混淆矩阵:")
    print(matrix)
    print("=" * 50)

    # Row names: overall accuracy, then one row per class and metric
    metric_names = ["准确率 (Accuracy)"]
    for metric in ("Precision", "Recall", "F1 Score"):
        metric_names.extend(f"{name} {metric}" for name in class_names)

    report_data = {
        "指标": metric_names,
        "值": [accuracy, *precision, *recall, *f1]
    }
    report_path = os.path.join(csv_path, "model_report.csv")
    report_df = pd.DataFrame(report_data)
    report_df.to_csv(report_path, index=False)
    print(f"报表已保存为: {report_path}")


# 可视化 用matplotlib 测试数据可视化
def test_visual():
    """Plot the confusion matrix of the validation CSV with matplotlib.

    Reads ``<csv_path>/validation_results1.csv``, prints the confusion matrix
    and shows it as an annotated heat map.
    """
    csv_data = pd.read_csv(os.path.join(csv_path, "validation_results1.csv"))
    # The CSV may store the indices as floats; the metrics need integers
    y_true = csv_data["y_true"].values.astype(int)
    y_pred = csv_data["y_pred"].values.astype(int)

    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign readable with that font

    class_names = ["猫", "狗", "野生"]
    # Pin the label set so the matrix always has one row/column per tick
    # label, even when a class never occurs in the data.
    labels = list(range(len(class_names)))

    matrix = confusion_matrix(y_true, y_pred, labels=labels)
    print(matrix)

    plt.matshow(matrix, cmap=plt.cm.Greens)
    plt.colorbar()

    # Write the count into every cell (x is the column, y is the row)
    for (row, col), count in np.ndenumerate(matrix):
        plt.annotate(
            str(count),
            xy=(col, row),
            horizontalalignment="center",
            verticalalignment="center",
        )

    # Axis labels, tick names and title
    plt.xlabel("Predicted labels")
    plt.ylabel("True labels")
    plt.xticks(range(len(class_names)), class_names, rotation=45)
    plt.yticks(range(len(class_names)), class_names)
    plt.title("训练结果混淆矩阵视图")

    plt.show()

# Script entry point: only the confusion-matrix plot is enabled; uncomment the
# earlier stages to recompute accuracy, regenerate the CSV or print the report.
if __name__ == '__main__':
    #test()
    #test_csv()
    #test_report()
    test_visual()
相关推荐
小陈phd10 分钟前
深度学习之超分辨率算法——SRGAN
python·深度学习·神经网络·算法
qq_4230195510 分钟前
模型优化之知识蒸馏
深度学习·神经网络·机器学习
科技资讯快报23 分钟前
诸葛智能CTO文革:放大数据价值,释放金融营销原动力
大数据·人工智能
AI2AGI24 分钟前
天天 AI-241220:今日热点-OpenAI整大活!ChatGPT新增电话功能,全民AGI要来了
人工智能·ai·chatgpt·aigc·agi
Qingniu0143 分钟前
汽车气候控制传感器
人工智能·科技·单片机·嵌入式硬件·汽车·电脑·散热风扇
十年一梦实验室1 小时前
【C++】sophus : se2.hpp 提供了SE(2)群的数学操作和Lie群的基本操作 (十五)
开发语言·c++·人工智能·算法·机器学习
孤单网愈云1 小时前
12.8深度学习_经典神经网络_GoogleNet
人工智能·深度学习·神经网络
Elastic 中国社区官方博客1 小时前
带有 Elasticsearch 和 Langchain 的 Agentic RAG
大数据·人工智能·elasticsearch·搜索引擎·ai·langchain
Srlua1 小时前
自动图像标注可体验
人工智能·python