基于深度学习的猫狗识别系统【深度学习课设】

🏆 作者简介：席万里

⚡ 个人网站：https://dahua.bloggo.chat/

✍️ 一名后端开发小趴菜，同时略懂Vue与React前端技术，也了解一点微信小程序开发。

🍻 对计算机充满兴趣，愿意并且希望学习更多的技术，接触更多的大神，提高自己的编程思维和解决问题的能力。

文章目录

作品演示
- 代码
- - 1.train_and_test.py
  - 2、view.py（可视化界面）

作品演示

代码

采用模型VGG16、ALEXNet、Resnet18，训练测试。python版本3.10.11 。

数据集：和鲸社区猫狗图像数据集。https://www.heywhale.com/mw/project/631aedb893f47b16cb062b2a

1.train_and_test.py

py 复制代码

# 导入 PyTorch 库和相关模块
import torch                                 # PyTorch 的核心库，提供张量计算和自动求导功能
import torchvision.transforms as transforms  # 提供图像数据增强和预处理的功能
from torch.utils.data import Dataset         # 用于自定义数据集
from torch import nn, optim                  # nn 用于构建神经网络，optim 用于优化算法
from PIL import Image                        # 用于加载和处理图像文件
import time                                  # 用于记录训练时长和其他时间相关操作
import torchvision.models as models          # 包含一些预训练模型，如 AlexNet、ResNet 等
import os                                    # 用于与操作系统交互，如文件路径处理、创建目录等
import matplotlib.pyplot as plt              # 用于绘制图表，如准确率曲线、损失曲线等
from tqdm import tqdm                        # 用于显示训练过程中的进度条
from sklearn.metrics import confusion_matrix # 用于计算混淆矩阵，评估分类性能
import seaborn as sns                        # 用于绘制混淆矩阵的热图，提供美观的图表风格


device = torch.device('cpu')

# 数据预处理：缩放到224x224大小，并转换为Tensor
transformer = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])

# 加载训练数据集
DogTrainImageList = os.listdir(r"./catsdogs/train/Dog")  # 加载训练集中的狗图片列表
CatTrainImageList = os.listdir(r"./catsdogs/train/Cat")  # 加载训练集中的猫图片列表
train_label = []  # 存储训练数据的标签
train_data = []  # 存储训练数据的图像数据
dog_train_data_dir = r"./catsdogs/train/Dog/"  # 狗的图片目录路径
cat_train_data_dir = r"./catsdogs/train/Cat/"  # 猫的图片目录路径

# 将狗的图片加载进训练数据集
for i in range(len(DogTrainImageList)):
    train_label.append(1)  # 狗的标签为1
    dog_img = Image.open(dog_train_data_dir + DogTrainImageList[i]).convert('RGB')  # 打开图片并转换为RGB
    dog_img = transformer(dog_img)  # 进行预处理
    train_data.append(dog_img)  # 添加到训练数据

# 将猫的图片加载进训练数据集
for i in range(len(CatTrainImageList)):
    train_label.append(0)  # 猫的标签为0
    cat_img = Image.open(cat_train_data_dir + CatTrainImageList[i]).convert('RGB')  # 打开图片并转换为RGB
    cat_img = transformer(cat_img)  # 进行预处理
    train_data.append(cat_img)  # 添加到训练数据

# 加载测试数据集（与训练集类似）
DogTestImageList = os.listdir(r"./catsdogs/train/Dog")
CatTestImageList = os.listdir(r"./catsdogs/train/Cat")
test_label = []  # 存储测试数据的标签
test_data = []  # 存储测试数据的图像数据
dog_test_data_dir = r"./catsdogs/train/Dog/"  # 狗的测试图片目录路径
cat_test_data_dir = r"./catsdogs/train/Cat/"  # 猫的测试图片目录路径

# 将狗的测试图片加载进测试数据集
for i in range(len(DogTestImageList)):
    test_label.append(1)  # 狗的标签为1
    dog_img = Image.open(dog_test_data_dir + DogTestImageList[i]).convert('RGB')
    dog_img = transformer(dog_img)
    test_data.append(dog_img)

# 将猫的测试图片加载进测试数据集
for i in range(len(CatTestImageList)):
    test_label.append(0)  # 猫的标签为0
    cat_img = Image.open(cat_test_data_dir + CatTestImageList[i]).convert('RGB')
    cat_img = transformer(cat_img)
    test_data.append(cat_img)

# 自定义的数据集类，用于加载图像数据
class DealDataset(Dataset):
    def __init__(self, data, label, transform=None):
        self.data = data  # 图像数据
        self.label = label  # 图像标签
        self.transform = transform  # 图像预处理

    def __getitem__(self, index):
        data, label = self.data[index], int(self.label[index])  # 获取指定索引的数据和标签
        return data, label  # 返回数据和标签

    def __len__(self):
        return len(self.data)  # 返回数据集的大小

# 将训练数据集和测试数据集包装为DealDataset对象
TrainDataSet = DealDataset(train_data, train_label, transform=transformer)
TestDataSet = DealDataset(test_data, test_label, transform=transformer)

# 定义AlexNet模型
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        # 定义卷积层部分
        self.conv = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(192),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(256)
        )
        # 定义全连接层部分
        self.fc = nn.Sequential(
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 2)  # 输出2个类别：猫或狗
        )

    def forward(self, img):
        feature = self.conv(img)  # 通过卷积层提取特征
        output = self.fc(feature.view(img.shape[0], -1))  # 展开特征并通过全连接层进行分类
        return output

# 使用预训练的VGG16模型，并修改最后的全连接层以适应2个输出类别
class VGG16(nn.Module):
    def __init__(self, num_classes=2):
        super(VGG16, self).__init__()
        self.model = models.vgg16(pretrained=True)  # 加载预训练的VGG16模型
        self.model.classifier[-1] = nn.Linear(self.model.classifier[-1].in_features, num_classes)  # 修改输出层

    def forward(self, x):
        return self.model(x)  # 返回模型的输出

# 使用ResNet18模型，并修改最后的全连接层以适应2个输出类别
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.model = models.resnet18(pretrained=False)  # 加载ResNet18模型
        self.model.fc = nn.Linear(self.model.fc.in_features, 2)  # 修改输出层为2个类别

    def forward(self, x):
        return self.model(x)  # 返回模型的输出

# 绘制混淆矩阵的函数
def plot_combined_confusion_matrix(true_labels_dict, predicted_labels_dict, classes,
                                   save_path='combined_confusion_matrix.png'):
    # 创建一个子图，用来显示多个模型的混淆矩阵
    fig, axes = plt.subplots(1, len(true_labels_dict), figsize=(15, 5))

    # 遍历每个模型并绘制其混淆矩阵
    for i, (model_name, true_labels) in enumerate(true_labels_dict.items()):
        predicted_labels = predicted_labels_dict[model_name]
        cm = confusion_matrix(true_labels, predicted_labels)  # 计算混淆矩阵

        # 使用Seaborn绘制热图
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=classes, yticklabels=classes,
                    ax=axes[i], cbar=False, annot_kws={"size": 14})
        axes[i].set_xlabel('Predicted labels', fontsize=12)
        axes[i].set_ylabel('True labels', fontsize=12)
        axes[i].set_title(f'{model_name} Confusion Matrix', fontsize=14)

    # 调整布局并保存图像
    plt.tight_layout()
    plt.savefig(save_path)
    plt.show()

# 计算模型在测试集上的准确率
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        device = list(net.parameters())[0].device  # 获取模型的设备
    acc_sum, n = 0.0, 0
    predicted_labels = []
    true_labels = []
    with torch.no_grad():  # 在测试时不需要计算梯度
        for X, y in tqdm(data_iter, desc="加载中：", leave=True):
            net.eval()  # 将模型设置为评估模式
            outputs = net(X.to(device))  # 获取模型输出
            predicted = outputs.argmax(dim=1)  # 获取预测的标签
            true_labels.extend(y.cpu().numpy())  # 存储真实标签
            predicted_labels.extend(predicted.cpu().numpy())  # 存储预测标签
            acc_sum += (predicted == y.to(device)).float().sum().cpu().item()  # 累加准确的样本数
            n += y.shape[0]  # 累加样本总数

    return acc_sum / n, true_labels, predicted_labels  # 返回准确率，真实标签和预测标签

# 训练和评估模型
def train_and_evaluate_models(models, model_names, train_iter, test_iter, batch_size, optimizer_dict, device,
                              num_epochs, save_model_paths, plot_path):
    train_acc_history = {name: [] for name in model_names}  # 存储训练过程中每个模型的训练准确率
    test_acc_history = {name: [] for name in model_names}  # 存储测试过程中每个模型的测试准确率
    train_loss_history = {name: [] for name in model_names}  # 存储每个模型的训练损失

    # 存储每个模型的混淆矩阵数据
    true_labels_dict = {name: [] for name in model_names}
    predicted_labels_dict = {name: [] for name in model_names}

    # 迭代训练周期
    for epoch in range(num_epochs):
        for model, model_name in zip(models, model_names):  # 遍历每个模型
            model.train()
            optimizer = optimizer_dict[model_name]  # 获取当前模型的优化器
            loss_fn = torch.nn.CrossEntropyLoss()  # 定义损失函数
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.7)  # 学习率衰减策略
            train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()

            # 训练每个模型
            for X, y in train_iter:
                X, y = X.to(device), y.to(device)
                y_hat = model(X)  # 获取模型预测
                loss = loss_fn(y_hat, y)  # 计算损失

                optimizer.zero_grad()  # 清空梯度
                loss.backward()  # 反向传播
                optimizer.step()  # 更新参数

                train_l_sum += loss.item()  # 累加损失
                train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()  # 累加准确的样本数
                n += y.shape[0]
                batch_count += 1

            scheduler.step()  # 学习率衰减

            # 计算训练集和测试集的准确率
            train_acc = train_acc_sum / n
            test_acc, true_labels, predicted_labels = evaluate_accuracy(test_iter, model, device)

            # 存储每个模型的混淆矩阵数据
            true_labels_dict[model_name].extend(true_labels)
            predicted_labels_dict[model_name].extend(predicted_labels)

            train_acc_history[model_name].append(train_acc)
            test_acc_history[model_name].append(test_acc)
            train_loss_history[model_name].append(train_l_sum / batch_count)

            print(f'{model_name} epoch {epoch + 1}, loss {train_l_sum / batch_count:.4f}, '
                  f'train acc {train_acc:.3f}, test acc {test_acc:.3f}, time {time.time() - start:.1f} sec')

            # 保存模型
            torch.save(model.state_dict(), save_model_paths[model_name])  # 保存模型的权重
            print(f"{model_name} Model saved to {save_model_paths[model_name]} after epoch {epoch + 1}")

    # 在所有训练完成后生成混淆矩阵的综合图
    plot_combined_confusion_matrix(true_labels_dict, predicted_labels_dict, ['Cat', 'Dog'],
                                   save_path=os.path.join(plot_path, 'combined_confusion_matrix.png'))

    return train_acc_history, test_acc_history, train_loss_history

# 可视化训练结果并保存
def plot_and_save_results(train_acc_history, test_acc_history, train_loss_history, num_epochs, save_plots_path):
    plt.figure(figsize=(10, 5))
    # 绘制每个模型的训练与测试准确率曲线
    for model_name in ['AlexNet', 'ResNet18', 'VGG16']:
        if model_name in train_acc_history and model_name in test_acc_history:
            plt.plot(range(num_epochs), train_acc_history[model_name], label=f'{model_name} Train Accuracy')
            plt.plot(range(num_epochs), test_acc_history[model_name], label=f'{model_name} Test Accuracy')

    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title('AlexNet, ResNet18, VGG16 - Training and Test Accuracy Comparison')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(save_plots_path, 'accuracy_plot.png'))  # 保存准确率图像
    plt.show()

    plt.figure(figsize=(10, 5))
    # 绘制每个模型的训练损失曲线
    for model_name in ['AlexNet', 'ResNet18', 'VGG16']:
        if model_name in train_loss_history:
            plt.plot(range(num_epochs), train_loss_history[model_name], label=f'{model_name} Train Loss')

    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training Loss Comparison')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(save_plots_path, 'loss_plot.png'))  # 保存损失图像
    plt.show()

if __name__ == '__main__':
    # 设置训练参数
    num_epochs = 25  # 设置为可配置参数
    batch_size = 16  # 设置为可配置参数
    learning_rate = 0.009  # 设置为可配置参数

    save_model_paths = {
        'AlexNet': 'AlexNet.pth',
        'ResNet18': 'ResNet18.pth',
        'VGG16': 'VGG16.pth'
    }
    save_plots_path = './python'
    os.makedirs(save_plots_path, exist_ok=True)  # 创建保存模型和图像的文件夹

    # 创建模型实例
    alexnet_model = AlexNet().to(device)
    resnet_model = ResNet18().to(device)
    vgg_model = VGG16().to(device)

    # 创建数据加载器
    train_iter = torch.utils.data.DataLoader(TrainDataSet, batch_size=batch_size, shuffle=True, num_workers=2)
    test_iter = torch.utils.data.DataLoader(TestDataSet, batch_size=batch_size, shuffle=False, num_workers=2)

    # 优化器字典
    optimizer_dict = {
        'AlexNet': torch.optim.SGD(alexnet_model.parameters(), lr=learning_rate),
        'ResNet18': torch.optim.SGD(resnet_model.parameters(), lr=learning_rate),
        'VGG16': torch.optim.SGD(vgg_model.parameters(), lr=learning_rate)
    }

    # 训练并评估
    models = [alexnet_model, resnet_model, vgg_model]
    model_names = ['AlexNet', 'ResNet18', 'VGG16']
    train_acc_history, test_acc_history, train_loss_history = train_and_evaluate_models(
        models, model_names, train_iter, test_iter, batch_size, optimizer_dict, device, num_epochs, save_model_paths, save_plots_path)

    # 绘制并保存准确率和损失曲线
    plot_and_save_results(train_acc_history, test_acc_history, train_loss_history, num_epochs, save_plots_path)

2、view.py（可视化界面）

py 复制代码

import sys
from PyQt5.QtCore import Qt
from PyQt5.QtWidgets import QApplication, QWidget, QLabel, QPushButton, QFileDialog, QVBoxLayout, QGridLayout, \
    QTextEdit, QComboBox, QSpacerItem, QSizePolicy
from PyQt5.QtGui import QPixmap, QFont, QTextCursor
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
import torchvision.models as models


class AnimalClassifierApp(QWidget):
    def __init__(self):
        super().__init__()

        self.initUI()

    def initUI(self):
        self.setWindowTitle('猫狗识别系统')
        self.resize(600, 400)  # 更小的窗口尺寸

        # 创建布局
        grid = QGridLayout()
        grid.setContentsMargins(10, 10, 10, 10)  # 设置间距
        grid.setSpacing(5)  # 设置控件间距

        # 显示图像的标签
        self.image_label = QLabel(self)
        self.image_label.setFixedSize(250, 250)  # 调整图像显示尺寸
        self.image_label.setAlignment(Qt.AlignCenter)
        grid.addWidget(self.image_label, 1, 0, 2, 1)

        # 识别结果的标签
        self.result_label = QTextEdit(self)
        self.result_label.setFixedSize(250, 80)
        self.result_label.setReadOnly(True)
        self.result_label.setStyleSheet("color: red; font-size: 14px;")
        self.result_label.setAlignment(Qt.AlignCenter)
        grid.addWidget(self.result_label, 1, 1, 1, 2)

        # 模型选择下拉框
        self.model_selector = QComboBox(self)
        self.model_selector.addItem("AlexNet")
        self.model_selector.addItem("VGG16")
        self.model_selector.addItem("ResNet18")
        grid.addWidget(self.model_selector, 2, 0, 1, 2)

        # 按钮布局
        button_layout = QVBoxLayout()
        button_layout.setSpacing(5)  # 设置按钮间距

        # 上传图像按钮
        upload_btn = QPushButton('上传', self)
        upload_btn.clicked.connect(self.load_image)
        button_layout.addWidget(upload_btn)

        # 识别按钮
        recognize_btn = QPushButton('识别', self)
        recognize_btn.clicked.connect(self.classify_image)
        button_layout.addWidget(recognize_btn)

        # 添加按钮布局
        button_layout.addSpacerItem(QSpacerItem(10, 10, QSizePolicy.Minimum, QSizePolicy.Expanding))
        grid.addLayout(button_layout, 3, 1, 1, 2)

        self.setLayout(grid)

        # 加载模型
        self.device = torch.device('cpu')

        # 定义数据转换
        self.transform = transforms.Compose([
            transforms.Resize((148, 148)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.4, 0.4, 0.4], std=[0.2, 0.2, 0.2])
        ])

        self.image_path = ''
        self.model = None  # 模型初始化为空

    def load_image(self):
        options = QFileDialog.Options()
        options |= QFileDialog.ReadOnly
        file_name, _ = QFileDialog.getOpenFileName(self, "上传图片", "", "图片文件 (*.jpg *.jpeg *.png)",
                                                   options=options)
        if file_name:
            self.image_path = file_name
            pixmap = QPixmap(file_name)
            pixmap = pixmap.scaled(self.image_label.width(), self.image_label.height(), Qt.KeepAspectRatio)
            self.image_label.setPixmap(pixmap)
            self.result_label.setText('识别结果: ')

    def classify_image(self):
        if self.image_path:
            # 根据选择的模型加载相应的模型
            selected_model = self.model_selector.currentText()
            if selected_model == "AlexNet":
                self.model = self.load_alexnet_model()
            elif selected_model == "VGG16":
                self.model = self.load_vgg16_model()
            elif selected_model == "ResNet18":
                self.model = self.load_resnet18_model()

            image = Image.open(self.image_path).convert('RGB')
            image_tensor = self.transform(image).unsqueeze(0).to(self.device)

            with torch.no_grad():
                output = self.model(image_tensor)
                probabilities = torch.nn.functional.softmax(output, dim=1)
                confidence, predicted = torch.max(probabilities, 1)
                label = 'cat' if predicted.item() == 0 else 'dog'
                confidence = confidence.item()

            # 将图像转换为QPixmap
            pixmap = QPixmap(self.image_path)
            pixmap = pixmap.scaled(self.image_label.width(), self.image_label.height(), Qt.KeepAspectRatio)
            self.image_label.setPixmap(pixmap)

            # 设置识别结果字体颜色和对齐方式
            self.result_label.setText(f'识别结果: {label} \n\n置信度: {confidence:.2f}')
            self.result_label.setAlignment(Qt.AlignCenter)
            cursor = self.result_label.textCursor()
            cursor.select(QTextCursor.Document)
            self.result_label.setTextCursor(cursor)

    def load_alexnet_model(self):
        model = models.alexnet(pretrained=True)
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, 2)  # 修改最后一层
        model = model.to(self.device)
        model.eval()
        return model

    def load_vgg16_model(self):
        model = models.vgg16(pretrained=True)
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, 2)  # 修改最后一层
        model = model.to(self.device)
        model.eval()
        return model

    def load_resnet18_model(self):
        model = models.resnet18(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 2)  # 修改最后一层
        model = model.to(self.device)
        model.eval()
        return model


if __name__ == '__main__':
    app = QApplication(sys.argv)
    ex = AnimalClassifierApp()
    ex.show()
    sys.exit(app.exec_())