基于深度学习的猫狗识别系统【深度学习课设】

🏆 作者简介:席万里

⚡ 个人网站:https://dahua.bloggo.chat/

✍️ 一名后端开发小趴菜,同时略懂Vue与React前端技术,也了解一点微信小程序开发。

🍻 对计算机充满兴趣,愿意并且希望学习更多的技术,接触更多的大神,提高自己的编程思维和解决问题的能力。

文章目录

作品演示




代码

采用模型VGG16、ALEXNet、Resnet18,训练测试。python版本3.10.11 。

数据集:和鲸社区猫狗图像数据集。https://www.heywhale.com/mw/project/631aedb893f47b16cb062b2a

1.train_and_test.py

py 复制代码
# 导入 PyTorch 库和相关模块
import torch                                 # PyTorch 的核心库,提供张量计算和自动求导功能
import torchvision.transforms as transforms  # 提供图像数据增强和预处理的功能
from torch.utils.data import Dataset         # 用于自定义数据集
from torch import nn, optim                  # nn 用于构建神经网络,optim 用于优化算法
from PIL import Image                        # 用于加载和处理图像文件
import time                                  # 用于记录训练时长和其他时间相关操作
import torchvision.models as models          # 包含一些预训练模型,如 AlexNet、ResNet 等
import os                                    # 用于与操作系统交互,如文件路径处理、创建目录等
import matplotlib.pyplot as plt              # 用于绘制图表,如准确率曲线、损失曲线等
from tqdm import tqdm                        # 用于显示训练过程中的进度条
from sklearn.metrics import confusion_matrix # 用于计算混淆矩阵,评估分类性能
import seaborn as sns                        # 用于绘制混淆矩阵的热图,提供美观的图表风格


device = torch.device('cpu')

# 数据预处理:缩放到224x224大小,并转换为Tensor
transformer = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])

# 加载训练数据集
DogTrainImageList = os.listdir(r"./catsdogs/train/Dog")  # 加载训练集中的狗图片列表
CatTrainImageList = os.listdir(r"./catsdogs/train/Cat")  # 加载训练集中的猫图片列表
train_label = []  # 存储训练数据的标签
train_data = []  # 存储训练数据的图像数据
dog_train_data_dir = r"./catsdogs/train/Dog/"  # 狗的图片目录路径
cat_train_data_dir = r"./catsdogs/train/Cat/"  # 猫的图片目录路径

# 将狗的图片加载进训练数据集
for i in range(len(DogTrainImageList)):
    train_label.append(1)  # 狗的标签为1
    dog_img = Image.open(dog_train_data_dir + DogTrainImageList[i]).convert('RGB')  # 打开图片并转换为RGB
    dog_img = transformer(dog_img)  # 进行预处理
    train_data.append(dog_img)  # 添加到训练数据

# 将猫的图片加载进训练数据集
for i in range(len(CatTrainImageList)):
    train_label.append(0)  # 猫的标签为0
    cat_img = Image.open(cat_train_data_dir + CatTrainImageList[i]).convert('RGB')  # 打开图片并转换为RGB
    cat_img = transformer(cat_img)  # 进行预处理
    train_data.append(cat_img)  # 添加到训练数据

# 加载测试数据集(与训练集类似)
DogTestImageList = os.listdir(r"./catsdogs/train/Dog")
CatTestImageList = os.listdir(r"./catsdogs/train/Cat")
test_label = []  # 存储测试数据的标签
test_data = []  # 存储测试数据的图像数据
dog_test_data_dir = r"./catsdogs/train/Dog/"  # 狗的测试图片目录路径
cat_test_data_dir = r"./catsdogs/train/Cat/"  # 猫的测试图片目录路径

# 将狗的测试图片加载进测试数据集
for i in range(len(DogTestImageList)):
    test_label.append(1)  # 狗的标签为1
    dog_img = Image.open(dog_test_data_dir + DogTestImageList[i]).convert('RGB')
    dog_img = transformer(dog_img)
    test_data.append(dog_img)

# 将猫的测试图片加载进测试数据集
for i in range(len(CatTestImageList)):
    test_label.append(0)  # 猫的标签为0
    cat_img = Image.open(cat_test_data_dir + CatTestImageList[i]).convert('RGB')
    cat_img = transformer(cat_img)
    test_data.append(cat_img)

# 自定义的数据集类,用于加载图像数据
class DealDataset(Dataset):
    def __init__(self, data, label, transform=None):
        self.data = data  # 图像数据
        self.label = label  # 图像标签
        self.transform = transform  # 图像预处理

    def __getitem__(self, index):
        data, label = self.data[index], int(self.label[index])  # 获取指定索引的数据和标签
        return data, label  # 返回数据和标签

    def __len__(self):
        return len(self.data)  # 返回数据集的大小

# 将训练数据集和测试数据集包装为DealDataset对象
TrainDataSet = DealDataset(train_data, train_label, transform=transformer)
TestDataSet = DealDataset(test_data, test_label, transform=transformer)

# 定义AlexNet模型
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        # 定义卷积层部分
        self.conv = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(192),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(256)
        )
        # 定义全连接层部分
        self.fc = nn.Sequential(
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 2)  # 输出2个类别:猫或狗
        )

    def forward(self, img):
        feature = self.conv(img)  # 通过卷积层提取特征
        output = self.fc(feature.view(img.shape[0], -1))  # 展开特征并通过全连接层进行分类
        return output

# 使用预训练的VGG16模型,并修改最后的全连接层以适应2个输出类别
class VGG16(nn.Module):
    def __init__(self, num_classes=2):
        super(VGG16, self).__init__()
        self.model = models.vgg16(pretrained=True)  # 加载预训练的VGG16模型
        self.model.classifier[-1] = nn.Linear(self.model.classifier[-1].in_features, num_classes)  # 修改输出层

    def forward(self, x):
        return self.model(x)  # 返回模型的输出

# 使用ResNet18模型,并修改最后的全连接层以适应2个输出类别
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.model = models.resnet18(pretrained=False)  # 加载ResNet18模型
        self.model.fc = nn.Linear(self.model.fc.in_features, 2)  # 修改输出层为2个类别

    def forward(self, x):
        return self.model(x)  # 返回模型的输出

# 绘制混淆矩阵的函数
def plot_combined_confusion_matrix(true_labels_dict, predicted_labels_dict, classes,
                                   save_path='combined_confusion_matrix.png'):
    # 创建一个子图,用来显示多个模型的混淆矩阵
    fig, axes = plt.subplots(1, len(true_labels_dict), figsize=(15, 5))

    # 遍历每个模型并绘制其混淆矩阵
    for i, (model_name, true_labels) in enumerate(true_labels_dict.items()):
        predicted_labels = predicted_labels_dict[model_name]
        cm = confusion_matrix(true_labels, predicted_labels)  # 计算混淆矩阵

        # 使用Seaborn绘制热图
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=classes, yticklabels=classes,
                    ax=axes[i], cbar=False, annot_kws={"size": 14})
        axes[i].set_xlabel('Predicted labels', fontsize=12)
        axes[i].set_ylabel('True labels', fontsize=12)
        axes[i].set_title(f'{model_name} Confusion Matrix', fontsize=14)

    # 调整布局并保存图像
    plt.tight_layout()
    plt.savefig(save_path)
    plt.show()

# 计算模型在测试集上的准确率
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        device = list(net.parameters())[0].device  # 获取模型的设备
    acc_sum, n = 0.0, 0
    predicted_labels = []
    true_labels = []
    with torch.no_grad():  # 在测试时不需要计算梯度
        for X, y in tqdm(data_iter, desc="加载中:", leave=True):
            net.eval()  # 将模型设置为评估模式
            outputs = net(X.to(device))  # 获取模型输出
            predicted = outputs.argmax(dim=1)  # 获取预测的标签
            true_labels.extend(y.cpu().numpy())  # 存储真实标签
            predicted_labels.extend(predicted.cpu().numpy())  # 存储预测标签
            acc_sum += (predicted == y.to(device)).float().sum().cpu().item()  # 累加准确的样本数
            n += y.shape[0]  # 累加样本总数

    return acc_sum / n, true_labels, predicted_labels  # 返回准确率,真实标签和预测标签

# 训练和评估模型
def train_and_evaluate_models(models, model_names, train_iter, test_iter, batch_size, optimizer_dict, device,
                              num_epochs, save_model_paths, plot_path):
    train_acc_history = {name: [] for name in model_names}  # 存储训练过程中每个模型的训练准确率
    test_acc_history = {name: [] for name in model_names}  # 存储测试过程中每个模型的测试准确率
    train_loss_history = {name: [] for name in model_names}  # 存储每个模型的训练损失

    # 存储每个模型的混淆矩阵数据
    true_labels_dict = {name: [] for name in model_names}
    predicted_labels_dict = {name: [] for name in model_names}

    # 迭代训练周期
    for epoch in range(num_epochs):
        for model, model_name in zip(models, model_names):  # 遍历每个模型
            model.train()
            optimizer = optimizer_dict[model_name]  # 获取当前模型的优化器
            loss_fn = torch.nn.CrossEntropyLoss()  # 定义损失函数
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.7)  # 学习率衰减策略
            train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()

            # 训练每个模型
            for X, y in train_iter:
                X, y = X.to(device), y.to(device)
                y_hat = model(X)  # 获取模型预测
                loss = loss_fn(y_hat, y)  # 计算损失

                optimizer.zero_grad()  # 清空梯度
                loss.backward()  # 反向传播
                optimizer.step()  # 更新参数

                train_l_sum += loss.item()  # 累加损失
                train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()  # 累加准确的样本数
                n += y.shape[0]
                batch_count += 1

            scheduler.step()  # 学习率衰减

            # 计算训练集和测试集的准确率
            train_acc = train_acc_sum / n
            test_acc, true_labels, predicted_labels = evaluate_accuracy(test_iter, model, device)

            # 存储每个模型的混淆矩阵数据
            true_labels_dict[model_name].extend(true_labels)
            predicted_labels_dict[model_name].extend(predicted_labels)

            train_acc_history[model_name].append(train_acc)
            test_acc_history[model_name].append(test_acc)
            train_loss_history[model_name].append(train_l_sum / batch_count)

            print(f'{model_name} epoch {epoch + 1}, loss {train_l_sum / batch_count:.4f}, '
                  f'train acc {train_acc:.3f}, test acc {test_acc:.3f}, time {time.time() - start:.1f} sec')

            # 保存模型
            torch.save(model.state_dict(), save_model_paths[model_name])  # 保存模型的权重
            print(f"{model_name} Model saved to {save_model_paths[model_name]} after epoch {epoch + 1}")

    # 在所有训练完成后生成混淆矩阵的综合图
    plot_combined_confusion_matrix(true_labels_dict, predicted_labels_dict, ['Cat', 'Dog'],
                                   save_path=os.path.join(plot_path, 'combined_confusion_matrix.png'))

    return train_acc_history, test_acc_history, train_loss_history

# 可视化训练结果并保存
def plot_and_save_results(train_acc_history, test_acc_history, train_loss_history, num_epochs, save_plots_path):
    plt.figure(figsize=(10, 5))
    # 绘制每个模型的训练与测试准确率曲线
    for model_name in ['AlexNet', 'ResNet18', 'VGG16']:
        if model_name in train_acc_history and model_name in test_acc_history:
            plt.plot(range(num_epochs), train_acc_history[model_name], label=f'{model_name} Train Accuracy')
            plt.plot(range(num_epochs), test_acc_history[model_name], label=f'{model_name} Test Accuracy')

    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title('AlexNet, ResNet18, VGG16 - Training and Test Accuracy Comparison')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(save_plots_path, 'accuracy_plot.png'))  # 保存准确率图像
    plt.show()

    plt.figure(figsize=(10, 5))
    # 绘制每个模型的训练损失曲线
    for model_name in ['AlexNet', 'ResNet18', 'VGG16']:
        if model_name in train_loss_history:
            plt.plot(range(num_epochs), train_loss_history[model_name], label=f'{model_name} Train Loss')

    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training Loss Comparison')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(save_plots_path, 'loss_plot.png'))  # 保存损失图像
    plt.show()

if __name__ == '__main__':
    # 设置训练参数
    num_epochs = 25  # 设置为可配置参数
    batch_size = 16  # 设置为可配置参数
    learning_rate = 0.009  # 设置为可配置参数

    save_model_paths = {
        'AlexNet': 'AlexNet.pth',
        'ResNet18': 'ResNet18.pth',
        'VGG16': 'VGG16.pth'
    }
    save_plots_path = './python'
    os.makedirs(save_plots_path, exist_ok=True)  # 创建保存模型和图像的文件夹

    # 创建模型实例
    alexnet_model = AlexNet().to(device)
    resnet_model = ResNet18().to(device)
    vgg_model = VGG16().to(device)

    # 创建数据加载器
    train_iter = torch.utils.data.DataLoader(TrainDataSet, batch_size=batch_size, shuffle=True, num_workers=2)
    test_iter = torch.utils.data.DataLoader(TestDataSet, batch_size=batch_size, shuffle=False, num_workers=2)

    # 优化器字典
    optimizer_dict = {
        'AlexNet': torch.optim.SGD(alexnet_model.parameters(), lr=learning_rate),
        'ResNet18': torch.optim.SGD(resnet_model.parameters(), lr=learning_rate),
        'VGG16': torch.optim.SGD(vgg_model.parameters(), lr=learning_rate)
    }

    # 训练并评估
    models = [alexnet_model, resnet_model, vgg_model]
    model_names = ['AlexNet', 'ResNet18', 'VGG16']
    train_acc_history, test_acc_history, train_loss_history = train_and_evaluate_models(
        models, model_names, train_iter, test_iter, batch_size, optimizer_dict, device, num_epochs, save_model_paths, save_plots_path)

    # 绘制并保存准确率和损失曲线
    plot_and_save_results(train_acc_history, test_acc_history, train_loss_history, num_epochs, save_plots_path)

2、view.py(可视化界面)

py 复制代码
import sys
from PyQt5.QtCore import Qt
from PyQt5.QtWidgets import QApplication, QWidget, QLabel, QPushButton, QFileDialog, QVBoxLayout, QGridLayout, \
    QTextEdit, QComboBox, QSpacerItem, QSizePolicy
from PyQt5.QtGui import QPixmap, QFont, QTextCursor
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
import torchvision.models as models


class AnimalClassifierApp(QWidget):
    def __init__(self):
        super().__init__()

        self.initUI()

    def initUI(self):
        self.setWindowTitle('猫狗识别系统')
        self.resize(600, 400)  # 更小的窗口尺寸

        # 创建布局
        grid = QGridLayout()
        grid.setContentsMargins(10, 10, 10, 10)  # 设置间距
        grid.setSpacing(5)  # 设置控件间距

        # 显示图像的标签
        self.image_label = QLabel(self)
        self.image_label.setFixedSize(250, 250)  # 调整图像显示尺寸
        self.image_label.setAlignment(Qt.AlignCenter)
        grid.addWidget(self.image_label, 1, 0, 2, 1)

        # 识别结果的标签
        self.result_label = QTextEdit(self)
        self.result_label.setFixedSize(250, 80)
        self.result_label.setReadOnly(True)
        self.result_label.setStyleSheet("color: red; font-size: 14px;")
        self.result_label.setAlignment(Qt.AlignCenter)
        grid.addWidget(self.result_label, 1, 1, 1, 2)

        # 模型选择下拉框
        self.model_selector = QComboBox(self)
        self.model_selector.addItem("AlexNet")
        self.model_selector.addItem("VGG16")
        self.model_selector.addItem("ResNet18")
        grid.addWidget(self.model_selector, 2, 0, 1, 2)

        # 按钮布局
        button_layout = QVBoxLayout()
        button_layout.setSpacing(5)  # 设置按钮间距

        # 上传图像按钮
        upload_btn = QPushButton('上传', self)
        upload_btn.clicked.connect(self.load_image)
        button_layout.addWidget(upload_btn)

        # 识别按钮
        recognize_btn = QPushButton('识别', self)
        recognize_btn.clicked.connect(self.classify_image)
        button_layout.addWidget(recognize_btn)

        # 添加按钮布局
        button_layout.addSpacerItem(QSpacerItem(10, 10, QSizePolicy.Minimum, QSizePolicy.Expanding))
        grid.addLayout(button_layout, 3, 1, 1, 2)

        self.setLayout(grid)

        # 加载模型
        self.device = torch.device('cpu')

        # 定义数据转换
        self.transform = transforms.Compose([
            transforms.Resize((148, 148)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.4, 0.4, 0.4], std=[0.2, 0.2, 0.2])
        ])

        self.image_path = ''
        self.model = None  # 模型初始化为空

    def load_image(self):
        options = QFileDialog.Options()
        options |= QFileDialog.ReadOnly
        file_name, _ = QFileDialog.getOpenFileName(self, "上传图片", "", "图片文件 (*.jpg *.jpeg *.png)",
                                                   options=options)
        if file_name:
            self.image_path = file_name
            pixmap = QPixmap(file_name)
            pixmap = pixmap.scaled(self.image_label.width(), self.image_label.height(), Qt.KeepAspectRatio)
            self.image_label.setPixmap(pixmap)
            self.result_label.setText('识别结果: ')

    def classify_image(self):
        if self.image_path:
            # 根据选择的模型加载相应的模型
            selected_model = self.model_selector.currentText()
            if selected_model == "AlexNet":
                self.model = self.load_alexnet_model()
            elif selected_model == "VGG16":
                self.model = self.load_vgg16_model()
            elif selected_model == "ResNet18":
                self.model = self.load_resnet18_model()

            image = Image.open(self.image_path).convert('RGB')
            image_tensor = self.transform(image).unsqueeze(0).to(self.device)

            with torch.no_grad():
                output = self.model(image_tensor)
                probabilities = torch.nn.functional.softmax(output, dim=1)
                confidence, predicted = torch.max(probabilities, 1)
                label = 'cat' if predicted.item() == 0 else 'dog'
                confidence = confidence.item()

            # 将图像转换为QPixmap
            pixmap = QPixmap(self.image_path)
            pixmap = pixmap.scaled(self.image_label.width(), self.image_label.height(), Qt.KeepAspectRatio)
            self.image_label.setPixmap(pixmap)

            # 设置识别结果字体颜色和对齐方式
            self.result_label.setText(f'识别结果: {label} \n\n置信度: {confidence:.2f}')
            self.result_label.setAlignment(Qt.AlignCenter)
            cursor = self.result_label.textCursor()
            cursor.select(QTextCursor.Document)
            self.result_label.setTextCursor(cursor)

    def load_alexnet_model(self):
        model = models.alexnet(pretrained=True)
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, 2)  # 修改最后一层
        model = model.to(self.device)
        model.eval()
        return model

    def load_vgg16_model(self):
        model = models.vgg16(pretrained=True)
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, 2)  # 修改最后一层
        model = model.to(self.device)
        model.eval()
        return model

    def load_resnet18_model(self):
        model = models.resnet18(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 2)  # 修改最后一层
        model = model.to(self.device)
        model.eval()
        return model


if __name__ == '__main__':
    app = QApplication(sys.argv)
    ex = AnimalClassifierApp()
    ex.show()
    sys.exit(app.exec_())
相关推荐
NAGNIP7 小时前
一文搞懂深度学习中的通用逼近定理!
人工智能·算法·面试
冬奇Lab8 小时前
一天一个开源项目(第36篇):EverMemOS - 跨 LLM 与平台的长时记忆 OS,让 Agent 会记忆更会推理
人工智能·开源·资讯
冬奇Lab8 小时前
OpenClaw 源码深度解析(一):Gateway——为什么需要一个"中枢"
人工智能·开源·源码阅读
AngelPP11 小时前
OpenClaw 架构深度解析:如何把 AI 助手搬到你的个人设备上
人工智能
宅小年12 小时前
Claude Code 换成了Kimi K2.5后,我再也回不去了
人工智能·ai编程·claude
九狼12 小时前
Flutter URL Scheme 跨平台跳转
人工智能·flutter·github
ZFSS12 小时前
Kimi Chat Completion API 申请及使用
前端·人工智能
天翼云开发者社区13 小时前
春节复工福利就位!天翼云息壤2500万Tokens免费送,全品类大模型一键畅玩!
人工智能·算力服务·息壤
知识浅谈13 小时前
教你如何用 Gemini 将课本图片一键转为精美 PPT
人工智能
Ray Liang14 小时前
被低估的量化版模型,小身材也能干大事
人工智能·ai·ai助手·mindx