基于 PyTorch 的 VGG16 深度学习人脸识别检测系统的实现+ui界面

VGG16深度学习人脸识别检测系统

文章目录

      • [**1. 数据准备**](#1. 数据准备)
        • [**1.1 数据加载器**](#1.1 数据加载器)
      • [**2. 模型定义**](#2. 模型定义)
      • [**3. 训练与评估**](#3. 训练与评估)
        • [**3.1 训练代码**](#3.1 训练代码)
      • [**4. 可视化 UI 界面**](#4. 可视化 UI 界面)
      • [1. 数据加载 (`data_loader.py`)](#1. 数据加载 (data_loader.py))
      • [2. 模型定义 (`model.py`)](#2. 模型定义 (model.py))
      • [3. 训练代码 (`train.py`)](#3. 训练代码 (train.py))
      • [4. 可视化 UI 界面 (`ui.py`)](#4. 可视化 UI 界面 (ui.py))
      • 目录结构

预达到预期:
基于Pytorch的VGG16学习实现
支持GPU加速训练
可视化UI界面

基于 PyTorch 的 VGG16 深度学习人脸识别检测系统的实现,支持 GPU 加速训练和可视化 UI 界面。我们将分步骤完成数据准备、模型定义、训练与评估以及 UI 界面的构建。


1. 数据准备

假设你有一个包含人脸图像的数据集,目录结构如下:

复制代码
dataset/
├── train/
│   ├── person1/
│   │   ├── img1.jpg
│   │   └── ...
│   ├── person2/
│   │   ├── img1.jpg
│   │   └── ...
│   └── ...
└── val/
    ├── person1/
    │   ├── img1.jpg
    │   └── ...
    ├── person2/
    │   ├── img1.jpg
    │   └── ...
    └── ...
1.1 数据加载器

使用 torchvision.datasets.ImageFoldertorch.utils.data.DataLoader 加载数据。

python 复制代码
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# 数据预处理
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # VGG16 输入大小为 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet 标准化
])

# 加载数据集
train_dataset = datasets.ImageFolder(root="dataset/train", transform=transform)
val_dataset = datasets.ImageFolder(root="dataset/val", transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

2. 模型定义

加载预训练的 VGG16 模型,并修改最后的全连接层以适应人脸分类任务。

python 复制代码
import torch.nn as nn
from torchvision.models import vgg16

class FaceRecognitionModel(nn.Module):
    def __init__(self, num_classes):
        super(FaceRecognitionModel, self).__init__()
        self.vgg16 = vgg16(pretrained=True)  # 加载预训练的 VGG16
        self.vgg16.classifier[6] = nn.Linear(4096, num_classes)  # 修改最后一层

    def forward(self, x):
        return self.vgg16(x)

3. 训练与评估

3.1 训练代码
python 复制代码
import torch.optim as optim
from tqdm import tqdm

def train_model(model, train_loader, val_loader, num_epochs=20, lr=0.001, device='cuda'):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    model.to(device)
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss/len(train_loader):.4f}, Accuracy: {correct/total:.4f}")

        # 验证模型
        evaluate_model(model, val_loader, device)

def evaluate_model(model, val_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Validation Accuracy: {correct/total:.4f}")

4. 可视化 UI 界面

使用 PyQt5 构建一个简单的 GUI 应用程序,用于加载图片并进行人脸识别。

python 复制代码
from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QLabel, QVBoxLayout, QWidget, QFileDialog
from PyQt5.QtGui import QImage, QPixmap
import cv2
import torch

class App(QMainWindow):
    def __init__(self, model, class_names):
        super().__init__()
        self.setWindowTitle("VGG16 人脸识别系统")
        self.setGeometry(100, 100, 800, 600)

        self.model = model
        self.class_names = class_names

        # UI 元素
        self.label_image = QLabel(self)
        self.label_image.setGeometry(50, 50, 700, 400)

        self.btn_load = QPushButton("加载图片", self)
        self.btn_load.setGeometry(50, 500, 150, 40)
        self.btn_load.clicked.connect(self.load_image)

        self.label_result = QLabel("预测结果:", self)
        self.label_result.setGeometry(250, 500, 500, 40)

    def load_image(self):
        file_path, _ = QFileDialog.getOpenFileName(self, "选择图片", "", "Images (*.jpg *.png)")
        if file_path:
            image = cv2.imread(file_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # 显示原始图像
            height, width, channel = image.shape
            bytes_per_line = 3 * width
            q_img = QImage(image.data, width, height, bytes_per_line, QImage.Format_RGB888)
            self.label_image.setPixmap(QPixmap.fromImage(q_img))

            # 预测
            result = self.predict_image(file_path)
            self.label_result.setText(f"预测结果:{result}")

    def predict_image(self, image_path):
        from PIL import Image
        from torchvision import transforms

        # 图像预处理
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        image = Image.open(image_path).convert("RGB")
        image = transform(image).unsqueeze(0).to('cuda')

        # 推理
        self.model.eval()
        with torch.no_grad():
            output = self.model(image)
            _, predicted = torch.max(output, 1)
            class_idx = predicted.item()

        return self.class_names[class_idx]

if __name__ == "__main__":
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # 加载模型
    model = FaceRecognitionModel(num_classes=len(train_dataset.classes))
    model.load_state_dict(torch.load("face_recognition_model.pth"))
    model.to(device)
    model.eval()

    app = QApplication([])
    window = App(model, train_dataset.classes)
    window.show()
    app.exec_()

为了实现一个基于 VGG16 的人脸识别系统,并且支持 GPU 加速训练和可视化 UI 界面,我们可以分步骤进行。以下是详细的代码实现:

1. 数据加载 (data_loader.py)

python 复制代码
import torch
from torchvision import datasets, transforms

# 数据预处理
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # VGG16 输入大小为 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet 标准化
])

def get_data_loaders(data_dir, batch_size=32):
    train_dataset = datasets.ImageFolder(root=f"{data_dir}/train", transform=transform)
    val_dataset = datasets.ImageFolder(root=f"{data_dir}/val", transform=transform)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader

2. 模型定义 (model.py)

python 复制代码
import torch.nn as nn
from torchvision.models import vgg16

class FaceRecognitionModel(nn.Module):
    def __init__(self, num_classes):
        super(FaceRecognitionModel, self).__init__()
        self.vgg16 = vgg16(pretrained=True)  # 加载预训练的 VGG16
        self.vgg16.classifier[6] = nn.Linear(4096, num_classes)  # 修改最后一层

    def forward(self, x):
        return self.vgg16(x)

3. 训练代码 (train.py)

python 复制代码
import torch
import torch.optim as optim
from model import FaceRecognitionModel
from data_loader import get_data_loaders

def train_model(model, train_loader, val_loader, num_epochs=20, lr=0.001, device='cuda'):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    model.to(device)
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss/len(train_loader):.4f}, Accuracy: {correct/total:.4f}")

        # 验证模型
        evaluate_model(model, val_loader, device)

def evaluate_model(model, val_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Validation Accuracy: {correct/total:.4f}")

if __name__ == "__main__":
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    num_classes = len(train_loader.dataset.classes)
    model = FaceRecognitionModel(num_classes)
    train_loader, val_loader = get_data_loaders("path_to_your_data")
    
    train_model(model, train_loader, val_loader, num_epochs=20, lr=0.001, device=device)
    torch.save(model.state_dict(), "best_model.pth")

4. 可视化 UI 界面 (ui.py)

python 复制代码
from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QLabel, QVBoxLayout, QWidget, QFileDialog
from PyQt5.QtGui import QImage, QPixmap
import cv2
import torch
from model import FaceRecognitionModel

class App(QMainWindow):
    def __init__(self, model, class_names):
        super().__init__()
        self.setWindowTitle("VGG16 人脸识别系统")
        self.setGeometry(100, 100, 800, 600)

        self.model = model
        self.class_names = class_names

        # UI 元素
        self.label_image = QLabel(self)
        self.label_image.setGeometry(50, 50, 700, 400)

        self.btn_load = QPushButton("加载图片", self)
        self.btn_load.setGeometry(50, 500, 150, 40)
        self.btn_load.clicked.connect(self.load_image)

        self.label_result = QLabel("预测结果:", self)
        self.label_result.setGeometry(250, 500, 500, 40)

    def load_image(self):
        file_path, _ = QFileDialog.getOpenFileName(self, "选择图片", "", "Images (*.jpg *.png)")
        if file_path:
            image = cv2.imread(file_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # 显示原始图像
            height, width, channel = image.shape
            bytes_per_line = 3 * width
            q_img = QImage(image.data, width, height, bytes_per_line, QImage.Format_RGB888)
            self.label_image.setPixmap(QPixmap.fromImage(q_img))

            # 预测
            result = self.predict_image(file_path)
            self.label_result.setText(f"预测结果:{result}")

    def predict_image(self, image_path):
        from PIL import Image
        from torchvision import transforms

        # 图像预处理
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        image = Image.open(image_path).convert("RGB")
        image = transform(image).unsqueeze(0).to('cuda')

        # 推理
        self.model.eval()
        with torch.no_grad():
            output = self.model(image)
            _, predicted = torch.max(output, 1)
            class_idx = predicted.item()

        return self.class_names[class_idx]

if __name__ == "__main__":
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # 加载模型
    model = FaceRecognitionModel(num_classes=len(train_loader.dataset.classes))
    model.load_state_dict(torch.load("best_model.pth"))
    model.to(device)
    model.eval()

    app = QApplication([])
    window = App(model, train_loader.dataset.classes)
    window.show()
    app.exec_()

目录结构

复制代码
基于VGG16的人脸识别/
├── data/
│   ├── train/
│   └── val/
├── hub/
│   ├── best_model.pth
│   ├── c罗.jpg
│   ├── c罗_1.jpg
│   ├── data_loader.py
│   ├── model.py
│   ├── train.py
│   ├── ui.py
│   ├── 小罗伯特唐尼.jpg
│   ├── 梅西.jpg
│   └── 梅西_1.jpg
└── 从部署/
相关推荐
老马聊技术7 小时前
AI对话功能之SpringBoot整合Vue3
vue.js·人工智能·spring boot·后端
阿寻寻7 小时前
【人工智能学习260612-软件测试篇】小工具实现 [特殊字符] Prompt工程 + RAG思路 + API调用 + 自动化测试
人工智能·功能测试·学习·prompt
甲维斯7 小时前
测一波Kimi K2.7,消耗一周配额!
前端·人工智能·游戏开发
石山代码7 小时前
给照片装上 AI 引擎:ACDSee 2025 安装详细步骤
人工智能
chase_my_dream7 小时前
A-LOAM中scanRegistration.cpp详细讲解
c++·人工智能·自动驾驶
ai_xiaogui7 小时前
AI Starter全面开源在即!PanelAI测试版即将上线,客户端+后端全开源,本地AI一键部署神器
人工智能·panelai测试版上线·本地ai一键部署系统·客户端后端开源·ai starter全面开源·跨平台ai模型管理工具·ai starter开源
邵宇然7 小时前
Pin、Unpin 与 Tokio 异步运行时:自引用结构在异步环境中的内存安全保证
人工智能
高洁017 小时前
医疗行业的数字孪生革命
python·机器学习·数据挖掘·transformer·知识图谱
MATLAB代码顾问7 小时前
Python Pandas数据分析入门指南
python·数据分析·pandas
逐米时代7 小时前
制造型企业AI智能体实施步骤详解:提升协同效率的实战指南
大数据·人工智能