基于 PyTorch 的 VGG16 深度学习人脸识别检测系统的实现+ui界面

VGG16深度学习人脸识别检测系统

文章目录

      • [**1. 数据准备**](#1. 数据准备)
        • [**1.1 数据加载器**](#1.1 数据加载器)
      • [**2. 模型定义**](#2. 模型定义)
      • [**3. 训练与评估**](#3. 训练与评估)
        • [**3.1 训练代码**](#3.1 训练代码)
      • [**4. 可视化 UI 界面**](#4. 可视化 UI 界面)
      • [1. 数据加载 (`data_loader.py`)](#1. 数据加载 (data_loader.py))
      • [2. 模型定义 (`model.py`)](#2. 模型定义 (model.py))
      • [3. 训练代码 (`train.py`)](#3. 训练代码 (train.py))
      • [4. 可视化 UI 界面 (`ui.py`)](#4. 可视化 UI 界面 (ui.py))
      • 目录结构

预期目标:
基于 PyTorch 的 VGG16 深度学习实现
支持GPU加速训练
可视化UI界面

基于 PyTorch 的 VGG16 深度学习人脸识别检测系统的实现,支持 GPU 加速训练和可视化 UI 界面。我们将分步骤完成数据准备、模型定义、训练与评估以及 UI 界面的构建。


1. 数据准备

假设你有一个包含人脸图像的数据集,目录结构如下:

复制代码
dataset/
├── train/
│   ├── person1/
│   │   ├── img1.jpg
│   │   └── ...
│   ├── person2/
│   │   ├── img1.jpg
│   │   └── ...
│   └── ...
└── val/
    ├── person1/
    │   ├── img1.jpg
    │   └── ...
    ├── person2/
    │   ├── img1.jpg
    │   └── ...
    └── ...
1.1 数据加载器

使用 torchvision.datasets.ImageFolder 和 torch.utils.data.DataLoader 加载数据。

python 复制代码
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Preprocessing pipeline shared by the training and validation sets.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),  # 224x224 is the input size used for VGG16 here
        transforms.ToTensor(),
        transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),  # ImageNet statistics
    ]
)

# Datasets: each split is read from its own folder under dataset/.
train_dataset = datasets.ImageFolder(transform=transform, root="dataset/train")
val_dataset = datasets.ImageFolder(transform=transform, root="dataset/val")

# Loaders: only the training split is shuffled.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=32)

2. 模型定义

加载预训练的 VGG16 模型,并修改最后的全连接层以适应人脸分类任务。

python 复制代码
import torch.nn as nn
from torchvision.models import vgg16

class FaceRecognitionModel(nn.Module):
    """VGG16 whose final classifier layer is replaced for face classification.

    Args:
        num_classes: Number of output logits (one per identity).
        pretrained: Forwarded to ``vgg16``. Defaults to ``True`` (the original
            behaviour). Pass ``False`` when the weights are about to be
            replaced by ``load_state_dict`` and the pretrained weights are not
            needed.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()
        # The attribute name stays ``vgg16`` so existing state_dict keys keep matching.
        self.vgg16 = vgg16(pretrained=pretrained)
        # Read the input width from the layer being replaced rather than
        # hard-coding it, so the head always matches the backbone.
        in_features = self.vgg16.classifier[6].in_features
        self.vgg16.classifier[6] = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the logits the wrapped VGG16 produces for the batch ``x``."""
        return self.vgg16(x)

3. 训练与评估

3.1 训练代码
python 复制代码
import torch.optim as optim
from tqdm import tqdm

def train_model(model, train_loader, val_loader, num_epochs=20, lr=0.001, device='cuda'):
    """Train ``model`` with Adam and cross-entropy, validating after each epoch.

    Args:
        model: Network to optimise; it is moved to ``device`` in place.
        train_loader: Iterable yielding ``(images, labels)`` training batches.
        val_loader: Iterable of validation batches, passed to ``evaluate_model``.
        num_epochs: Number of passes over ``train_loader``.
        lr: Learning rate for Adam.
        device: Device that the model and every batch are moved to.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    model.to(device)
    for epoch in range(num_epochs):
        # evaluate_model() leaves the model in eval mode, so switch back every epoch.
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

        # Counting batches (instead of len(train_loader)) and guarding the
        # divisions keeps an empty loader from raising ZeroDivisionError.
        avg_loss = train_loss / num_batches if num_batches else 0.0
        accuracy = correct / total if total else 0.0
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")

        # Validate after every epoch.
        evaluate_model(model, val_loader, device)

def evaluate_model(model, val_loader, device):
    """Compute and print the top-1 accuracy of ``model`` on ``val_loader``.

    The model is switched to eval mode and left that way.

    Args:
        model: Network to evaluate; expected to already be on ``device``.
        val_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The accuracy as a float in ``[0, 1]``; ``0.0`` when ``val_loader``
        yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty loader would otherwise raise ZeroDivisionError.
    accuracy = correct / total if total else 0.0
    print(f"Validation Accuracy: {accuracy:.4f}")
    return accuracy

4. 可视化 UI 界面

使用 PyQt5 构建一个简单的 GUI 应用程序,用于加载图片并进行人脸识别。

python 复制代码
from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QLabel, QVBoxLayout, QWidget, QFileDialog
from PyQt5.QtGui import QImage, QPixmap
import cv2
import torch

class App(QMainWindow):
    """Main window that loads an image file, shows it and displays the prediction.

    Args:
        model: Classifier used for inference; inputs are moved to whatever
            device its parameters live on.
        class_names: Sequence mapping a predicted class index to its label.
    """

    def __init__(self, model, class_names):
        super().__init__()
        self.setWindowTitle("VGG16 人脸识别系统")
        self.setGeometry(100, 100, 800, 600)

        self.model = model
        self.class_names = class_names

        # Widgets use fixed geometry inside the 800x600 window.
        self.label_image = QLabel(self)
        self.label_image.setGeometry(50, 50, 700, 400)

        self.btn_load = QPushButton("加载图片", self)
        self.btn_load.setGeometry(50, 500, 150, 40)
        self.btn_load.clicked.connect(self.load_image)

        self.label_result = QLabel("预测结果:", self)
        self.label_result.setGeometry(250, 500, 500, 40)

    def load_image(self):
        """Ask for an image file, display it and show the predicted label."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择图片", "", "Images (*.jpg *.png)")
        if not file_path:
            return  # dialog was cancelled

        # Let Qt decode the file directly. cv2.imread returns None for files
        # it cannot read (including non-ASCII paths on some platforms), which
        # previously crashed in cvtColor.
        pixmap = QPixmap(file_path)
        if pixmap.isNull():
            self.label_result.setText("预测结果:无法读取图片")
            return
        self.label_image.setPixmap(pixmap)

        result = self.predict_image(file_path)
        self.label_result.setText(f"预测结果:{result}")

    def predict_image(self, image_path):
        """Return the class name predicted for the image stored at ``image_path``."""
        from PIL import Image
        from torchvision import transforms

        # Same preprocessing as used for the training data.
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        # Follow the model's own device instead of assuming CUDA, so that
        # inference also works on CPU-only machines.
        first_param = next(self.model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

        image = Image.open(image_path).convert("RGB")
        batch = transform(image).unsqueeze(0).to(device)  # add the batch dimension

        self.model.eval()
        with torch.no_grad():
            class_idx = self.model(batch).argmax(dim=1).item()

        return self.class_names[class_idx]

if __name__ == "__main__":
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # One output per class discovered in the training dataset.
    class_names = train_dataset.classes
    model = FaceRecognitionModel(num_classes=len(class_names))
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load("face_recognition_model.pth", map_location=device))
    model.to(device)
    model.eval()

    app = QApplication([])
    window = App(model, class_names)
    window.show()
    app.exec_()

为了实现一个基于 VGG16 的人脸识别系统,并且支持 GPU 加速训练和可视化 UI 界面,我们可以分步骤进行。以下是详细的代码实现:

1. 数据加载 (data_loader.py)

python 复制代码
import torch
from torchvision import datasets, transforms

# Preprocessing applied to every image of both splits.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),  # 224x224 is the input size used for VGG16 here
        transforms.ToTensor(),
        transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),  # ImageNet statistics
    ]
)

def get_data_loaders(data_dir, batch_size=32):
    """Build the training and validation loaders for the dataset under ``data_dir``.

    Args:
        data_dir: Directory containing the ``train`` and ``val`` sub-folders.
        batch_size: Batch size used by both loaders.

    Returns:
        ``(train_loader, val_loader)``; only the training loader shuffles.
    """
    make_loader = torch.utils.data.DataLoader

    # Both datasets are created before any loader, using the module-level transform.
    training_set = datasets.ImageFolder(transform=transform, root=f"{data_dir}/train")
    validation_set = datasets.ImageFolder(transform=transform, root=f"{data_dir}/val")

    return (
        make_loader(training_set, shuffle=True, batch_size=batch_size),
        make_loader(validation_set, shuffle=False, batch_size=batch_size),
    )

2. 模型定义 (model.py)

python 复制代码
import torch.nn as nn
from torchvision.models import vgg16

class FaceRecognitionModel(nn.Module):
    """VGG16 whose final classifier layer is replaced for face classification.

    Args:
        num_classes: Number of output logits (one per identity).
        pretrained: Forwarded to ``vgg16``. Defaults to ``True`` (the original
            behaviour). Pass ``False`` when the weights are about to be
            replaced by ``load_state_dict`` and the pretrained weights are not
            needed.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()
        # The attribute name stays ``vgg16`` so existing state_dict keys keep matching.
        self.vgg16 = vgg16(pretrained=pretrained)
        # Read the input width from the layer being replaced rather than
        # hard-coding it, so the head always matches the backbone.
        in_features = self.vgg16.classifier[6].in_features
        self.vgg16.classifier[6] = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the logits the wrapped VGG16 produces for the batch ``x``."""
        return self.vgg16(x)

3. 训练代码 (train.py)

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

from data_loader import get_data_loaders
from model import FaceRecognitionModel

def train_model(model, train_loader, val_loader, num_epochs=20, lr=0.001, device='cuda'):
    """Train ``model`` with Adam and cross-entropy, validating after each epoch.

    Args:
        model: Network to optimise; it is moved to ``device`` in place.
        train_loader: Iterable yielding ``(images, labels)`` training batches.
        val_loader: Iterable of validation batches, passed to ``evaluate_model``.
        num_epochs: Number of passes over ``train_loader``.
        lr: Learning rate for Adam.
        device: Device that the model and every batch are moved to.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    model.to(device)
    for epoch in range(num_epochs):
        # evaluate_model() leaves the model in eval mode, so switch back every epoch.
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

        # Counting batches (instead of len(train_loader)) and guarding the
        # divisions keeps an empty loader from raising ZeroDivisionError.
        avg_loss = train_loss / num_batches if num_batches else 0.0
        accuracy = correct / total if total else 0.0
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")

        # Validate after every epoch.
        evaluate_model(model, val_loader, device)

def evaluate_model(model, val_loader, device):
    """Compute and print the top-1 accuracy of ``model`` on ``val_loader``.

    The model is switched to eval mode and left that way.

    Args:
        model: Network to evaluate; expected to already be on ``device``.
        val_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The accuracy as a float in ``[0, 1]``; ``0.0`` when ``val_loader``
        yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty loader would otherwise raise ZeroDivisionError.
    accuracy = correct / total if total else 0.0
    print(f"Validation Accuracy: {accuracy:.4f}")
    return accuracy

if __name__ == "__main__":
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # The loaders must be created first: the class count is read from the
    # training dataset they wrap.
    train_loader, val_loader = get_data_loaders("path_to_your_data")
    num_classes = len(train_loader.dataset.classes)
    model = FaceRecognitionModel(num_classes)

    train_model(model, train_loader, val_loader, num_epochs=20, lr=0.001, device=device)
    # Saves the weights as they are after the final epoch (no best-epoch selection).
    torch.save(model.state_dict(), "best_model.pth")

4. 可视化 UI 界面 (ui.py)

python 复制代码
from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QLabel, QVBoxLayout, QWidget, QFileDialog
from PyQt5.QtGui import QImage, QPixmap
import cv2
import torch
from model import FaceRecognitionModel

class App(QMainWindow):
    """Main window that loads an image file, shows it and displays the prediction.

    Args:
        model: Classifier used for inference; inputs are moved to whatever
            device its parameters live on.
        class_names: Sequence mapping a predicted class index to its label.
    """

    def __init__(self, model, class_names):
        super().__init__()
        self.setWindowTitle("VGG16 人脸识别系统")
        self.setGeometry(100, 100, 800, 600)

        self.model = model
        self.class_names = class_names

        # Widgets use fixed geometry inside the 800x600 window.
        self.label_image = QLabel(self)
        self.label_image.setGeometry(50, 50, 700, 400)

        self.btn_load = QPushButton("加载图片", self)
        self.btn_load.setGeometry(50, 500, 150, 40)
        self.btn_load.clicked.connect(self.load_image)

        self.label_result = QLabel("预测结果:", self)
        self.label_result.setGeometry(250, 500, 500, 40)

    def load_image(self):
        """Ask for an image file, display it and show the predicted label."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择图片", "", "Images (*.jpg *.png)")
        if not file_path:
            return  # dialog was cancelled

        # Let Qt decode the file directly. cv2.imread returns None for files
        # it cannot read (including non-ASCII paths on some platforms), which
        # previously crashed in cvtColor.
        pixmap = QPixmap(file_path)
        if pixmap.isNull():
            self.label_result.setText("预测结果:无法读取图片")
            return
        self.label_image.setPixmap(pixmap)

        result = self.predict_image(file_path)
        self.label_result.setText(f"预测结果:{result}")

    def predict_image(self, image_path):
        """Return the class name predicted for the image stored at ``image_path``."""
        from PIL import Image
        from torchvision import transforms

        # Same preprocessing as used for the training data.
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        # Follow the model's own device instead of assuming CUDA, so that
        # inference also works on CPU-only machines.
        first_param = next(self.model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

        image = Image.open(image_path).convert("RGB")
        batch = transform(image).unsqueeze(0).to(device)  # add the batch dimension

        self.model.eval()
        with torch.no_grad():
            class_idx = self.model(batch).argmax(dim=1).item()

        return self.class_names[class_idx]

if __name__ == "__main__":
    # Imported here because only the script entry point needs the dataset.
    from data_loader import get_data_loaders

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Class names are read from the training folder so that indices line up
    # with the labels used during training.
    # NOTE(review): assumes the same data directory that train.py used - confirm.
    train_loader, _ = get_data_loaders("path_to_your_data")
    class_names = train_loader.dataset.classes

    model = FaceRecognitionModel(num_classes=len(class_names))
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load("best_model.pth", map_location=device))
    model.to(device)
    model.eval()

    app = QApplication([])
    window = App(model, class_names)
    window.show()
    app.exec_()

目录结构

复制代码
基于VGG16的人脸识别/
├── data/
│   ├── train/
│   └── val/
├── hub/
│   ├── best_model.pth
│   ├── c罗.jpg
│   ├── c罗_1.jpg
│   ├── data_loader.py
│   ├── model.py
│   ├── train.py
│   ├── ui.py
│   ├── 小罗伯特唐尼.jpg
│   ├── 梅西.jpg
│   └── 梅西_1.jpg
└── 从部署/
相关推荐
CoovallyAIHub7 分钟前
SBP-YOLO:面向嵌入式悬架的轻量实时模型,实现减速带与坑洼高精度检测
深度学习·算法·计算机视觉
算法打盹中11 分钟前
基于树莓派与Jetson Nano集群的实验边缘设备上视觉语言模型(VLMs)的性能评估与实践探索
人工智能·计算机视觉·语言模型·自然语言处理·树莓派·多模态·jetson nano
卿·静17 分钟前
Node.js对接即梦AI实现“千军万马”视频
前端·javascript·人工智能·后端·node.js
YangYang9YangYan18 分钟前
2025年金融专业人士职业认证发展路径分析
大数据·人工智能·金融
AIbase202419 分钟前
GEO优化服务:技术演进如何重塑搜索优化行业新范式
大数据·人工智能
HuggingFace24 分钟前
ZeroGPU Spaces 加速实践:PyTorch 提前编译全解析
pytorch·zerogpu
摆烂z28 分钟前
ollama笔记
人工智能
连合机器人30 分钟前
城市脉搏中的“绿色卫士”:当智能科技邂逅城市清洁
人工智能·ai·设备租赁·连合直租·智能清洁专家·有鹿巡扫机器人
贾全33 分钟前
准备篇:搭建你的AI“炼丹炉“
人工智能·ai·vlm·多模态ai·vlm环境配置