基于 PyTorch 的 VGG16 深度学习人脸识别检测系统的实现+ui界面

VGG16深度学习人脸识别检测系统

文章目录

      • [**1. 数据准备**](#1. 数据准备)
        • [**1.1 数据加载器**](#1.1 数据加载器)
      • [**2. 模型定义**](#2. 模型定义)
      • [**3. 训练与评估**](#3. 训练与评估)
        • [**3.1 训练代码**](#3.1 训练代码)
      • [**4. 可视化 UI 界面**](#4. 可视化 UI 界面)
      • [1. 数据加载 (`data_loader.py`)](#1. 数据加载 (data_loader.py))
      • [2. 模型定义 (`model.py`)](#2. 模型定义 (model.py))
      • [3. 训练代码 (`train.py`)](#3. 训练代码 (train.py))
      • [4. 可视化 UI 界面 (`ui.py`)](#4. 可视化 UI 界面 (ui.py))
      • 目录结构

预达到预期:
基于Pytorch的VGG16学习实现
支持GPU加速训练
可视化UI界面

基于 PyTorch 的 VGG16 深度学习人脸识别检测系统的实现,支持 GPU 加速训练和可视化 UI 界面。我们将分步骤完成数据准备、模型定义、训练与评估以及 UI 界面的构建。


1. 数据准备

假设你有一个包含人脸图像的数据集,目录结构如下:

复制代码
dataset/
├── train/
│   ├── person1/
│   │   ├── img1.jpg
│   │   └── ...
│   ├── person2/
│   │   ├── img1.jpg
│   │   └── ...
│   └── ...
└── val/
    ├── person1/
    │   ├── img1.jpg
    │   └── ...
    ├── person2/
    │   ├── img1.jpg
    │   └── ...
    └── ...
1.1 数据加载器

使用 torchvision.datasets.ImageFoldertorch.utils.data.DataLoader 加载数据。

python 复制代码
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# 数据预处理
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # VGG16 输入大小为 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet 标准化
])

# 加载数据集
train_dataset = datasets.ImageFolder(root="dataset/train", transform=transform)
val_dataset = datasets.ImageFolder(root="dataset/val", transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

2. 模型定义

加载预训练的 VGG16 模型,并修改最后的全连接层以适应人脸分类任务。

python 复制代码
import torch.nn as nn
from torchvision.models import vgg16

class FaceRecognitionModel(nn.Module):
    def __init__(self, num_classes):
        super(FaceRecognitionModel, self).__init__()
        self.vgg16 = vgg16(pretrained=True)  # 加载预训练的 VGG16
        self.vgg16.classifier[6] = nn.Linear(4096, num_classes)  # 修改最后一层

    def forward(self, x):
        return self.vgg16(x)

3. 训练与评估

3.1 训练代码
python 复制代码
import torch.optim as optim
from tqdm import tqdm

def train_model(model, train_loader, val_loader, num_epochs=20, lr=0.001, device='cuda'):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    model.to(device)
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss/len(train_loader):.4f}, Accuracy: {correct/total:.4f}")

        # 验证模型
        evaluate_model(model, val_loader, device)

def evaluate_model(model, val_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Validation Accuracy: {correct/total:.4f}")

4. 可视化 UI 界面

使用 PyQt5 构建一个简单的 GUI 应用程序,用于加载图片并进行人脸识别。

python 复制代码
from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QLabel, QVBoxLayout, QWidget, QFileDialog
from PyQt5.QtGui import QImage, QPixmap
import cv2
import torch

class App(QMainWindow):
    def __init__(self, model, class_names):
        super().__init__()
        self.setWindowTitle("VGG16 人脸识别系统")
        self.setGeometry(100, 100, 800, 600)

        self.model = model
        self.class_names = class_names

        # UI 元素
        self.label_image = QLabel(self)
        self.label_image.setGeometry(50, 50, 700, 400)

        self.btn_load = QPushButton("加载图片", self)
        self.btn_load.setGeometry(50, 500, 150, 40)
        self.btn_load.clicked.connect(self.load_image)

        self.label_result = QLabel("预测结果:", self)
        self.label_result.setGeometry(250, 500, 500, 40)

    def load_image(self):
        file_path, _ = QFileDialog.getOpenFileName(self, "选择图片", "", "Images (*.jpg *.png)")
        if file_path:
            image = cv2.imread(file_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # 显示原始图像
            height, width, channel = image.shape
            bytes_per_line = 3 * width
            q_img = QImage(image.data, width, height, bytes_per_line, QImage.Format_RGB888)
            self.label_image.setPixmap(QPixmap.fromImage(q_img))

            # 预测
            result = self.predict_image(file_path)
            self.label_result.setText(f"预测结果:{result}")

    def predict_image(self, image_path):
        from PIL import Image
        from torchvision import transforms

        # 图像预处理
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        image = Image.open(image_path).convert("RGB")
        image = transform(image).unsqueeze(0).to('cuda')

        # 推理
        self.model.eval()
        with torch.no_grad():
            output = self.model(image)
            _, predicted = torch.max(output, 1)
            class_idx = predicted.item()

        return self.class_names[class_idx]

if __name__ == "__main__":
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # 加载模型
    model = FaceRecognitionModel(num_classes=len(train_dataset.classes))
    model.load_state_dict(torch.load("face_recognition_model.pth"))
    model.to(device)
    model.eval()

    app = QApplication([])
    window = App(model, train_dataset.classes)
    window.show()
    app.exec_()

为了实现一个基于 VGG16 的人脸识别系统,并且支持 GPU 加速训练和可视化 UI 界面,我们可以分步骤进行。以下是详细的代码实现:

1. 数据加载 (data_loader.py)

python 复制代码
import torch
from torchvision import datasets, transforms

# 数据预处理
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # VGG16 输入大小为 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet 标准化
])

def get_data_loaders(data_dir, batch_size=32):
    train_dataset = datasets.ImageFolder(root=f"{data_dir}/train", transform=transform)
    val_dataset = datasets.ImageFolder(root=f"{data_dir}/val", transform=transform)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader

2. 模型定义 (model.py)

python 复制代码
import torch.nn as nn
from torchvision.models import vgg16

class FaceRecognitionModel(nn.Module):
    def __init__(self, num_classes):
        super(FaceRecognitionModel, self).__init__()
        self.vgg16 = vgg16(pretrained=True)  # 加载预训练的 VGG16
        self.vgg16.classifier[6] = nn.Linear(4096, num_classes)  # 修改最后一层

    def forward(self, x):
        return self.vgg16(x)

3. 训练代码 (train.py)

python 复制代码
import torch
import torch.optim as optim
from model import FaceRecognitionModel
from data_loader import get_data_loaders

def train_model(model, train_loader, val_loader, num_epochs=20, lr=0.001, device='cuda'):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    model.to(device)
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss/len(train_loader):.4f}, Accuracy: {correct/total:.4f}")

        # 验证模型
        evaluate_model(model, val_loader, device)

def evaluate_model(model, val_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Validation Accuracy: {correct/total:.4f}")

if __name__ == "__main__":
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    num_classes = len(train_loader.dataset.classes)
    model = FaceRecognitionModel(num_classes)
    train_loader, val_loader = get_data_loaders("path_to_your_data")
    
    train_model(model, train_loader, val_loader, num_epochs=20, lr=0.001, device=device)
    torch.save(model.state_dict(), "best_model.pth")

4. 可视化 UI 界面 (ui.py)

python 复制代码
from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QLabel, QVBoxLayout, QWidget, QFileDialog
from PyQt5.QtGui import QImage, QPixmap
import cv2
import torch
from model import FaceRecognitionModel

class App(QMainWindow):
    def __init__(self, model, class_names):
        super().__init__()
        self.setWindowTitle("VGG16 人脸识别系统")
        self.setGeometry(100, 100, 800, 600)

        self.model = model
        self.class_names = class_names

        # UI 元素
        self.label_image = QLabel(self)
        self.label_image.setGeometry(50, 50, 700, 400)

        self.btn_load = QPushButton("加载图片", self)
        self.btn_load.setGeometry(50, 500, 150, 40)
        self.btn_load.clicked.connect(self.load_image)

        self.label_result = QLabel("预测结果:", self)
        self.label_result.setGeometry(250, 500, 500, 40)

    def load_image(self):
        file_path, _ = QFileDialog.getOpenFileName(self, "选择图片", "", "Images (*.jpg *.png)")
        if file_path:
            image = cv2.imread(file_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # 显示原始图像
            height, width, channel = image.shape
            bytes_per_line = 3 * width
            q_img = QImage(image.data, width, height, bytes_per_line, QImage.Format_RGB888)
            self.label_image.setPixmap(QPixmap.fromImage(q_img))

            # 预测
            result = self.predict_image(file_path)
            self.label_result.setText(f"预测结果:{result}")

    def predict_image(self, image_path):
        from PIL import Image
        from torchvision import transforms

        # 图像预处理
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        image = Image.open(image_path).convert("RGB")
        image = transform(image).unsqueeze(0).to('cuda')

        # 推理
        self.model.eval()
        with torch.no_grad():
            output = self.model(image)
            _, predicted = torch.max(output, 1)
            class_idx = predicted.item()

        return self.class_names[class_idx]

if __name__ == "__main__":
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # 加载模型
    model = FaceRecognitionModel(num_classes=len(train_loader.dataset.classes))
    model.load_state_dict(torch.load("best_model.pth"))
    model.to(device)
    model.eval()

    app = QApplication([])
    window = App(model, train_loader.dataset.classes)
    window.show()
    app.exec_()

目录结构

复制代码
基于VGG16的人脸识别/
├── data/
│   ├── train/
│   └── val/
├── hub/
│   ├── best_model.pth
│   ├── c罗.jpg
│   ├── c罗_1.jpg
│   ├── data_loader.py
│   ├── model.py
│   ├── train.py
│   ├── ui.py
│   ├── 小罗伯特唐尼.jpg
│   ├── 梅西.jpg
│   └── 梅西_1.jpg
└── 从部署/
相关推荐
GISer_Jing1 分钟前
前端组件库——shadcn/ui:轻量、自由、可拥有,解锁前端组件库的AI时代未来
前端·人工智能·ui
码农三叔3 分钟前
破解自动驾驶感知决策难题:智能导航技术与实战指南
人工智能·机器学习·自动驾驶
故城、3 分钟前
Python进阶
开发语言·python
Awu12274 分钟前
每天一个 Agent Skills:Context7 — 让 AI 永远写出最新的代码
人工智能·aigc·ai编程
机器白学10 分钟前
OpenClaw使用前置准备:Ollama+OpenwebUI本地部署/API调用大模型
人工智能·大语言模型
Blurpath住宅代理11 分钟前
AI代理配置实战指南:构建高可用、低风险的网络出口层
人工智能·ai·自动化·静态ip·动态代理·住宅ip·住宅代理
Mr数据杨15 分钟前
【Dv3Admin】Django一键配置权限规则
python·django·sqlite
心无旁骛~16 分钟前
【Claude Code开发】AI记账助手(miaozhang)微信小程序开发与部署完整指南
人工智能·微信小程序·notepad++
-杨豫17 分钟前
ZLibrary反爬破解实战:从验证码到加密全突破
python