基于 PyTorch 的 VGG16 深度学习人脸识别检测系统的实现+ui界面

VGG16深度学习人脸识别检测系统

文章目录

      • [**1. 数据准备**](#1. 数据准备)
        • [**1.1 数据加载器**](#1.1 数据加载器)
      • [**2. 模型定义**](#2. 模型定义)
      • [**3. 训练与评估**](#3. 训练与评估)
        • [**3.1 训练代码**](#3.1 训练代码)
      • [**4. 可视化 UI 界面**](#4. 可视化 UI 界面)
      • [1. 数据加载 (`data_loader.py`)](#1. 数据加载 (data_loader.py))
      • [2. 模型定义 (`model.py`)](#2. 模型定义 (model.py))
      • [3. 训练代码 (`train.py`)](#3. 训练代码 (train.py))
      • [4. 可视化 UI 界面 (`ui.py`)](#4. 可视化 UI 界面 (ui.py))
      • 目录结构

预期目标:
基于 PyTorch 的 VGG16 深度学习实现
支持GPU加速训练
可视化UI界面

基于 PyTorch 的 VGG16 深度学习人脸识别检测系统的实现,支持 GPU 加速训练和可视化 UI 界面。我们将分步骤完成数据准备、模型定义、训练与评估以及 UI 界面的构建。


1. 数据准备

假设你有一个包含人脸图像的数据集,目录结构如下:

复制代码
dataset/
├── train/
│   ├── person1/
│   │   ├── img1.jpg
│   │   └── ...
│   ├── person2/
│   │   ├── img1.jpg
│   │   └── ...
│   └── ...
└── val/
    ├── person1/
    │   ├── img1.jpg
    │   └── ...
    ├── person2/
    │   ├── img1.jpg
    │   └── ...
    └── ...
1.1 数据加载器

使用 torchvision.datasets.ImageFolder 和 torch.utils.data.DataLoader 加载数据。

python 复制代码
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# ImageNet per-channel normalization statistics
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]
_BATCH_SIZE = 32

# Preprocessing applied to every image: resize to the 224x224 VGG16 input
# size, convert to a tensor, then normalize each channel.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
    ]
)

# One ImageFolder dataset per split, both using the same preprocessing.
train_dataset = datasets.ImageFolder(root="dataset/train", transform=transform)
val_dataset = datasets.ImageFolder(root="dataset/val", transform=transform)

# Only the training split is shuffled.
train_loader = DataLoader(train_dataset, batch_size=_BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=_BATCH_SIZE, shuffle=False)

2. 模型定义

加载预训练的 VGG16 模型,并修改最后的全连接层以适应人脸分类任务。

python 复制代码
import torch.nn as nn
from torchvision.models import vgg16

class FaceRecognitionModel(nn.Module):
    """Pretrained VGG16 whose last classifier layer outputs ``num_classes`` scores.

    Args:
        num_classes: Number of outputs of the replaced final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Start from the pretrained VGG16 network.
        backbone = vgg16(pretrained=True)
        # Swap the last fully connected layer (classifier index 6) for one
        # that maps the 4096 features to the requested number of classes.
        backbone.classifier[6] = nn.Linear(4096, num_classes)
        self.vgg16 = backbone

    def forward(self, x):
        """Return the output of the wrapped VGG16 for input ``x``."""
        return self.vgg16(x)

3. 训练与评估

3.1 训练代码
python 复制代码
import torch.optim as optim
from tqdm import tqdm

def train_model(model, train_loader, val_loader, num_epochs=20, lr=0.001, device='cuda'):
    """Train ``model`` with Adam and cross-entropy, validating after every epoch.

    Args:
        model: Network to optimize; it is moved to ``device``.
        train_loader: Sized iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` batches handed to
            ``evaluate_model`` at the end of each epoch.
        num_epochs: Number of passes over ``train_loader``.
        lr: Learning rate given to the Adam optimizer.
        device: Device on which the model and every batch are placed.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    model.to(device)
    for epoch in range(num_epochs):
        # evaluate_model switches to eval mode, so re-enable train mode each epoch.
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            # The predicted class is the index of the largest score per sample.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Guard the averages so an empty loader reports zeros instead of
        # raising ZeroDivisionError.
        num_batches = len(train_loader)
        avg_loss = train_loss / num_batches if num_batches else 0.0
        accuracy = correct / total if total else 0.0
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")

        # Validate the model after each epoch.
        evaluate_model(model, val_loader, device)

def evaluate_model(model, val_loader, device):
    """Print and return the classification accuracy of ``model`` on ``val_loader``.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        val_loader: Iterable of ``(images, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Fraction of correctly classified samples as a float, or ``0.0`` when
        ``val_loader`` yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # The predicted class is the index of the largest score per sample.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty loader would otherwise raise ZeroDivisionError.
    accuracy = correct / total if total else 0.0
    print(f"Validation Accuracy: {accuracy:.4f}")
    return accuracy

4. 可视化 UI 界面

使用 PyQt5 构建一个简单的 GUI 应用程序,用于加载图片并进行人脸识别。

python 复制代码
from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QLabel, QVBoxLayout, QWidget, QFileDialog
from PyQt5.QtGui import QImage, QPixmap
import cv2
import torch

class App(QMainWindow):
    """Main window that loads an image file, shows it, and displays the predicted class.

    Args:
        model: Classifier called on a preprocessed ``(1, 3, 224, 224)`` tensor.
        class_names: Sequence indexed by the predicted class index to get a label.
    """

    def __init__(self, model, class_names):
        super().__init__()
        self.setWindowTitle("VGG16 人脸识别系统")
        self.setGeometry(100, 100, 800, 600)

        self.model = model
        self.class_names = class_names

        # UI elements, positioned with absolute geometry inside the window
        self.label_image = QLabel(self)
        self.label_image.setGeometry(50, 50, 700, 400)

        self.btn_load = QPushButton("加载图片", self)
        self.btn_load.setGeometry(50, 500, 150, 40)
        self.btn_load.clicked.connect(self.load_image)

        self.label_result = QLabel("预测结果:", self)
        self.label_result.setGeometry(250, 500, 500, 40)

    def load_image(self):
        """Ask the user for an image file, display it, and show its prediction."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择图片", "", "Images (*.jpg *.png)")
        if not file_path:
            # The dialog was cancelled; leave the window unchanged.
            return

        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread returns None instead of raising when it cannot read
            # the file; report that rather than crashing in cvtColor.
            self.label_image.clear()
            self.label_result.setText("预测结果:无法读取图片")
            return
        # OpenCV loads pixels as BGR; QImage.Format_RGB888 expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Show the original image
        height, width, _ = image.shape
        bytes_per_line = 3 * width
        q_img = QImage(image.data, width, height, bytes_per_line, QImage.Format_RGB888)
        self.label_image.setPixmap(QPixmap.fromImage(q_img))

        # Predict
        result = self.predict_image(file_path)
        self.label_result.setText(f"预测结果:{result}")

    def predict_image(self, image_path):
        """Return the class name predicted for the image stored at ``image_path``."""
        from PIL import Image
        from torchvision import transforms

        # Image preprocessing: resize, convert to tensor, normalize per channel
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        image = Image.open(image_path).convert("RGB")
        # Place the input on the device the model's parameters live on instead
        # of assuming CUDA, so the UI also works when the model is on the CPU.
        device = next(self.model.parameters()).device
        image = transform(image).unsqueeze(0).to(device)

        # Inference
        self.model.eval()
        with torch.no_grad():
            output = self.model(image)
            _, predicted = torch.max(output, 1)
            class_idx = predicted.item()

        return self.class_names[class_idx]

if __name__ == "__main__":
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Load the model. map_location remaps the checkpoint tensors onto the
    # selected device, so weights saved on a GPU also load on a CPU-only machine.
    model = FaceRecognitionModel(num_classes=len(train_dataset.classes))
    model.load_state_dict(torch.load("face_recognition_model.pth", map_location=device))
    model.to(device)
    model.eval()

    app = QApplication([])
    window = App(model, train_dataset.classes)
    window.show()
    app.exec_()

为了实现一个基于 VGG16 的人脸识别系统,并且支持 GPU 加速训练和可视化 UI 界面,我们可以分步骤进行。以下是详细的代码实现:

1. 数据加载 (data_loader.py)

python 复制代码
import torch
from torchvision import datasets, transforms

# ImageNet per-channel normalization statistics
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every image: resize to the 224x224 VGG16 input
# size, convert to a tensor, then normalize each channel.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
    ]
)

def get_data_loaders(data_dir, batch_size=32):
    """Build the training and validation data loaders for ``data_dir``.

    Args:
        data_dir: Directory containing the ``train`` and ``val`` sub-directories.
        batch_size: Batch size used by both loaders.

    Returns:
        A ``(train_loader, val_loader)`` tuple; only the training loader shuffles.
    """
    loader_cls = torch.utils.data.DataLoader

    # Both splits share the module-level preprocessing pipeline.
    training_set = datasets.ImageFolder(root=f"{data_dir}/train", transform=transform)
    validation_set = datasets.ImageFolder(root=f"{data_dir}/val", transform=transform)

    return (
        loader_cls(training_set, batch_size=batch_size, shuffle=True),
        loader_cls(validation_set, batch_size=batch_size, shuffle=False),
    )

2. 模型定义 (model.py)

python 复制代码
import torch.nn as nn
from torchvision.models import vgg16

class FaceRecognitionModel(nn.Module):
    """Pretrained VGG16 whose last classifier layer outputs ``num_classes`` scores.

    Args:
        num_classes: Number of outputs of the replaced final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Start from the pretrained VGG16 network.
        backbone = vgg16(pretrained=True)
        # Swap the last fully connected layer (classifier index 6) for one
        # that maps the 4096 features to the requested number of classes.
        backbone.classifier[6] = nn.Linear(4096, num_classes)
        self.vgg16 = backbone

    def forward(self, x):
        """Return the output of the wrapped VGG16 for input ``x``."""
        return self.vgg16(x)

3. 训练代码 (train.py)

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

from data_loader import get_data_loaders
from model import FaceRecognitionModel

def train_model(model, train_loader, val_loader, num_epochs=20, lr=0.001, device='cuda'):
    """Train ``model`` with Adam and cross-entropy, validating after every epoch.

    Args:
        model: Network to optimize; it is moved to ``device``.
        train_loader: Sized iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` batches handed to
            ``evaluate_model`` at the end of each epoch.
        num_epochs: Number of passes over ``train_loader``.
        lr: Learning rate given to the Adam optimizer.
        device: Device on which the model and every batch are placed.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    model.to(device)
    for epoch in range(num_epochs):
        # evaluate_model switches to eval mode, so re-enable train mode each epoch.
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            # The predicted class is the index of the largest score per sample.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Guard the averages so an empty loader reports zeros instead of
        # raising ZeroDivisionError.
        num_batches = len(train_loader)
        avg_loss = train_loss / num_batches if num_batches else 0.0
        accuracy = correct / total if total else 0.0
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")

        # Validate the model after each epoch.
        evaluate_model(model, val_loader, device)

def evaluate_model(model, val_loader, device):
    """Print and return the classification accuracy of ``model`` on ``val_loader``.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        val_loader: Iterable of ``(images, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Fraction of correctly classified samples as a float, or ``0.0`` when
        ``val_loader`` yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # The predicted class is the index of the largest score per sample.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty loader would otherwise raise ZeroDivisionError.
    accuracy = correct / total if total else 0.0
    print(f"Validation Accuracy: {accuracy:.4f}")
    return accuracy

if __name__ == "__main__":
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Build the loaders first: the number of classes is read from the training
    # dataset, so train_loader must exist before the model is created.
    train_loader, val_loader = get_data_loaders("path_to_your_data")
    num_classes = len(train_loader.dataset.classes)
    model = FaceRecognitionModel(num_classes)

    train_model(model, train_loader, val_loader, num_epochs=20, lr=0.001, device=device)
    # Persist only the weights; the UI script rebuilds the model and loads them.
    torch.save(model.state_dict(), "best_model.pth")

4. 可视化 UI 界面 (ui.py)

python 复制代码
from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QLabel, QVBoxLayout, QWidget, QFileDialog
from PyQt5.QtGui import QImage, QPixmap
import cv2
import torch
from model import FaceRecognitionModel

class App(QMainWindow):
    """Main window that loads an image file, shows it, and displays the predicted class.

    Args:
        model: Classifier called on a preprocessed ``(1, 3, 224, 224)`` tensor.
        class_names: Sequence indexed by the predicted class index to get a label.
    """

    def __init__(self, model, class_names):
        super().__init__()
        self.setWindowTitle("VGG16 人脸识别系统")
        self.setGeometry(100, 100, 800, 600)

        self.model = model
        self.class_names = class_names

        # UI elements, positioned with absolute geometry inside the window
        self.label_image = QLabel(self)
        self.label_image.setGeometry(50, 50, 700, 400)

        self.btn_load = QPushButton("加载图片", self)
        self.btn_load.setGeometry(50, 500, 150, 40)
        self.btn_load.clicked.connect(self.load_image)

        self.label_result = QLabel("预测结果:", self)
        self.label_result.setGeometry(250, 500, 500, 40)

    def load_image(self):
        """Ask the user for an image file, display it, and show its prediction."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择图片", "", "Images (*.jpg *.png)")
        if not file_path:
            # The dialog was cancelled; leave the window unchanged.
            return

        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread returns None instead of raising when it cannot read
            # the file; report that rather than crashing in cvtColor.
            self.label_image.clear()
            self.label_result.setText("预测结果:无法读取图片")
            return
        # OpenCV loads pixels as BGR; QImage.Format_RGB888 expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Show the original image
        height, width, _ = image.shape
        bytes_per_line = 3 * width
        q_img = QImage(image.data, width, height, bytes_per_line, QImage.Format_RGB888)
        self.label_image.setPixmap(QPixmap.fromImage(q_img))

        # Predict
        result = self.predict_image(file_path)
        self.label_result.setText(f"预测结果:{result}")

    def predict_image(self, image_path):
        """Return the class name predicted for the image stored at ``image_path``."""
        from PIL import Image
        from torchvision import transforms

        # Image preprocessing: resize, convert to tensor, normalize per channel
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        image = Image.open(image_path).convert("RGB")
        # Place the input on the device the model's parameters live on instead
        # of assuming CUDA, so the UI also works when the model is on the CPU.
        device = next(self.model.parameters()).device
        image = transform(image).unsqueeze(0).to(device)

        # Inference
        self.model.eval()
        with torch.no_grad():
            output = self.model(image)
            _, predicted = torch.max(output, 1)
            class_idx = predicted.item()

        return self.class_names[class_idx]

if __name__ == "__main__":
    # Imported here because only the script entry point needs the dataset.
    from data_loader import get_data_loaders

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # The class names come from the training dataset. Point this at the same
    # data directory that was used for training so the class list matches the
    # saved weights.
    train_loader, _ = get_data_loaders("path_to_your_data")
    class_names = train_loader.dataset.classes

    # Load the model. map_location remaps the checkpoint tensors onto the
    # selected device, so weights saved on a GPU also load on a CPU-only machine.
    model = FaceRecognitionModel(num_classes=len(class_names))
    model.load_state_dict(torch.load("best_model.pth", map_location=device))
    model.to(device)
    model.eval()

    app = QApplication([])
    window = App(model, class_names)
    window.show()
    app.exec_()

目录结构

复制代码
基于VGG16的人脸识别/
├── data/
│   ├── train/
│   └── val/
├── hub/
│   ├── best_model.pth
│   ├── c罗.jpg
│   ├── c罗_1.jpg
│   ├── data_loader.py
│   ├── model.py
│   ├── train.py
│   ├── ui.py
│   ├── 小罗伯特唐尼.jpg
│   ├── 梅西.jpg
│   └── 梅西_1.jpg
└── 从部署/
相关推荐
dagouaofei5 小时前
PPT AI生成实测报告:哪些工具值得长期使用?
人工智能·python·powerpoint
蓝桉~MLGT5 小时前
Ai-Agent学习历程—— Agent认知框架
人工智能·学习
视觉&物联智能5 小时前
【杂谈】-边缘计算竞赛:人工智能硬件缘何超越云端
人工智能·ai·chatgpt·aigc·边缘计算·agi·deepseek
Java中文社群5 小时前
变天了!字节vs阿里大模型正面硬刚,双人视频一键生成?实测结果太意外...
人工智能
冒泡的肥皂5 小时前
AI小应用分享
人工智能·后端
BoBoZz196 小时前
ExtractPolyLinesFromPolyData切割一个三维模型(球体),并可视化切割后产生的多条等高线
python·vtk·图形渲染·图形处理
لا معنى له6 小时前
学习笔记:卷积神经网络(CNN)
人工智能·笔记·深度学习·神经网络·学习·cnn
资源补给站6 小时前
论文13 | Nature: 数据驱动的地球系统科学的深度学习和过程理解
人工智能·深度学习
金融小师妹6 小时前
非农数据LSTM时序建模强化未来降息预期,GVX-GARCH驱动金价4300点位多空博弈
大数据·人工智能·深度学习
quikai19816 小时前
python练习第六组
java·前端·python