[PYTHON-YOLOV8N] Facial expression recognition with yoloface + PyTorch + CNN
- Step 1: Organize the FER2013 dataset structure
- Step 2: Full training + validation code
- Step 3: FER2013 CSV to folder structure (simple script)
- Key notes and optimizations
- How to run
- Troubleshooting
The dataset used here is FER2013.
Step 1: Organize the FER2013 dataset structure
The raw FER2013 dataset ships as a single CSV file, so it must first be converted into a per-class folder layout (which makes it easy to load in PyTorch):
FER2013/
├── train/              # training set (~28,709 images)
│   ├── angry/          # anger (class 0)
│   ├── disgust/        # disgust (class 1)
│   ├── fear/           # fear (class 2)
│   ├── happy/          # happiness (class 3)
│   ├── sad/            # sadness (class 4)
│   ├── surprise/       # surprise (class 5)
│   └── neutral/        # neutral (class 6)
└── val/                # validation set (~3,589 images)
    ├── angry/
    ├── disgust/
    ├── fear/
    ├── happy/
    ├── sad/
    ├── surprise/
    └── neutral/
✅ If you only have the raw CSV file, run the conversion script from Step 3 first (a simple version is included at the end of this post).
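As a quick sanity check once the folders are in place (a minimal sketch, assuming the `DATA_ROOT` path used below), torchvision's `ImageFolder` should discover all seven classes on its own:

```python
from torchvision.datasets import ImageFolder

# ImageFolder infers class names from the subfolder names (sorted alphabetically)
train_set = ImageFolder("D:\\yolo\\biaoqing\\FER2013\\train")
print(train_set.classes)  # ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
print(len(train_set))     # number of training images (~28709)
```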
Step 2: Full training + validation code
```python
import os
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.datasets import ImageFolder
from ultralytics import YOLO
import numpy as np
from tqdm import tqdm  # progress bar: pip install tqdm
from sklearn.metrics import accuracy_score, classification_report
# ===================== 1. Global config =====================
# Dataset root (replace with your FER2013 path)
DATA_ROOT = "D:\\yolo\\biaoqing\\FER2013"
# Training hyperparameters
BATCH_SIZE = 64
EPOCHS = 50
LEARNING_RATE = 0.001
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # prefer the GPU
NUM_CLASSES = 7  # FER2013 has 7 emotion classes
# Where to save the best weights
SAVE_PATH = "emotion_model.pth"
# Index-to-name mapping (must match label_map in FER2013Dataset below;
# note this is not the raw FER2013 CSV label order)
emotion_names = {0: 'disgust', 1: 'surprise', 2: 'fear', 3: 'sad', 4: 'angry', 5: 'neutral', 6: 'happy'}
# ===================== 2. Emotion classifier (reusing your original structure) =====================
class EmotionClassifier(nn.Module):
    def __init__(self, num_classes=7):
        super().__init__()
        # Preprocessing transforms (augmentation for training, basic transforms for validation)
        self.train_transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((48, 48)),
            transforms.Grayscale(),
            transforms.RandomHorizontalFlip(),  # random flip (augmentation)
            transforms.RandomRotation(10),      # random rotation (augmentation)
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5])
        ])
        self.val_transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((48, 48)),
            transforms.Grayscale(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5])
        ])
        # Simple CNN classifier: 48x48 -> 24x24 -> 12x12 feature maps
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64 * 12 * 12, 256),
            nn.ReLU(),
            nn.Dropout(0.5),  # Dropout against overfitting
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        return self.cnn(x)
# ===================== 3. Dataset (YOLO face detection + cropping) =====================
class FER2013Dataset(Dataset):
    def __init__(self, root, phase="train"):
        super().__init__()
        self.phase = phase
        self.emotion_model = EmotionClassifier()  # used only for its transforms
        self.face_detector = YOLO('./face_yolov8n.pt', task='detect').to(DEVICE)  # YOLO face detector
        self.face_detector.eval()  # the detector stays frozen; we only run inference
        # Collect image paths and labels
        self.data = []
        # Folder-name -> class-index mapping; must match emotion_names above
        self.label_map = {'disgust': 0, 'surprise': 1, 'fear': 2, 'sad': 3, 'angry': 4, 'neutral': 5, 'happy': 6}
        for emotion in os.listdir(os.path.join(root, phase)):
            emotion_path = os.path.join(root, phase, emotion)
            if not os.path.isdir(emotion_path):
                continue
            label = self.label_map[emotion]
            for img_name in os.listdir(emotion_path):
                img_path = os.path.join(emotion_path, img_name)
                self.data.append((img_path, label))

    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        img_path, label = self.data[idx]
        # Read the image
        img = cv2.imread(img_path)
        if img is None:
            # Skip corrupted images by falling through to the next sample
            return self.__getitem__((idx + 1) % len(self))
        # Step 1: YOLO face detection (keep only the highest-confidence box)
        with torch.no_grad():
            results = self.face_detector(img, conf=0.001, iou=0.5)
        if len(results[0].boxes) == 0:
            # No face found: fall back to cropping the center region
            h, w = img.shape[:2]
            x1, y1 = w // 4, h // 4
            x2, y2 = 3 * w // 4, 3 * h // 4
        else:
            # Boxes are sorted by confidence, so boxes[0] is the most confident face
            box = results[0].boxes[0]
            x1, y1, x2, y2 = map(int, box.xyxy.cpu().numpy()[0])
            # Clamp the box to the image bounds
            x1 = max(0, x1)
            y1 = max(0, y1)
            x2 = min(img.shape[1], x2)
            y2 = min(img.shape[0], y2)
        # Crop the face region
        face_roi = img[y1:y2, x1:x2]
        # Step 2: preprocess (different transforms for train vs. val)
        if self.phase == "train":
            face_tensor = self.emotion_model.train_transform(face_roi)
        else:
            face_tensor = self.emotion_model.val_transform(face_roi)
        return face_tensor, torch.tensor(label, dtype=torch.long)
# Build a DataLoader
def build_dataloader(root, phase="train", batch_size=32):
    dataset = FER2013Dataset(root, phase)
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=(phase == "train"),  # shuffle the training set only
        num_workers=0,  # 0 on Windows to avoid multiprocessing errors
        drop_last=True
    )
    return dataloader
# ===================== 4. Training =====================
def train_model():
    # Model, loss function, optimizer
    model = EmotionClassifier(num_classes=NUM_CLASSES).to(DEVICE)
    criterion = nn.CrossEntropyLoss()  # cross-entropy for classification
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.8)  # LR decay
    # Data
    train_loader = build_dataloader(DATA_ROOT, phase="train", batch_size=BATCH_SIZE)
    val_loader = build_dataloader(DATA_ROOT, phase="val", batch_size=BATCH_SIZE)
    best_acc = 0.0  # track the best validation accuracy

    for epoch in range(EPOCHS):
        # ---------- Training ----------
        model.train()
        train_loss = 0.0
        train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Train]")
        for data in train_bar:
            inputs, labels = data
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            # Forward pass
            optimizer.zero_grad()  # clear gradients
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Backward pass + update
            loss.backward()
            optimizer.step()
            # Accumulate loss
            train_loss += loss.item() * inputs.size(0)
            train_bar.set_postfix(loss=loss.item())
        # Average training loss
        train_loss /= len(train_loader.dataset)

        # ---------- Validation ----------
        model.eval()
        val_loss = 0.0
        val_preds = []
        val_labels = []
        val_bar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Val]")
        with torch.no_grad():  # no gradients during validation
            for data in val_bar:
                inputs, labels = data
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                # Collect predictions
                preds = torch.argmax(outputs, dim=1).cpu().numpy()
                val_preds.extend(preds)
                val_labels.extend(labels.cpu().numpy())
        # Validation metrics
        val_loss /= len(val_loader.dataset)
        val_acc = accuracy_score(val_labels, val_preds)

        # Epoch summary
        print(f"\nEpoch {epoch+1} Summary:")
        print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
        # Save the best model
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), SAVE_PATH)
            print(f"Best model saved! Acc: {best_acc:.4f}")
        # Step the LR scheduler
        scheduler.step()

    print(f"\nTraining Finished! Best Val Acc: {best_acc:.4f}")
    return model
# ===================== 5. Detailed evaluation =====================
def evaluate_model():
    # Load the best checkpoint
    model = EmotionClassifier(num_classes=NUM_CLASSES).to(DEVICE)
    model.load_state_dict(torch.load(SAVE_PATH, map_location=DEVICE))
    model.eval()
    # Validation data
    val_loader = build_dataloader(DATA_ROOT, phase="val", batch_size=BATCH_SIZE)
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for data in tqdm(val_loader, desc="Evaluating"):
            inputs, labels = data
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy())
    # Full per-class report
    print("\n===== Validation Classification Report =====")
    print(classification_report(
        all_labels,
        all_preds,
        target_names=list(emotion_names.values()),
        digits=4
    ))
# ===================== 6. Inference (reusing and refining your code) =====================
def predict_emotion(img_path):
    # Load the trained classifier
    model = EmotionClassifier(num_classes=NUM_CLASSES).to(DEVICE)
    model.load_state_dict(torch.load(SAVE_PATH, map_location=DEVICE))
    model.eval()
    # Read the image
    img = cv2.imread(img_path)
    if img is None:
        print("Error: failed to read the image!")
        return
    # YOLO face detection (same weights file as in training)
    face_detector = YOLO('./face_yolov8n.pt', task='detect').to(DEVICE)
    results = face_detector(img, conf=0.001, iou=0.5)
    # Classify every detected face
    for box in results[0].boxes:
        x1, y1, x2, y2 = map(int, box.xyxy.cpu().numpy()[0])
        # Crop and preprocess the face
        face_roi = img[y1:y2, x1:x2]
        face_tensor = model.val_transform(face_roi).unsqueeze(0).to(DEVICE)
        # Emotion classification
        with torch.no_grad():
            pred = model(face_tensor)
            emotion_idx = torch.argmax(pred, dim=1).item()
            emotion_name = emotion_names[emotion_idx]
            confidence = torch.softmax(pred, dim=1)[0][emotion_idx].item()  # class confidence
        # Draw the annotation
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(
            img,
            f"{emotion_name} ({confidence:.2f})",
            (x1, y1 - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.8,
            (0, 0, 255),
            2
        )
    # Show and save the result
    cv2.imshow('YOLO Face Emotion', img)
    cv2.imwrite("pred_result.jpg", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
# ===================== 7. Main =====================
if __name__ == "__main__":
    # 1. Train
    train_model()
    # 2. Evaluate (prints the detailed classification report)
    evaluate_model()
    # 3. Test on a single image (replace with your own test image path)
    predict_emotion("test_face.jpg")
```
While it runs, the YOLO detector prints a pair of lines like the following for every image it processes; seeing this output means training is underway:
```
0: 640x640 1 face, 121.6ms
Speed: 6.1ms preprocess, 121.6ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 640)
0: 640x640 1 face, 133.8ms
Speed: 4.3ms preprocess, 133.8ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)
0: 640x640 (no detections), 132.3ms
Speed: 5.5ms preprocess, 132.3ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)
0: 640x640 1 face, 135.2ms
Speed: 4.3ms preprocess, 135.2ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)
0: 640x640 (no detections), 132.6ms
Speed: 4.9ms preprocess, 132.6ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)
0: 640x640 (no detections), 133.0ms
Speed: 6.4ms preprocess, 133.0ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)
0: 640x640 1 face, 139.0ms
Speed: 4.8ms preprocess, 139.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
0: 640x640 1 face, 126.7ms
Speed: 5.1ms preprocess, 126.7ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 640)
0: 640x640 (no detections), 122.5ms
Speed: 4.1ms preprocess, 122.5ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)
0: 640x640 1 face, 140.1ms
Speed: 5.0ms preprocess, 140.1ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)
```
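These per-image lines come from Ultralytics' prediction logging. If they clutter the console, passing `verbose=False` to the detector call silences them (a minimal tweak to the detection call in `__getitem__`):

```python
# Inside FER2013Dataset.__getitem__: suppress Ultralytics' per-image log lines
results = self.face_detector(img, conf=0.001, iou=0.5, verbose=False)
```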
Step 3: FER2013 CSV to folder structure (simple script)
If you only have the raw fer2013.csv file, run this conversion script first:
```python
import csv
import os
import numpy as np
from PIL import Image
# Paths
CSV_PATH = "fer2013.csv"  # raw CSV path
SAVE_ROOT = "D:/datasets/FER2013"  # make sure this matches DATA_ROOT in the training script
# Raw FER2013 label order: 0=angry, 1=disgust, 2=fear, 3=happy, 4=sad, 5=surprise, 6=neutral
emotion_labels = {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy', 4: 'sad', 5: 'surprise', 6: 'neutral'}
# Split mapping
set_mapping = {'Training': 'train', 'PublicTest': 'val', 'PrivateTest': 'test'}
# Create the folder tree
for set_name in ['train', 'val', 'test']:
    for emotion in emotion_labels.values():
        os.makedirs(os.path.join(SAVE_ROOT, set_name, emotion), exist_ok=True)
# Parse the CSV and write out the images
with open(CSV_PATH, 'r') as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    idx = 0
    for row in reader:
        emotion_idx = int(row[0])
        pixels = np.array(row[1].split(), dtype=np.uint8).reshape(48, 48)
        set_name = set_mapping[row[2]]
        emotion_name = emotion_labels[emotion_idx]
        # Save as a grayscale PNG
        img = Image.fromarray(pixels)
        img_path = os.path.join(SAVE_ROOT, set_name, emotion_name, f"{idx}.png")
        img.save(img_path)
        idx += 1
print("Dataset conversion finished!")
```
Key notes and optimizations
- Training:
  - `Dropout` guards against overfitting, and `StepLR` decays the learning rate;
  - the training set gets data augmentation (random flips and rotations) for better generalization; a random-brightness variant is sketched right after this list;
  - only the checkpoint with the best validation accuracy is saved, which further limits overfitting.
- Data loading:
  - `Dataset` + `DataLoader` load the data efficiently and handle large batches;
  - when YOLO finds no face, the center region of the image is cropped instead, so no sample is lost.
- Evaluation:
  - besides accuracy, `classification_report` prints per-class precision, recall, and F1 for a fuller picture of the model.
- Environment:
  - the device (GPU/CPU) is selected automatically, and `num_workers=0` avoids multiprocessing errors on Windows;
  - the `tqdm` progress bar makes training progress easy to monitor.
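For the extra augmentation mentioned above, a brightness jitter can be added to `train_transform` (a minimal sketch; `transforms.ColorJitter` also works on grayscale PIL images, so it can sit after `Grayscale`):

```python
from torchvision import transforms

# Same train_transform as in EmotionClassifier, with random brightness added
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((48, 48)),
    transforms.Grayscale(),
    transforms.ColorJitter(brightness=0.2),  # random brightness factor in [0.8, 1.2]
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])
```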
How to run
1. Run the CSV-to-folders script from Step 3 to get the structured FER2013 dataset;
2. Set `DATA_ROOT` in the training code to your dataset path;
3. Activate your virtual environment (mine is called `about`) and install the dependencies:

   ```cmd
   pip install tqdm scikit-learn pillow -i https://pypi.tuna.tsinghua.edu.cn/simple
   ```
4. Run the training script; when training finishes it evaluates automatically and saves `emotion_model.pth`;
5. Replace the test image path in `predict_emotion` and check the inference result.
Troubleshooting
- Out of GPU memory: reduce `BATCH_SIZE` (e.g. to 32 or 16);
- YOLO detection is slow: stick with the lightweight nano face model (`face_yolov8n.pt`), or detect all faces once up front and save the cropped images (a sketch of this is at the end of this section);
- Overfitting: add more augmentation (e.g. the random brightness shown earlier), raise the `Dropout` probability (e.g. to 0.6), or train for fewer epochs.
- If you keep getting "no face detected", verify that the detector weights work at all:

```python
from ultralytics import YOLO

test_img = "./1232.PNG"  # any sample image that contains a face
face_model = YOLO('./face_yolov8n.pt', task='detect')
results = face_model(test_img, conf=0.3)
print(f"Faces detected in the sample image: {len(results[0].boxes)}")
results[0].show()  # visualize the detections
```

If it still finds nothing, loosen the thresholds in the training code, e.g. `conf=0.01, iou=1.0`.
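For the pre-cropping idea above (detect once, train many times), a rough sketch under the same assumptions as the training code: run the detector over the dataset a single time, save the face crops into a mirrored folder tree (the `FER2013_faces` path is hypothetical), and then train on the crops with plain `ImageFolder`-style loading instead of detecting inside `__getitem__`:

```python
import os
import cv2
from ultralytics import YOLO

SRC_ROOT = "D:/yolo/biaoqing/FER2013"        # original dataset (same as DATA_ROOT)
DST_ROOT = "D:/yolo/biaoqing/FER2013_faces"  # where the cropped faces go (hypothetical)
detector = YOLO('./face_yolov8n.pt', task='detect')

for phase in ['train', 'val']:
    for emotion in os.listdir(os.path.join(SRC_ROOT, phase)):
        src_dir = os.path.join(SRC_ROOT, phase, emotion)
        dst_dir = os.path.join(DST_ROOT, phase, emotion)
        os.makedirs(dst_dir, exist_ok=True)
        for name in os.listdir(src_dir):
            img = cv2.imread(os.path.join(src_dir, name))
            if img is None:
                continue
            results = detector(img, conf=0.001, iou=0.5, verbose=False)
            h, w = img.shape[:2]
            if len(results[0].boxes) == 0:
                # Same fallback as in training: crop the center region
                x1, y1, x2, y2 = w // 4, h // 4, 3 * w // 4, 3 * h // 4
            else:
                # Highest-confidence box, clamped to the image bounds
                x1, y1, x2, y2 = map(int, results[0].boxes[0].xyxy.cpu().numpy()[0])
                x1, y1 = max(0, x1), max(0, y1)
                x2, y2 = min(w, x2), min(h, y2)
            cv2.imwrite(os.path.join(dst_dir, name), img[y1:y2, x1:x2])
```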