基于 PyTorch 和 OpenCV 的实时表情检测系统

本系统可以通过摄像头或视频文件进行表情检测,会在画面上实时标注人脸和识别出的表情,并提供统计信息和推荐内容。

python

运行

复制代码
import cv2
import torch
import torch.nn as nn
import numpy as np
from torchvision import transforms, models
from PIL import Image
import time
import os
import argparse
from collections import defaultdict, deque

# Smoke-test that cv2.putText can be called with a non-ASCII string.
# NOTE(review): per the OpenCV putText documentation, symbols the Hershey
# fonts cannot render are replaced by question marks instead of raising, so
# this probe only reports hard failures of the call itself - it cannot
# confirm that Chinese glyphs will actually be drawn.
try:
    cv2.putText(np.zeros((1, 1, 3), dtype=np.uint8), "测试", (0, 0),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
except Exception:
    # Catch Exception rather than using a bare except so that
    # KeyboardInterrupt / SystemExit are not swallowed at import time.
    print("警告: 系统可能不支持中文字体,表情名称可能显示为乱码")

# Class index -> emotion name. The position of each name in the tuple is the
# integer index used to look it up.
EMOTION_CLASSES = dict(enumerate((
    "angry",
    "contempt",
    "disgust",
    "fear",
    "happy",
    "natural",
    "sad",
    "sleepy",
    "surprised",
)))

# Emotion name -> Chinese display label.
EMOTION_CHINESE = dict(
    angry="愤怒",
    contempt="轻蔑",
    disgust="厌恶",
    fear="恐惧",
    happy="快乐",
    natural="中性",
    sad="悲伤",
    sleepy="困倦",
    surprised="惊讶",
)

# Emotion name -> drawing color used for visualisation. The tuples are in
# BGR channel order (e.g. (0, 0, 255) is red).
EMOTION_COLORS = dict(
    angry=(0, 0, 255),        # red
    contempt=(255, 191, 0),   # light blue
    disgust=(0, 255, 0),      # green
    fear=(255, 0, 255),       # purple
    happy=(0, 255, 255),      # yellow
    natural=(128, 128, 128),  # gray
    sad=(255, 0, 0),          # blue
    sleepy=(255, 255, 0),     # cyan
    surprised=(0, 165, 255),  # orange
)

# Emotion name -> content recommendation text (Chinese, shown to the user).
RECOMMENDATIONS = dict(
    angry="推荐冥想放松视频、舒缓音乐",
    contempt="推荐人文纪录片、思想深度内容",
    disgust="推荐自然风景、美食视频",
    fear="推荐励志演讲、勇气主题内容",
    happy="推荐搞笑视频、喜剧电影片段",
    natural="推荐综合热门内容",
    sad="推荐治愈音乐、温馨短片",
    sleepy="推荐活力舞蹈、提神饮品介绍",
    surprised="推荐奇闻轶事、探索发现内容",
)

def parse_args():
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace with the attributes ``video_file``, ``model_file``,
        ``out_dir``, ``gpu``, ``face_detector`` and ``fps``.
    """
    # One (flag, keyword-arguments) entry per option, registered in order.
    option_table = (
        ('--video_file', dict(type=str, default='0',
                              help='视频文件路径或摄像头ID (默认: 0)')),
        ('--model_file', dict(required=True,
                              help='预训练模型文件路径')),
        ('--out_dir', dict(type=str, default='output',
                           help='输出结果目录')),
        ('--gpu', dict(action='store_true',
                       help='是否使用GPU')),
        ('--face_detector', dict(type=str, default='haar',
                                 choices=['haar', 'dnn'],
                                 help='人脸检测器类型')),
        ('--fps', dict(type=int, default=30,
                       help='显示帧率')),
    )

    cli = argparse.ArgumentParser(description='实时表情检测系统')
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()

def load_model(model_path, device):
    """Build a MobileNetV2 emotion classifier and load its weights.

    Args:
        model_path: Path of the saved state dict to load.
        device: torch.device the weights are mapped to and the model moved to.

    Returns:
        The model, moved to ``device`` and switched to evaluation mode.
    """
    # The architecture is assumed to be MobileNetV2; the checkpoint must match.
    net = models.mobilenet_v2(pretrained=False)

    # Replace the final linear layer so the output size equals the number of
    # emotion classes.
    net.classifier[1] = nn.Linear(net.last_channel, len(EMOTION_CLASSES))

    state_dict = torch.load(model_path, map_location=device)
    net.load_state_dict(state_dict)
    net.to(device)
    net.eval()

    print(f"已加载模型: {model_path}")
    return net

def load_face_detector(detector_type='haar'):
    """Create the face detector selected by ``detector_type``.

    Args:
        detector_type: ``'haar'`` for the OpenCV Haar cascade, ``'dnn'`` for
            the OpenCV DNN face detector.

    Returns:
        A ``cv2.CascadeClassifier`` for ``'haar'``, or a DNN network for
        ``'dnn'``. When the DNN model files are not found in the current
        working directory, a warning is printed and the Haar cascade is
        returned instead.

    Raises:
        ValueError: If ``detector_type`` is unknown or the Haar cascade file
            cannot be loaded.
    """
    if detector_type not in ('haar', 'dnn'):
        raise ValueError(f"不支持的人脸检测器类型: {detector_type}")

    if detector_type == 'dnn':
        # The DNN detector needs both files next to the working directory.
        weights_path = "opencv_face_detector_uint8.pb"
        graph_path = "opencv_face_detector.pbtxt"
        if os.path.exists(weights_path) and os.path.exists(graph_path):
            return cv2.dnn.readNetFromTensorflow(weights_path, graph_path)
        print("警告: DNN人脸检测器模型文件不存在,将使用Haar级联分类器")
        # Fall through to the Haar cascade below.

    cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    cascade = cv2.CascadeClassifier(cascade_path)
    if cascade.empty():
        raise ValueError("无法加载Haar级联分类器,请检查OpenCV安装")
    return cascade

def detect_faces(image, face_detector, detector_type='haar'):
    """Detect faces in a BGR image.

    Args:
        image: Image array; its first two dimensions are read as
            (height, width) and it is converted with ``COLOR_BGR2GRAY`` on the
            Haar path.
        face_detector: Detector object returned by ``load_face_detector``.
        detector_type: ``'haar'`` or ``'dnn'``. Used as a hint only: a detector
            object that exposes ``detectMultiScale`` is always run as a Haar
            cascade.

    Returns:
        List of ``(x1, y1, x2, y2)`` tuples of plain Python ints. DNN boxes
        are clamped to the image bounds and empty boxes are dropped.
    """
    img_h, img_w = image.shape[:2]

    # load_face_detector('dnn') falls back to a Haar cascade when the DNN
    # model files are missing, while callers keep passing 'dnn'. Dispatch on
    # what the object can do so the fallback does not crash on setInput().
    if detector_type == 'haar' or hasattr(face_detector, 'detectMultiScale'):
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        rects = face_detector.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
        return [(int(x), int(y), int(x + w), int(y + h))
                for (x, y, w, h) in rects]

    # DNN detector: 300x300 input with per-channel mean subtraction.
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), [104, 117, 123], False, False)
    face_detector.setInput(blob)
    detections = face_detector.forward()

    faces = []
    scale = np.array([img_w, img_h, img_w, img_h])
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.5:  # confidence threshold
            # Box coordinates are relative; scale them to pixels.
            x1, y1, x2, y2 = (detections[0, 0, i, 3:7] * scale).astype("int")
            # Clamp to the image: a negative coordinate would otherwise wrap
            # around when the caller slices the frame.
            x1 = min(max(int(x1), 0), img_w)
            y1 = min(max(int(y1), 0), img_h)
            x2 = min(max(int(x2), 0), img_w)
            y2 = min(max(int(y2), 0), img_h)
            if x2 > x1 and y2 > y1:
                faces.append((x1, y1, x2, y2))

    return faces

def preprocess_face(face_image):
    """Turn a cropped BGR face image into a model input batch.

    The crop is converted to RGB, resized to 224x224, converted to a tensor
    and normalised with the mean/std values listed below.

    Args:
        face_image: Cropped face region as a BGR image array.

    Returns:
        The transformed tensor with a leading batch dimension of size 1 added.
    """
    pipeline = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # OpenCV delivers BGR; PIL expects RGB.
    rgb_face = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
    pil_face = Image.fromarray(rgb_face)
    tensor = pipeline(pil_face)
    return tensor.unsqueeze(0)

def detect_emotion(model, face_tensor, device):
    """Classify one face tensor.

    Args:
        model: Callable returning class scores of shape (1, num_classes).
        face_tensor: Input batch containing a single face.
        device: torch.device the input is moved to before inference.

    Returns:
        ``(emotion_id, confidence)``: the index of the highest softmax
        probability as an int and that probability as a float.
    """
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(face_tensor.to(device))
        probs = torch.softmax(logits, dim=1)
        top_prob, top_index = probs.max(dim=1)

    return top_index.item(), top_prob.item()

def draw_results(frame, faces, emotions, confidences):
    """Draw a box and an emotion label for every face onto ``frame``.

    Args:
        frame: Image to draw on; it is modified in place.
        faces: Sequence of ``(x1, y1, x2, y2)`` boxes.
        emotions: Emotion class ids, parallel to ``faces``.
        confidences: Prediction confidences, parallel to ``faces``.

    Returns:
        The same ``frame`` object with the annotations drawn.
    """
    for (x1, y1, x2, y2), emotion_id, confidence in zip(faces, emotions, confidences):
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        emotion = EMOTION_CLASSES[emotion_id]
        color = EMOTION_COLORS[emotion]

        # Face bounding box.
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

        # cv2.putText with a Hershey font draws every glyph it cannot render
        # as '?', so the label uses the ASCII class name instead of the
        # Chinese display name.
        label = f"{emotion}: {confidence:.2f}"

        # Put the label above the box; when the box touches the top of the
        # frame there is no room there, so draw it just inside the box.
        label_y = y1 - 10
        if label_y < 20:
            label_y = y1 + 25
        cv2.putText(frame, label, (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    return frame

def main():
    """Run the real-time emotion detection loop.

    Parses the command line, loads the classifier and the face detector, then
    reads frames from the camera or video file until the stream ends, 'q' is
    pressed or the user interrupts with Ctrl+C. Every frame is annotated and
    shown in a window; when ``--video_file`` is anything other than ``'0'``
    the annotated frames are also written to a video in ``--out_dir``.
    """
    args = parse_args()

    # Create the output directory on first use.
    if not os.path.isdir(args.out_dir):
        os.makedirs(args.out_dir, exist_ok=True)
        print(f"创建输出目录: {args.out_dir}")

    # Use CUDA only when it was requested and is actually available.
    device = torch.device("cuda" if args.gpu and torch.cuda.is_available() else "cpu")
    print(f"使用设备: {device}")

    model = load_model(args.model_file, device)
    face_detector = load_face_detector(args.face_detector)

    # A purely numeric source is a camera id, anything else a file path.
    try:
        video_file = int(args.video_file)
    except ValueError:
        video_file = args.video_file

    cap = cv2.VideoCapture(video_file)
    if not cap.isOpened():
        print(f"无法打开视频源: {args.video_file}")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print(f"视频源: {args.video_file}")
    print(f"分辨率: {width}x{height}")
    print(f"帧率: {fps:.2f} FPS")

    # Optional video writer: enabled for every source except the default
    # camera ('0').
    out = None
    if args.video_file != '0':
        if isinstance(video_file, int):
            # A bare camera id has no file extension, so give the output an
            # explicit container extension.
            output_name = f"output_camera_{video_file}.mp4"
        else:
            output_name = f"output_{os.path.basename(video_file)}"
        output_path = os.path.join(args.out_dir, output_name)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, args.fps, (width, height))
        if not out.isOpened():
            # Report the failure instead of silently writing nothing.
            print(f"警告: 无法创建输出视频: {output_path}")
            out.release()
            out = None

    frame_count = 0
    last_dominant = None
    start_time = time.time()

    try:
        # Created inside the try block so the capture and writer are released
        # even if window creation fails.
        cv2.namedWindow("实时表情检测", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("实时表情检测", 1024, 768)

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            frame_h, frame_w = frame.shape[:2]

            # faces / emotions / confidences are appended together so they
            # stay index-aligned even when a detection is skipped.
            faces = []
            emotions = []
            confidences = []

            for x1, y1, x2, y2 in detect_faces(frame, face_detector, args.face_detector):
                # Clamp to the frame; negative indices would wrap when slicing.
                x1, y1 = max(0, int(x1)), max(0, int(y1))
                x2, y2 = min(frame_w, int(x2)), min(frame_h, int(y2))
                if x2 <= x1 or y2 <= y1:
                    continue

                face_tensor = preprocess_face(frame[y1:y2, x1:x2])
                emotion_id, confidence = detect_emotion(model, face_tensor, device)

                faces.append((x1, y1, x2, y2))
                emotions.append(emotion_id)
                confidences.append(confidence)

            result_frame = draw_results(frame.copy(), faces, emotions, confidences)

            # Average processing rate since start-up.
            elapsed_time = time.time() - start_time
            current_fps = frame_count / elapsed_time if elapsed_time > 0 else 0.0
            cv2.putText(result_frame, f"FPS: {current_fps:.1f}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            if emotions:
                # Most frequent emotion among the faces of the current frame;
                # ties go to the emotion seen first.
                emotion_counts = defaultdict(int)
                for emotion_id in emotions:
                    emotion_counts[emotion_id] += 1

                dominant_emotion_id = max(emotion_counts, key=emotion_counts.get)
                dominant_emotion = EMOTION_CLASSES[dominant_emotion_id]
                dominant_color = EMOTION_COLORS[dominant_emotion]

                # Hershey fonts render non-ASCII text as '?', so the overlay
                # uses the ASCII class name.
                cv2.putText(result_frame, f"Dominant: {dominant_emotion}", (10, 60),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, dominant_color, 2)

                # The recommendation text is Chinese and cannot be drawn with
                # cv2.putText; print it whenever the dominant emotion changes.
                if dominant_emotion != last_dominant:
                    last_dominant = dominant_emotion
                    dominant_emotion_cn = EMOTION_CHINESE.get(dominant_emotion, dominant_emotion)
                    recommendation = RECOMMENDATIONS.get(dominant_emotion, "无推荐内容")
                    print(f"主导表情: {dominant_emotion_cn} | 推荐: {recommendation}")

            cv2.imshow("实时表情检测", result_frame)

            if out is not None:
                out.write(result_frame)

            # Press 'q' to quit.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    except KeyboardInterrupt:
        print("程序被用户中断")
    finally:
        # Always release the capture, the writer and the windows.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
        print(f"程序已退出,共处理 {frame_count} 帧")

if __name__ == "__main__":
    main()

使用说明

  1. 安装依赖

bash

复制代码
pip install torch torchvision opencv-python numpy pillow
  2. 下载人脸检测模型(可选):

    • 如果使用 DNN 人脸检测器,需要下载模型文件:
      • opencv_face_detector_uint8.pb
      • opencv_face_detector.pbtxt
    • 模型文件可以从 OpenCV 官方仓库获取
  3. 运行程序

bash

复制代码
python realtime_emotion_detection.py --model_file data/pretrained/mobilenet_v2_1.0_CrossEntropyLoss_20230313090258/model/latest_model_099_94.7200.pth --video_file 0
  4. 参数说明
    • --video_file: 视频文件路径或摄像头 ID(默认 0 表示内置摄像头)
    • --model_file: 预训练模型文件路径(必填)
    • --out_dir: 输出结果目录
    • --gpu: 是否使用 GPU 加速
    • --face_detector: 人脸检测器类型(haar 或 dnn)
    • --fps: 输出视频的帧率(默认 30)

功能特点

  1. 实时检测:通过摄像头或视频文件进行表情检测
  2. 多人脸支持:同时检测和识别画面中的多个人脸表情
  3. 表情可视化:在人脸周围绘制边框和表情标签
  4. 统计信息:显示当前主导表情和推荐内容
  5. 性能监控:实时显示处理帧率

按 q 键可以退出程序。如果输入的是视频文件,程序会在 --out_dir 指定的目录下生成带检测结果的输出视频。

相关推荐
ZhengEnCi5 小时前
09bad-斯坦福CS336作业一-构建优化器
人工智能
ZhengEnCi6 小时前
09bac-斯坦福CS336作业一-实现训练损失计算
人工智能
冬奇Lab6 小时前
Skill 系列(01):Skill 评测体系——如何量化一个 AI Skill 的质量
人工智能
兵慌码乱8 小时前
基于 MediaPipe 与 PySide2 的手势交互音乐控制系统实现:轻量化视觉交互全流程解析
python·opencv·计算机视觉·人机交互·手势识别·mediapipe·pyside2
IT_陈寒9 小时前
Redis内存爆了,原来我漏掉了这个致命配置
前端·人工智能·后端
用户35218024547511 小时前
🎆从 Prompt 到 Skill:让 Spring AI Agent 学会"装新技能"
人工智能·spring boot·ai编程
米小虾11 小时前
手把手教你搭建第一个生产级AI Agent:从选型到实战的完整指南
人工智能·agent
任沫11 小时前
Agent之Function Call
javascript·人工智能·go
米小虾11 小时前
2026年AI Agent全面爆发:从开源生态到企业级应用的进化之路
人工智能·agent
用户69190268133911 小时前
Vibe Coding 开发项目的基本范式
人工智能·设计模式·代码规范