基于 PyTorch 和 OpenCV 的实时表情检测系统

本系统可以通过摄像头或视频文件进行表情检测:程序会在画面上实时标注人脸和识别出的表情,并显示当前帧的主导表情、处理帧率和对应的推荐内容。

python

运行

复制代码
import cv2
import torch
import torch.nn as nn
import numpy as np
from torchvision import transforms, models
from PIL import Image
import time
import os
import argparse
from collections import defaultdict, deque

# Probe text rendering once at import time and warn if the call itself fails.
# NOTE: cv2.putText draws with OpenCV's built-in Hershey fonts. According to
# the OpenCV documentation, symbols the font cannot render are replaced by
# question marks instead of raising, so this probe cannot actually detect
# missing Chinese glyph support; it only catches an outright failure of the
# drawing call.
try:
    cv2.putText(np.zeros((1, 1, 3), dtype=np.uint8), "测试", (0, 0),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
except Exception:  # a bare ``except:`` would also swallow Ctrl-C / SystemExit
    print("警告: 系统可能不支持中文字体,表情名称可能显示为乱码")

# Class index -> emotion label. The position of a label in the tuple below is
# its class id, so the order of the entries must not be changed.
EMOTION_CLASSES = dict(enumerate((
    "angry",
    "contempt",
    "disgust",
    "fear",
    "happy",
    "natural",
    "sad",
    "sleepy",
    "surprised",
)))

# Emotion label -> Chinese display name.
EMOTION_CHINESE = dict(
    angry="愤怒",
    contempt="轻蔑",
    disgust="厌恶",
    fear="恐惧",
    happy="快乐",
    natural="中性",
    sad="悲伤",
    sleepy="困倦",
    surprised="惊讶",
)

# Emotion label -> drawing colour used for the face box and its label.
# Tuples are in BGR channel order (the OpenCV convention).
EMOTION_COLORS = dict(
    angry=(0, 0, 255),        # red
    contempt=(255, 191, 0),   # light blue
    disgust=(0, 255, 0),      # green
    fear=(255, 0, 255),       # purple
    happy=(0, 255, 255),      # yellow
    natural=(128, 128, 128),  # gray
    sad=(255, 0, 0),          # blue
    sleepy=(255, 255, 0),     # cyan
    surprised=(0, 165, 255),  # orange
)

# Emotion label -> recommendation text shown for the dominant emotion.
RECOMMENDATIONS = dict(
    angry="推荐冥想放松视频、舒缓音乐",
    contempt="推荐人文纪录片、思想深度内容",
    disgust="推荐自然风景、美食视频",
    fear="推荐励志演讲、勇气主题内容",
    happy="推荐搞笑视频、喜剧电影片段",
    natural="推荐综合热门内容",
    sad="推荐治愈音乐、温馨短片",
    sleepy="推荐活力舞蹈、提神饮品介绍",
    surprised="推荐奇闻轶事、探索发现内容",
)

def parse_args(argv=None):
    """Parse the command-line options of the emotion detection script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            original behaviour.

    Returns:
        argparse.Namespace with the attributes ``video_file``,
        ``model_file``, ``out_dir``, ``gpu``, ``face_detector`` and ``fps``.
    """
    parser = argparse.ArgumentParser(description='实时表情检测系统')
    parser.add_argument('--video_file', type=str, default='0',
                        help='视频文件路径或摄像头ID (默认: 0)')
    parser.add_argument('--model_file', required=True,
                        help='预训练模型文件路径')
    parser.add_argument('--out_dir', type=str, default='output',
                        help='输出结果目录')
    parser.add_argument('--gpu', action='store_true',
                        help='是否使用GPU')
    parser.add_argument('--face_detector', type=str, default='haar',
                        choices=['haar', 'dnn'], help='人脸检测器类型')
    # This value is only consumed as the frame rate of the output video
    # writer, so the help text says so (it used to read "display frame rate").
    parser.add_argument('--fps', type=int, default=30,
                        help='输出视频的帧率 (默认: 30)')
    return parser.parse_args(argv)

def load_model(model_path, device):
    """Build the MobileNetV2 emotion classifier and load its trained weights.

    Args:
        model_path: Path of the checkpoint file. It is passed straight to
            ``load_state_dict``, so it must hold a state dict for a
            MobileNetV2 whose final layer has ``len(EMOTION_CLASSES)`` outputs.
        device: ``torch.device`` the weights are mapped to and the model is
            moved to.

    Returns:
        The model on ``device``, switched to evaluation mode.
    """
    # Build an untrained backbone. Calling the factory without arguments
    # creates no pretrained weights on both old and new torchvision and avoids
    # the ``pretrained=`` keyword, which newer torchvision releases deprecate
    # in favour of ``weights=``.
    model = models.mobilenet_v2()

    # Replace the final linear layer so the output size matches the number of
    # emotion classes.
    num_classes = len(EMOTION_CLASSES)
    model.classifier[1] = nn.Linear(model.last_channel, num_classes)

    # SECURITY: torch.load unpickles the file; only load checkpoints that come
    # from a trusted source.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    print(f"已加载模型: {model_path}")
    return model

def load_face_detector(detector_type='haar'):
    """Create the face detector selected by ``detector_type``.

    Args:
        detector_type: ``'haar'`` for OpenCV's frontal-face Haar cascade or
            ``'dnn'`` for the OpenCV TensorFlow face detector.

    Returns:
        A ``cv2.CascadeClassifier`` for ``'haar'``, or the network returned by
        ``cv2.dnn.readNetFromTensorflow`` for ``'dnn'``. If either DNN model
        file is missing from the current working directory, a warning is
        printed and the Haar cascade is returned instead.

    Raises:
        ValueError: If ``detector_type`` is not supported, or the Haar cascade
            file could not be loaded.
    """
    if detector_type not in ('haar', 'dnn'):
        raise ValueError(f"不支持的人脸检测器类型: {detector_type}")

    if detector_type == 'dnn':
        # The DNN model files are looked up relative to the working directory.
        weights_path = "opencv_face_detector_uint8.pb"
        graph_path = "opencv_face_detector.pbtxt"

        if os.path.exists(weights_path) and os.path.exists(graph_path):
            return cv2.dnn.readNetFromTensorflow(weights_path, graph_path)

        # Model files are missing: warn, then fall through to the Haar cascade.
        print("警告: DNN人脸检测器模型文件不存在,将使用Haar级联分类器")

    cascade_file = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    cascade = cv2.CascadeClassifier(cascade_file)
    if cascade.empty():
        raise ValueError("无法加载Haar级联分类器,请检查OpenCV安装")
    return cascade

def detect_faces(image, face_detector, detector_type='haar'):
    """Detect faces in a BGR image.

    Args:
        image: Frame as a NumPy array in BGR channel order.
        face_detector: Detector object returned by ``load_face_detector``.
        detector_type: ``'haar'`` selects the cascade path; any other value
            selects the DNN path, unless ``face_detector`` is itself a cascade
            (see below).

    Returns:
        List of ``(x1, y1, x2, y2)`` boxes as tuples of Python ints. Boxes
        from the DNN path are clipped to the image and empty boxes are dropped.
    """
    img_h, img_w = image.shape[:2]

    # load_face_detector('dnn') returns a Haar cascade when the DNN model
    # files are missing, while callers still pass detector_type='dnn'. Pick
    # the code path from the object we actually got, so that fallback does not
    # crash on a missing ``setInput`` method.
    use_haar = detector_type == 'haar' or hasattr(face_detector, 'detectMultiScale')

    if use_haar:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        rects = face_detector.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
        # Plain ints keep the boxes usable as OpenCV point coordinates.
        return [(int(x), int(y), int(x + w), int(y + h)) for (x, y, w, h) in rects]

    # DNN detector: 300x300 input blob with per-channel mean subtraction.
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), [104, 117, 123], False, False)
    face_detector.setInput(blob)
    detections = face_detector.forward()

    faces = []
    scale = np.array([img_w, img_h, img_w, img_h])
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.5:  # confidence threshold
            # Columns 3..6 hold the box corners relative to the image size.
            x1, y1, x2, y2 = (detections[0, 0, i, 3:7] * scale).astype("int")

            # The network may report corners outside the frame. Negative
            # indices would wrap around when the caller slices the frame, so
            # clip the box to the image first.
            x1 = min(max(int(x1), 0), img_w)
            y1 = min(max(int(y1), 0), img_h)
            x2 = min(max(int(x2), 0), img_w)
            y2 = min(max(int(y2), 0), img_h)

            if x2 > x1 and y2 > y1:
                faces.append((x1, y1, x2, y2))

    return faces

def preprocess_face(face_image):
    """Convert a BGR face crop into a model input batch.

    The crop is converted to an RGB PIL image, resized to 224x224, turned
    into a tensor and normalised per channel with the mean/std values below
    (the standard ImageNet statistics used by torchvision models).

    Args:
        face_image: Face region as a BGR NumPy array.

    Returns:
        The transformed tensor with a leading batch dimension of size 1.
    """
    # OpenCV delivers BGR; PIL and the transforms expect RGB.
    rgb_pixels = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
    pil_face = Image.fromarray(rgb_pixels)

    pipeline = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    face_batch = pipeline(pil_face).unsqueeze(0)
    return face_batch

def detect_emotion(model, face_tensor, device):
    """Classify the emotion of one preprocessed face.

    Args:
        model: Callable that maps the input batch to per-class scores.
        face_tensor: Input batch holding a single face.
        device: Device the input is moved to before the forward pass.

    Returns:
        Tuple ``(emotion_id, confidence)``: the index of the most probable
        class as an int and its softmax probability as a float.
    """
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(face_tensor.to(device))
        class_probs = logits.softmax(dim=1)
        top_prob, top_index = class_probs.max(dim=1)

    return top_index.item(), top_prob.item()

def draw_results(frame, faces, emotions, confidences):
    """Draw a bounding box and an emotion label for every face.

    Args:
        frame: Image to annotate; it is modified in place.
        faces: Sequence of ``(x1, y1, x2, y2)`` boxes.
        emotions: Class ids (keys of ``EMOTION_CLASSES``), one per face.
        confidences: Prediction confidences, one per face.

    Returns:
        The same ``frame`` object, with the annotations drawn on it.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.9
    thickness = 2

    for face, emotion_id, confidence in zip(faces, emotions, confidences):
        # OpenCV drawing calls expect plain Python ints for point coordinates.
        x1, y1, x2, y2 = (int(v) for v in face)
        emotion = EMOTION_CLASSES[emotion_id]
        color = EMOTION_COLORS[emotion]

        # Face box.
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)

        # cv2.putText only has the built-in Hershey fonts, which contain no
        # CJK glyphs: unsupported characters come out as '?'. Use the
        # localized name only when it is pure ASCII, otherwise fall back to
        # the English class label so the overlay stays readable.
        display_name = EMOTION_CHINESE.get(emotion, emotion)
        if not all(ord(ch) < 128 for ch in display_name):
            display_name = emotion
        label = f"{display_name}: {confidence:.2f}"

        # Put the label above the box; if that would leave the frame (face at
        # the top edge), draw it just inside the box instead.
        (_, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)
        label_y = y1 - 10
        if label_y - text_height < 0:
            label_y = y1 + text_height + 10
        cv2.putText(frame, label, (x1, label_y), font, font_scale, color, thickness)

    return frame

def main():
    """Run the real-time emotion detection loop.

    Reads frames from the camera or video file named on the command line,
    detects faces, classifies the emotion of each face, overlays the results
    and shows them in a window until the stream ends or 'q' is pressed. For
    video files and non-default cameras the annotated frames are also written
    to ``<out_dir>/output_<source>``.
    """
    args = parse_args()

    # Create the output directory.
    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)
        print(f"创建输出目录: {args.out_dir}")

    # Select the device.
    device = torch.device("cuda" if args.gpu and torch.cuda.is_available() else "cpu")
    print(f"使用设备: {device}")

    # Load the emotion model.
    model = load_model(args.model_file, device)

    # Load the face detector. load_face_detector('dnn') hands back a Haar
    # cascade when the DNN model files are missing, so derive the detector
    # type from the object actually returned rather than from the CLI flag.
    face_detector = load_face_detector(args.face_detector)
    if hasattr(face_detector, 'detectMultiScale'):
        detector_type = 'haar'
    else:
        detector_type = args.face_detector

    # Open the video file or camera.
    try:
        video_file = int(args.video_file)  # try it as a camera id
    except ValueError:
        video_file = args.video_file  # otherwise treat it as a file path

    cap = cv2.VideoCapture(video_file)

    if not cap.isOpened():
        print(f"无法打开视频源: {args.video_file}")
        cap.release()
        return

    # Query the stream properties.
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print(f"视频源: {args.video_file}")
    print(f"分辨率: {width}x{height}")
    print(f"帧率: {fps:.2f} FPS")

    # Create the video writer (skipped for the default camera).
    out = None
    if isinstance(video_file, str) or args.video_file != '0':
        output_name = f"output_{os.path.basename(str(video_file))}"
        # A camera id yields a name without an extension, and the writer
        # needs one to choose a container format.
        if not os.path.splitext(output_name)[1]:
            output_name += ".mp4"
        output_path = os.path.join(args.out_dir, output_name)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, args.fps, (width, height))
        if not out.isOpened():
            # Keep running without recording instead of silently writing nothing.
            print(f"警告: 无法创建输出视频: {output_path}")
            out.release()
            out = None

    # Emotion ids of the most recent 30 frames. This used to be a dict keyed
    # by bounding box; boxes change on every frame, so it grew without bound.
    emotion_history = deque(maxlen=30)
    frame_count = 0
    start_time = time.time()

    # Create the display window.
    cv2.namedWindow("实时表情检测", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("实时表情检测", 1024, 768)

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            frame_h, frame_w = frame.shape[:2]

            # Detect faces.
            detected = detect_faces(frame, face_detector, detector_type)

            # The three lists are appended together so they stay index-aligned:
            # a skipped face must not shift the labels of the faces after it.
            faces = []
            emotions = []
            confidences = []

            for x1, y1, x2, y2 in detected:
                # Clamp the box to the frame; negative indices would wrap
                # around when slicing.
                x1, y1 = max(0, int(x1)), max(0, int(y1))
                x2, y2 = min(frame_w, int(x2)), min(frame_h, int(y2))

                # Crop the face region.
                face_image = frame[y1:y2, x1:x2]
                if face_image.size == 0:
                    continue

                # Preprocess the crop and classify its emotion.
                face_tensor = preprocess_face(face_image)
                emotion_id, confidence = detect_emotion(model, face_tensor, device)

                faces.append((x1, y1, x2, y2))
                emotions.append(emotion_id)
                confidences.append(confidence)

            emotion_history.append(tuple(emotions))

            # Draw the per-face results on a copy of the frame.
            result_frame = draw_results(frame.copy(), faces, emotions, confidences)

            # Average processing rate since start-up.
            elapsed_time = time.time() - start_time
            current_fps = frame_count / elapsed_time if elapsed_time > 0 else 0.0

            cv2.putText(result_frame, f"FPS: {current_fps:.1f}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            # Show the dominant emotion of this frame, if any face was classified.
            if emotions:
                emotion_counts = defaultdict(int)
                for emotion_id in emotions:
                    emotion_counts[emotion_id] += 1

                dominant_emotion_id = max(emotion_counts, key=emotion_counts.get)
                dominant_emotion = EMOTION_CLASSES[dominant_emotion_id]
                dominant_emotion_cn = EMOTION_CHINESE.get(dominant_emotion, dominant_emotion)
                dominant_color = EMOTION_COLORS[dominant_emotion]

                # NOTE(review): cv2.putText uses Hershey fonts without CJK
                # glyphs, so the Chinese text in the two overlays below is
                # rendered as '?'. Showing it properly needs a TrueType-based
                # renderer (e.g. PIL ImageDraw with a CJK font).
                cv2.putText(result_frame, f"主导表情: {dominant_emotion_cn}", (10, 60),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, dominant_color, 2)

                recommendation = RECOMMENDATIONS.get(dominant_emotion, "无推荐内容")
                cv2.putText(result_frame, f"推荐: {recommendation}", (10, 90),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

            # Show the frame.
            cv2.imshow("实时表情检测", result_frame)

            # Write the frame to the output video.
            if out is not None:
                out.write(result_frame)

            # Quit on 'q'.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    except KeyboardInterrupt:
        print("程序被用户中断")
    finally:
        # Release all resources, also on errors and Ctrl-C.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
        print(f"程序已退出,共处理 {frame_count} 帧")


if __name__ == "__main__":
    main()

使用说明

  1. 安装依赖

bash

复制代码
pip install torch torchvision opencv-python numpy pillow
  2. 下载人脸检测模型(可选):

    • 如果使用 DNN 人脸检测器,需要下载模型文件:
      • opencv_face_detector_uint8.pb
      • opencv_face_detector.pbtxt
    • 模型文件可以从 OpenCV 官方仓库获取,并需放在运行程序时的当前目录下;缺少时程序会自动改用 Haar 级联分类器
  3. 运行程序

bash

复制代码
python realtime_emotion_detection.py --model_file data/pretrained/mobilenet_v2_1.0_CrossEntropyLoss_20230313090258/model/latest_model_099_94.7200.pth --video_file 0
  4. 参数说明
    • --video_file: 视频文件路径或摄像头 ID(默认 0 表示内置摄像头)
    • --model_file: 预训练模型文件路径(必填)
    • --out_dir: 输出结果目录
    • --gpu: 是否使用 GPU 加速
    • --face_detector: 人脸检测器类型(haar 或 dnn)
    • --fps: 输出视频的帧率(默认 30)

功能特点

  1. 实时检测:通过摄像头或视频文件进行表情检测
  2. 多人脸支持:同时检测和识别画面中的多个人脸表情
  3. 表情可视化:在人脸周围绘制边框和表情标签
  4. 统计信息:显示当前主导表情和推荐内容
  5. 性能监控:实时显示处理帧率

按 q 键可以退出程序。如果输入的是视频文件(或非默认摄像头),程序会在输出目录中生成带检测结果的输出视频。

相关推荐
西猫雷婶43 分钟前
random.shuffle()函数随机打乱数据
开发语言·pytorch·python·学习·算法·线性回归·numpy
鑫宝的学习笔记1 小时前
Vmware虚拟机联网问题,显示:线缆已拔出!!!
人工智能·ubuntu
小李独爱秋1 小时前
机器学习中的聚类理论与K-means算法详解
人工智能·算法·机器学习·支持向量机·kmeans·聚类
comli_cn2 小时前
GSPO论文阅读
论文阅读·人工智能
大有数据可视化2 小时前
数字孪生背后的大数据技术:时序数据库为何是关键?
大数据·数据库·人工智能
Bioinfo Guy2 小时前
Genome Med|RAG-HPO做表型注释:学习一下大语言模型怎么作为发文思路
人工智能·大语言模型·多组学
张较瘦_2 小时前
[论文阅读] AI + 软件工程(Debug)| 告别 “猜 bug”:TreeMind 用 LLM+MCTS 破解 Android 不完整报告复现难题
论文阅读·人工智能·bug
深栈2 小时前
机器学习:线性回归
人工智能·pytorch·python·机器学习·线性回归·sklearn
AI视觉网奇3 小时前
虚拟机安装 网络问题
人工智能·虚拟机
云澈ovo3 小时前
FP16混合精度训练:Stable Diffusion生成速度提升300%的硬件配置方案
人工智能·机器学习·stable diffusion