Trae SOLO 生成 TensorFlow.js 手势抓取物品太牛了 程序员可以退下了

TensorFlow.js 实时手势抓取物品:从0到1实现手势交互

项目介绍

这是一个基于 TensorFlow.js 和 MediaPipe Hands 模型的实时手势抓取物品应用,支持语音反馈功能。用户可以通过摄像头捕捉手部动作,使用手势控制虚拟光标,实现对屏幕上虚拟物品的抓取、移动、旋转和缩放操作。

功能特点

  • ✅ 实时手部检测和关键点跟踪
  • ✅ 多种手势识别(抓取、张开、OK、点赞、拳头)
  • ✅ 虚拟光标跟随手部移动
  • ✅ 物品抓取、移动、旋转和缩放
  • ✅ 语音反馈
  • ✅ 响应式设计,支持移动端
  • ✅ 动态性能调整

技术栈

  • 前端框架:纯 JavaScript (ES 模块)
  • 构建工具:Vite
  • 机器学习库
    • TensorFlow.js
    • @tensorflow-models/hand-pose-detection
    • @mediapipe/hands
  • 浏览器 API
    • WebRTC (getUserMedia)
    • Canvas API
    • Web Speech API

核心架构设计

项目采用模块化架构,各功能模块职责清晰,便于维护和扩展:

bash
src/
├── js/
│   ├── camera.js        # 摄像头管理
│   ├── detector.js      # 手部检测和手势识别
│   ├── interaction.js   # 交互逻辑
│   ├── renderer.js      # 手部关键点绘制
│   ├── utils.js         # 工具函数
│   ├── voice.js         # 语音合成
│   └── config.js        # 配置文件
├── index.css           # 样式文件
└── main.js             # 项目入口

核心代码解析

1. 项目初始化与主流程

main.js - 项目入口文件,负责初始化和协调各模块

javascript
// 导入核心模块
import { setupCamera, stopCamera, isCameraOn } from './js/camera.js';
import { initDetector, detectHands, stopDetection } from './js/detector.js';
import { updateStatus } from './js/utils.js';
import { initSpeechSynthesis } from './js/voice.js';

// Shared speech-synthesis helper (initialized once at module load).
let voice = initSpeechSynthesis();

// Module-level handles shared by init()/toggleCamera(): video/canvas are
// DOM elements, detector is the hand-pose model instance, animationId is
// the requestAnimationFrame handle returned by the detection loop.
let video, canvas, detector, animationId;

// Bootstrap the app: cache the DOM nodes, wire the start button to
// toggleCamera, and preload the hand-detection model with visual and
// spoken status feedback. Errors are reported via updateStatus.
async function init() {
  video = document.getElementById('video');
  canvas = document.getElementById('output');
  document.getElementById('startBtn').addEventListener('click', toggleCamera);

  // Load the detection model up front so the camera can start instantly.
  updateStatus('模型加载中...', true);
  try {
    detector = await initDetector();
    const readyMessage = '模型加载成功,请启动摄像头';
    updateStatus(readyMessage);
    voice.synthesizeSpeechSentenceBySentence(readyMessage);
  } catch (error) {
    console.error('模型加载失败:', error);
    updateStatus('模型加载失败: ' + error.message);
  }
}

// Toggle the camera: when it is on, stop the stream and the detection
// loop; otherwise start the camera and kick off hand detection.
// Uses the module-level video/canvas/detector/animationId handles.
async function toggleCamera() {
  if (isCameraOn) {
    voice.synthesizeSpeechSentenceBySentence("摄像头已关闭");
    stopCamera(video);
    stopDetection(animationId);
    document.getElementById('startBtn').textContent = '启动摄像头';
    updateStatus('摄像头已关闭');
  } else {
    const startBtn = document.getElementById('startBtn');
    try {
      startBtn.disabled = true;
      startBtn.textContent = '启动中...';

      voice.synthesizeSpeechSentenceBySentence("摄像头启动中");
      await setupCamera(video, canvas);
      startBtn.textContent = '关闭摄像头';
      // Bug fix: setupCamera has already resolved here, so report the
      // camera as started instead of the misleading "starting..." text.
      updateStatus('摄像头已启动');

      // Begin the hand-detection loop.
      animationId = detectHands(video, canvas, detector);
    } catch (error) {
      console.error('摄像头启动失败:', error);
      updateStatus('摄像头启动失败: ' + error.message);
      // Bug fix: restore the label so the user can retry; previously the
      // button stayed stuck on '启动中...' after a failure.
      startBtn.textContent = '启动摄像头';
    } finally {
      // Bug fix: re-enable the button on BOTH success and failure; the
      // original only re-enabled it on success, leaving it disabled
      // forever if getUserMedia was denied or failed.
      startBtn.disabled = false;
    }
  }
}

// Bootstrap once all page resources (including the video element) are ready.
window.addEventListener('load', init);

2. 手部检测与手势识别

detector.js - 核心模块,负责手部检测和手势识别

javascript
import * as handPoseDetection from '@tensorflow-models/hand-pose-detection';
import { drawHands, clearCanvas } from './renderer.js';
import { updateInteraction } from './interaction.js';
import { updateStatus, updateInteractionStatus } from './utils.js';
import { DETECTOR_CONFIG, GESTURE_CONFIG, PERFORMANCE_CONFIG } from './config.js';

// Module-level state: the detector instance, the most recent gesture
// label, and the latest wrist position in canvas coordinates (shared
// with the interaction module).
let detector;
let currentGesture = '未检测到手势';
let handPosition = { x: 0, y: 0 };

// Create and cache the MediaPipeHands detector instance using the
// project-wide DETECTOR_CONFIG settings. Returns the detector.
async function initDetector() {
  const model = handPoseDetection.SupportedModels.MediaPipeHands;
  const options = {
    runtime: 'mediapipe',
    modelType: DETECTOR_CONFIG.MODEL_TYPE,
    maxHands: DETECTOR_CONFIG.MAX_HANDS,
    solutionPath: DETECTOR_CONFIG.SOLUTION_PATH
  };
  detector = await handPoseDetection.createDetector(model, options);
  return detector;
}

// Run the hand-detection loop on `video`, drawing results onto `canvas`
// and feeding gestures into the interaction layer. Detection frequency
// adapts to the measured frame rate. Returns a requestAnimationFrame id.
function detectHands(video, canvas, detector) {
  let animationId;
  const ctx = canvas.getContext('2d');

  // Performance-monitoring state.
  let frameCount = 0;
  let lastFpsTime = 0;
  let currentFps = 0;
  let detectionInterval = 0; // ms between estimateHands calls (0 = every frame)
  let lastDetectionTime = 0;

  // Recompute FPS once per FPS_CHECK_INTERVAL and adapt the detection
  // interval to the measured frame rate.
  function updateFps(timestamp) {
    frameCount++;
    if (timestamp - lastFpsTime >= PERFORMANCE_CONFIG.FPS_CHECK_INTERVAL) {
      currentFps = Math.round((frameCount * 1000) / (timestamp - lastFpsTime));
      frameCount = 0;
      lastFpsTime = timestamp;

      // Bug fix: the adjustment was inverted. When the page struggles
      // (low FPS) we must detect LESS often (raise the interval); when
      // there is headroom (high FPS) we can detect more often (lower it).
      // The original did the opposite, increasing load exactly when the
      // device was already overloaded.
      if (currentFps < PERFORMANCE_CONFIG.MIN_FPS) {
        detectionInterval = Math.min(detectionInterval + 10, 100);
      } else if (currentFps > PERFORMANCE_CONFIG.MAX_FPS) {
        detectionInterval = Math.max(detectionInterval - 10, 0);
      }
    }
  }

  // Per-frame worker: throttled hand detection + rendering + interaction.
  async function detect(timestamp) {
    try {
      updateFps(timestamp);

      // Throttle estimateHands to at most one call per detectionInterval.
      if (timestamp - lastDetectionTime >= detectionInterval) {
        const hands = await detector.estimateHands(video, {
          flipHorizontal: DETECTOR_CONFIG.FLIP_HORIZONTAL
        });

        clearCanvas(ctx, canvas);

        if (hands.length > 0) {
          drawHands(ctx, hands);
          analyzeGesture(hands[0]);
          updateInteraction(hands[0], canvas, currentGesture, handPosition);
        } else {
          currentGesture = '未检测到手势';
          updateInteractionStatus(currentGesture);
        }

        updateStatus(`检测到 ${hands.length} 只手 | 帧率: ${currentFps}`);
        lastDetectionTime = timestamp;
      }
    } catch (error) {
      console.error('手部检测错误:', error);
    }

    animationId = requestAnimationFrame(detect);
  }

  // Bug fix: schedule the first frame via requestAnimationFrame instead of
  // calling detect(0) directly. detect() is async, so the original function
  // returned before animationId was ever assigned — the caller always
  // received undefined and stopDetection() could never cancel the loop.
  // NOTE(review): the returned id still goes stale as later frames are
  // scheduled; confirm stopDetection tracks/cancels the latest id.
  animationId = requestAnimationFrame(detect);
  return animationId;
}

// Classify the gesture of the given hand and publish it through
// updateInteractionStatus(); also refreshes the shared handPosition from
// the wrist keypoint. Falls back to '未检测到手势' when any required
// keypoint is missing.
function analyzeGesture(hand) {
  // The wrist keypoint drives the virtual cursor position.
  const wrist = hand.keypoints.find(k => k.name === 'wrist');

  if (wrist) {
    handPosition.x = wrist.x;
    handPosition.y = wrist.y;
  }

  // Fingertip keypoints.
  const thumbTip = hand.keypoints.find(k => k.name === 'thumb_tip');
  const indexTip = hand.keypoints.find(k => k.name === 'index_finger_tip');
  const middleTip = hand.keypoints.find(k => k.name === 'middle_finger_tip');
  const ringTip = hand.keypoints.find(k => k.name === 'ring_finger_tip');
  const pinkyTip = hand.keypoints.find(k => k.name === 'pinky_tip');

  // Knuckle (MCP) keypoints.
  const indexMcp = hand.keypoints.find(k => k.name === 'index_finger_mcp');
  const middleMcp = hand.keypoints.find(k => k.name === 'middle_finger_mcp');
  const ringMcp = hand.keypoints.find(k => k.name === 'ring_finger_mcp');
  const pinkyMcp = hand.keypoints.find(k => k.name === 'pinky_mcp');
  const thumbMcp = hand.keypoints.find(k => k.name === 'thumb_mcp');

  // Only classify when the full set of keypoints is present.
  if (thumbTip && indexTip && middleTip && ringTip && pinkyTip &&
      indexMcp && middleMcp && ringMcp && pinkyMcp && thumbMcp && wrist) {

    // Pinch distance between thumb and index fingertips (canvas px).
    const thumbIndexDistance = Math.sqrt(
      Math.pow(thumbTip.x - indexTip.x, 2) +
      Math.pow(thumbTip.y - indexTip.y, 2)
    );

    // Per-finger bend, normalized to 0 (straight) .. 1 (folded).
    const indexBent = calculateFingerBent(indexTip, indexMcp, wrist);
    const middleBent = calculateFingerBent(middleTip, middleMcp, wrist);
    const ringBent = calculateFingerBent(ringTip, ringMcp, wrist);
    const pinkyBent = calculateFingerBent(pinkyTip, pinkyMcp, wrist);
    const thumbBent = calculateFingerBent(thumbTip, thumbMcp, wrist);

    const avgBent = (indexBent + middleBent + ringBent + pinkyBent) / 4;

    // Gesture classification, most specific first.
    if (avgBent > GESTURE_CONFIG.FIST_THRESHOLD && thumbBent > GESTURE_CONFIG.FIST_THRESHOLD) {
      currentGesture = '拳头手势';
    }
    // NOTE(review): this "thumbs up" test checks that the INDEX finger is
    // straight and the others bent; a thumbs-up usually means the thumb is
    // extended — confirm the intended gesture definition.
    else if (indexBent < 0.3 && middleBent > 0.5 && ringBent > 0.5 && pinkyBent > 0.5) {
      currentGesture = '点赞手势';
    }
    // Bug fix: the OK check must come BEFORE the grab check. OK_THRESHOLD
    // (30) is smaller than GRAB_THRESHOLD (40), so any pinch tight enough
    // for OK also satisfied the grab condition first, which made the OK
    // branch unreachable in the original ordering.
    else if (thumbIndexDistance < GESTURE_CONFIG.OK_THRESHOLD &&
             indexBent < 0.3 && middleBent < 0.3 && ringBent < 0.3 && pinkyBent < 0.3) {
      currentGesture = 'OK手势';
    }
    else if (thumbIndexDistance < GESTURE_CONFIG.GRAB_THRESHOLD) {
      currentGesture = '抓取手势';
    }
    else if (indexBent < 0.3 && middleBent < 0.3 && ringBent < 0.3 && pinkyBent < 0.3) {
      currentGesture = '张开手势';
    }
    else {
      currentGesture = '未检测到手势';
    }
  }
  else {
    currentGesture = '未检测到手势';
  }

  updateInteractionStatus(currentGesture);
}

// Estimate how bent a finger is as the angle at the wrist between the
// wrist→mcp and wrist→tip directions (law of cosines), normalized so
// 0 = fully extended (tip, mcp, wrist colinear) and larger values mean
// the tip has folded toward the palm. Each argument is a keypoint
// object with numeric `x`/`y` fields. Returns a number in [0, 1].
function calculateFingerBent(tip, mcp, wrist) {
  const dist = (a, b) => Math.hypot(a.x - b.x, a.y - b.y);

  const fingerLength = dist(tip, mcp);
  const wristToTip = dist(tip, wrist);
  const wristToMcp = dist(mcp, wrist);

  // Bug fix: guard degenerate geometry (coincident keypoints). The
  // original divided by zero here, producing NaN that silently defeated
  // every downstream threshold comparison.
  if (wristToTip === 0 || wristToMcp === 0) {
    return 0;
  }

  const cosAngle =
    (wristToTip * wristToTip + wristToMcp * wristToMcp - fingerLength * fingerLength) /
    (2 * wristToTip * wristToMcp);

  // Clamp before acos to absorb floating-point drift outside [-1, 1].
  const angle = Math.acos(Math.max(-1, Math.min(1, cosAngle)));
  return angle / Math.PI; // normalize to the 0-1 range
}

3. 手部关键点绘制

renderer.js - 负责绘制手部关键点和连接线

javascript
import { VISUAL_CONFIG } from './config.js';

// Wipe the entire drawing surface before the next frame is rendered.
function clearCanvas(ctx, canvas) {
  const { width, height } = canvas;
  ctx.clearRect(0, 0, width, height);
}

// Render each detected hand: outline, skeleton connections, then one
// colored dot per keypoint (color keyed by finger, purple wrist,
// white for anything unrecognized).
function drawHands(ctx, hands) {
  const FINGER_COLORS = [
    ['thumb', 'red'],
    ['index', 'blue'],
    ['middle', 'green'],
    ['ring', 'orange'],
    ['pinky', 'yellow'],
  ];

  // Resolve the fill color for a keypoint name.
  const colorFor = (name) => {
    if (name === 'wrist') {
      return 'purple';
    }
    const entry = FINGER_COLORS.find(([prefix]) => name.includes(prefix));
    return entry ? entry[1] : 'white';
  };

  for (const hand of hands) {
    drawHandOutline(ctx, hand.keypoints);
    drawConnections(ctx, hand);

    for (const { x, y, name } of hand.keypoints) {
      ctx.beginPath();
      ctx.arc(x, y, VISUAL_CONFIG.KEYPOINT_RADIUS, 0, 2 * Math.PI);
      ctx.fillStyle = colorFor(name);
      ctx.fill();
      ctx.strokeStyle = 'black';
      ctx.lineWidth = 1;
      ctx.stroke();
    }
  }
}

// Draw the skeleton lines between keypoints using the connection index
// pairs supplied by the model; silently does nothing when the model
// provides no connections.
function drawConnections(ctx, hand) {
  ctx.strokeStyle = 'rgba(255, 255, 255, 0.7)';
  ctx.lineWidth = VISUAL_CONFIG.CONNECTION_WIDTH;
  ctx.lineCap = 'round';

  if (!hand.connections) {
    return;
  }

  for (const [startIdx, endIdx] of hand.connections) {
    const from = hand.keypoints[startIdx];
    const to = hand.keypoints[endIdx];
    if (!from || !to) {
      continue;
    }
    ctx.beginPath();
    ctx.moveTo(from.x, from.y);
    ctx.lineTo(to.x, to.y);
    ctx.stroke();
  }
}

4. 物品交互逻辑

interaction.js - 处理物品的抓取、移动、旋转和缩放

javascript
import { initSpeechSynthesis } from './voice.js';
import { INTERACTION_CONFIG, VISUAL_CONFIG } from './config.js';

// Shared speech-synthesis helper for audio feedback on grabs.
let voice = initSpeechSynthesis();
// Interaction state: the element currently held (null when none), the
// cursor-to-object offset captured at grab time, the accumulated
// rotation (deg) / scale applied to the held object, and the last
// cursor position used as the rotation reference point.
let grabbedObject = null;
let grabOffset = { x: 0, y: 0 };
let objectTransform = { rotation: 0, scale: 1 };
let rotationReference = null;

// Map the detected hand position into the interaction area, render the
// virtual cursor, and dispatch the current gesture to the matching
// object action (grab / release / rotate / scale / drag).
function updateInteraction(hand, canvas, currentGesture, handPosition) {
  const interactionArea = document.querySelector('.interaction-area');
  const rect = interactionArea.getBoundingClientRect();

  // Scale canvas-space coordinates into interaction-area space.
  const cursorX = (handPosition.x / canvas.width) * rect.width;
  const cursorY = (handPosition.y / canvas.height) * rect.height;

  showCursor(interactionArea, cursorX, cursorY, currentGesture);

  if (currentGesture === '抓取手势' && !grabbedObject) {
    tryGrabObject(cursorX, cursorY);
  } else if (grabbedObject && currentGesture === '张开手势') {
    releaseObject();
  } else if (grabbedObject && currentGesture === 'OK手势') {
    rotateObject(hand, cursorX, cursorY);
  } else if (grabbedObject && currentGesture === '点赞手势') {
    scaleObject(hand, cursorX, cursorY);
  }

  // While the grab gesture is held, drag the object along with the hand.
  if (grabbedObject && currentGesture === '抓取手势') {
    moveObject(cursorX, cursorY);
  }
}

// Create (on first use) and position the floating cursor element that
// mirrors the user's hand inside the interaction area; its color and
// size reflect the active gesture.
function showCursor(area, x, y, currentGesture) {
  let cursor = document.getElementById('hand-cursor');

  if (!cursor) {
    cursor = document.createElement('div');
    cursor.id = 'hand-cursor';
    Object.assign(cursor.style, {
      position: 'absolute',
      width: `${INTERACTION_CONFIG.CURSOR_SIZE}px`,
      height: `${INTERACTION_CONFIG.CURSOR_SIZE}px`,
      borderRadius: '50%',
      pointerEvents: 'none',
      zIndex: '10',
      transform: 'translate(-50%, -50%)',
      transition: 'all 0.1s ease',
    });
    area.appendChild(cursor);
  }

  // Pick color/size for the active gesture (grab is highlighted larger).
  let color = VISUAL_CONFIG.CURSOR_COLOR_OTHER;
  let size = INTERACTION_CONFIG.CURSOR_SIZE;

  if (currentGesture === '抓取手势') {
    color = VISUAL_CONFIG.CURSOR_COLOR_GRAB;
    size = INTERACTION_CONFIG.CURSOR_SIZE * 1.2;
  } else if (currentGesture === '张开手势') {
    color = VISUAL_CONFIG.CURSOR_COLOR_OPEN;
  }

  cursor.style.left = `${x}px`;
  cursor.style.top = `${y}px`;
  cursor.style.background = color;
  cursor.style.width = `${size}px`;
  cursor.style.height = `${size}px`;
}

// Grab the first '.object' element whose center lies within
// GRAB_DISTANCE of the cursor, remembering the cursor-to-object offset
// so dragging keeps the grab point stable.
function tryGrabObject(cursorX, cursorY) {
  const interactionArea = document.querySelector('.interaction-area');
  const areaRect = interactionArea.getBoundingClientRect();

  for (const obj of document.querySelectorAll('.object')) {
    const objRect = obj.getBoundingClientRect();

    // Object position and center relative to the interaction area.
    const objLeft = objRect.left - areaRect.left;
    const objTop = objRect.top - areaRect.top;
    const centerX = objLeft + objRect.width / 2;
    const centerY = objTop + objRect.height / 2;

    const distance = Math.hypot(cursorX - centerX, cursorY - centerY);
    if (distance >= INTERACTION_CONFIG.GRAB_DISTANCE) {
      continue;
    }

    // Announce the grab by voice.
    voice.synthesizeSpeechSentenceBySentence("抓取" + obj.innerHTML);

    grabOffset = { x: cursorX - objLeft, y: cursorY - objTop };
    grabbedObject = obj;
    obj.classList.add('grabbing');
    break;
  }
}

// Drag the held object along with the cursor, clamped so it cannot
// leave the interaction area.
function moveObject(cursorX, cursorY) {
  if (!grabbedObject) {
    return;
  }

  const area = document.querySelector('.interaction-area');
  const maxLeft = area.offsetWidth - grabbedObject.offsetWidth;
  const maxTop = area.offsetHeight - grabbedObject.offsetHeight;

  const clamp = (value, max) => Math.max(0, Math.min(value, max));

  grabbedObject.style.left = clamp(cursorX - grabOffset.x, maxLeft) + 'px';
  grabbedObject.style.top = clamp(cursorY - grabOffset.y, maxTop) + 'px';
}

// Rotate the held object: horizontal cursor movement since the previous
// OK-gesture frame is converted into degrees via ROTATE_SPEED.
function rotateObject(hand, cursorX, cursorY) {
  if (!grabbedObject) {
    return;
  }
  // Bug fix: rotationReference was never initialized anywhere — it was
  // only assigned inside a guard that required it to already be non-null,
  // so the original rotation feature was completely dead. Establish the
  // reference on the first OK-gesture frame, then rotate on later frames.
  if (!rotationReference) {
    rotationReference = { x: cursorX, y: cursorY };
    return;
  }
  const deltaX = cursorX - rotationReference.x;
  objectTransform.rotation += deltaX * INTERACTION_CONFIG.ROTATE_SPEED;
  applyTransform();
  rotationReference = { x: cursorX, y: cursorY };
}

// Scale the held object based on the index/middle fingertip spread,
// with the result clamped to the [0.5, 2] range.
function scaleObject(hand, cursorX, cursorY) {
  if (!grabbedObject) {
    return;
  }

  const indexTip = hand.keypoints.find(k => k.name === 'index_finger_tip');
  const middleTip = hand.keypoints.find(k => k.name === 'middle_finger_tip');
  if (!indexTip || !middleTip) {
    return;
  }

  const spread = Math.sqrt(
    Math.pow(indexTip.x - middleTip.x, 2) +
    Math.pow(indexTip.y - middleTip.y, 2)
  );

  // A 50px spread is neutral; wider grows the object, narrower shrinks it.
  const factor = 1 + (spread - 50) * INTERACTION_CONFIG.SCALE_SPEED * 0.01;
  objectTransform.scale = Math.min(Math.max(objectTransform.scale * factor, 0.5), 2);
  applyTransform();
}

// Push the accumulated rotation/scale onto the held object's CSS transform.
function applyTransform() {
  if (!grabbedObject) {
    return;
  }
  const { rotation, scale } = objectTransform;
  grabbedObject.style.transform = `rotate(${rotation}deg) scale(${scale})`;
}

// Drop the held object and reset all per-grab state so the next grab
// starts cleanly.
function releaseObject() {
  if (!grabbedObject) {
    return;
  }
  grabbedObject.classList.remove('grabbing');
  grabbedObject = null;
  grabOffset = { x: 0, y: 0 };
  rotationReference = null;
}

项目结构与配置

配置文件设计

config.js - 集中管理项目常量和配置项

javascript
// Camera capture settings (pixels; FACING_MODE 'user' = front camera).
export const CAMERA_CONFIG = {
  WIDTH: 640,
  HEIGHT: 480,
  FACING_MODE: 'user'
};

// Hand-detection model settings for the MediaPipe runtime.
export const DETECTOR_CONFIG = {
  MODEL_TYPE: 'full',
  MAX_HANDS: 2,
  FLIP_HORIZONTAL: true,
  SOLUTION_PATH: "node_modules/@mediapipe/hands/"
};

// Gesture-recognition thresholds (distances in canvas pixels; bend
// thresholds use the 0-1 normalized values from calculateFingerBent).
export const GESTURE_CONFIG = {
  GRAB_THRESHOLD: 40,
  FIST_THRESHOLD: 0.5,
  OK_THRESHOLD: 30,
  LIKE_THRESHOLD: 0.8
};

// Object-interaction settings (sizes and distances in pixels).
export const INTERACTION_CONFIG = {
  CURSOR_SIZE: 20,
  GRAB_DISTANCE: 50,
  MOVE_SPEED: 1,
  ROTATE_SPEED: 0.01,
  SCALE_SPEED: 0.01
};

// Rendering settings for keypoints, connections and the cursor.
export const VISUAL_CONFIG = {
  KEYPOINT_RADIUS: 5,
  CONNECTION_WIDTH: 3,
  HAND_OUTLINE_OPACITY: 0.5,
  CURSOR_COLOR_OPEN: 'green',
  CURSOR_COLOR_GRAB: 'red',
  CURSOR_COLOR_OTHER: 'yellow'
};

// Performance tuning: FPS bounds for the adaptive detection throttle
// and the FPS sampling interval in milliseconds.
export const PERFORMANCE_CONFIG = {
  MAX_FPS: 30,
  MIN_FPS: 10,
  FPS_CHECK_INTERVAL: 1000,
  PERFORMANCE_THRESHOLD: 0.8
};

// Web Speech API synthesis settings (rate/pitch/volume; timeout in ms).
export const VOICE_CONFIG = {
  RATE: 1.2,
  PITCH: 1.0,
  VOLUME: 1.0,
  TIMEOUT: 1000
};

使用说明

  1. 启动应用:点击"启动摄像头"按钮,授权摄像头使用权限
  2. 手势控制
    • 张开手掌:移动虚拟光标
    • 拇指和食指靠近:抓取物品
    • 抓取状态下移动手部:移动物品
    • OK手势:旋转物品
    • 点赞手势:缩放物品
    • 再次张开手掌:释放物品
  3. 语音反馈:操作过程中会有语音提示

扩展建议

  1. 添加更多手势:可以扩展识别更多手势,如剪刀手、石头剪刀布等
  2. 增强物品交互:添加物品碰撞检测、物理引擎支持
  3. 多手协作:实现双手同时操作不同物品
  4. 自定义物品:允许用户添加、删除和自定义虚拟物品
  5. 保存和加载场景:支持保存当前物品布局,下次打开时恢复
  6. AR模式:结合 WebXR API,实现增强现实手势交互
  7. 模型优化:尝试使用自定义训练的模型,提高特定场景下的识别准确率

总结

本项目展示了如何使用 TensorFlow.js 和 MediaPipe Hands 模型实现实时手势交互应用。通过模块化设计和清晰的代码结构,实现了手部检测、手势识别、虚拟光标控制、物品抓取移动以及语音反馈等功能。

该项目具有良好的可扩展性,可以根据需求添加更多手势和交互功能。同时,通过动态性能调整,确保在不同设备上都能流畅运行。

手势交互作为一种自然、直观的交互方式,在未来的 Web 应用中有着广阔的应用前景。希望本项目能为开发者提供一个良好的起点,激发更多创意和应用场景。

项目地址

GitHub 仓库链接

参考资料


通过这个项目,我们学习了如何将机器学习模型应用到 Web 前端,实现了从手部检测到手势识别,再到物品交互的完整流程。希望这篇文章能对你有所启发,欢迎交流和扩展!

相关推荐
出征1 小时前
Pnpm的进化进程
前端
屿小夏1 小时前
openGauss020-openGauss 向量数据库深度解析:从存储到AI的全栈优化
前端
Y***98511 小时前
【学术会议论文投稿】Spring Boot实战:零基础打造你的Web应用新纪元
前端·spring boot·后端
T***u3331 小时前
JavaScript在Node.js中的流处理大
开发语言·javascript·node.js
q***33371 小时前
SpringMVC新版本踩坑[已解决]
android·前端·后端
Croa-vo2 小时前
TikTok 数据工程师三轮 VO 超详细面经:技术深挖 + 建模推导 + 压力测试全记录
javascript·数据结构·经验分享·算法·面试
亿元程序员2 小时前
做了十年游戏,我才意识到:程序员最该投资的,是一台专业的编程显示器
前端
IT_陈寒2 小时前
Python高手都在用的5个隐藏技巧,让你的代码效率提升50%
前端·人工智能·后端
lcc1873 小时前
Vue3 ref函数和reactive函数
前端·vue.js