# TensorFlow.js Real-Time Gesture-Based Object Grabbing: Gesture Interaction from 0 to 1

## Project Overview

This is a real-time gesture-based object-grabbing application built on TensorFlow.js and the MediaPipe Hands model, with voice feedback. The camera tracks the user's hand, gestures drive a virtual cursor, and on-screen virtual objects can be grabbed, moved, rotated, and scaled.

## Features

- ✅ Real-time hand detection and keypoint tracking
- ✅ Recognition of multiple gestures (grab, open hand, OK, thumbs-up, fist)
- ✅ Virtual cursor that follows the hand
- ✅ Grabbing, moving, rotating, and scaling objects
- ✅ Voice feedback
- ✅ Responsive design with mobile support
- ✅ Dynamic performance tuning

## Tech Stack

- Front end: plain JavaScript (ES modules)
- Build tool: Vite
- Machine learning libraries:
  - TensorFlow.js
  - @tensorflow-models/hand-pose-detection
  - @mediapipe/hands
- Browser APIs:
  - WebRTC (getUserMedia)
  - Canvas API
  - Web Speech API
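For readers recreating the project, the dependencies can be installed as follows (a sketch assuming an npm + Vite setup; pin versions as needed):

```bash
npm install @tensorflow/tfjs @tensorflow-models/hand-pose-detection @mediapipe/hands
npm run dev   # start the Vite dev server
```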
## Core Architecture

The project uses a modular architecture in which each module has a single, clear responsibility, keeping the code easy to maintain and extend:
```text
src/
├── js/
│   ├── camera.js       # Camera management
│   ├── detector.js     # Hand detection and gesture recognition
│   ├── interaction.js  # Interaction logic
│   ├── renderer.js     # Hand keypoint rendering
│   ├── utils.js        # Utility functions
│   ├── voice.js        # Speech synthesis
│   └── config.js       # Configuration
├── index.css           # Stylesheet
└── main.js             # Project entry point
```
## Core Code Walkthrough

### 1. Project Initialization and Main Flow

`main.js` - the project entry point, responsible for initializing and coordinating the other modules.

```javascript
// Import core modules
import { setupCamera, stopCamera, isCameraOn } from './js/camera.js';
import { initDetector, detectHands, stopDetection } from './js/detector.js';
import { updateStatus } from './js/utils.js';
import { initSpeechSynthesis } from './js/voice.js';

// Initialize speech synthesis
let voice = initSpeechSynthesis();

// Globals
let video, canvas, detector, animationId;

// Initialization
async function init() {
  video = document.getElementById('video');
  canvas = document.getElementById('output');
  document.getElementById('startBtn').addEventListener('click', toggleCamera);

  // Load the model
  updateStatus('Loading model...', true);
  try {
    detector = await initDetector();
    updateStatus('Model loaded, please start the camera');
    voice.synthesizeSpeechSentenceBySentence('Model loaded, please start the camera');
  } catch (error) {
    console.error('Model loading failed:', error);
    updateStatus('Model loading failed: ' + error.message);
  }
}

// Toggle the camera on and off
async function toggleCamera() {
  if (isCameraOn) {
    voice.synthesizeSpeechSentenceBySentence('Camera turned off');
    stopCamera(video);
    stopDetection(animationId);
    document.getElementById('startBtn').textContent = 'Start camera';
    updateStatus('Camera turned off');
  } else {
    try {
      const startBtn = document.getElementById('startBtn');
      startBtn.disabled = true;
      startBtn.textContent = 'Starting...';
      voice.synthesizeSpeechSentenceBySentence('Starting camera');
      await setupCamera(video, canvas);
      startBtn.textContent = 'Stop camera';
      startBtn.disabled = false;
      updateStatus('Camera started, detecting hands...');
      // Start hand detection
      animationId = detectHands(video, canvas, detector);
    } catch (error) {
      console.error('Camera startup failed:', error);
      updateStatus('Camera startup failed: ' + error.message);
    }
  }
}

// Initialize once the page has loaded
window.addEventListener('load', init);
```
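The article does not list `camera.js`, so here is a minimal sketch of what `setupCamera`, `stopCamera`, and the exported `isCameraOn` flag could look like, assuming the `CAMERA_CONFIG` values shown later in `config.js`; treat this as an illustration rather than the project's actual implementation:

```javascript
import { CAMERA_CONFIG } from './config.js';

// Exported live binding: importing modules (main.js) see updates to this flag
export let isCameraOn = false;

// Request the webcam, attach the stream to the <video> element, and size
// the overlay canvas to match the actual video resolution
export async function setupCamera(video, canvas) {
  const stream = await navigator.mediaDevices.getUserMedia({
    video: {
      width: CAMERA_CONFIG.WIDTH,
      height: CAMERA_CONFIG.HEIGHT,
      facingMode: CAMERA_CONFIG.FACING_MODE
    },
    audio: false
  });
  video.srcObject = stream;
  await new Promise(resolve => (video.onloadedmetadata = resolve));
  await video.play();
  canvas.width = video.videoWidth;
  canvas.height = video.videoHeight;
  isCameraOn = true;
}

// Stop all tracks and detach the stream from the element
export function stopCamera(video) {
  const stream = video.srcObject;
  if (stream) stream.getTracks().forEach(track => track.stop());
  video.srcObject = null;
  isCameraOn = false;
}
```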
### 2. Hand Detection and Gesture Recognition

`detector.js` - the core module, responsible for hand detection and gesture recognition.

```javascript
import * as handPoseDetection from '@tensorflow-models/hand-pose-detection';
import { drawHands, clearCanvas } from './renderer.js';
import { updateInteraction } from './interaction.js';
import { updateStatus, updateInteractionStatus } from './utils.js';
import { DETECTOR_CONFIG, GESTURE_CONFIG, PERFORMANCE_CONFIG } from './config.js';

// Module state
let detector;
let currentGesture = 'No gesture detected';
let handPosition = { x: 0, y: 0 };

// Initialize the hand detector
async function initDetector() {
  detector = await handPoseDetection.createDetector(
    handPoseDetection.SupportedModels.MediaPipeHands,
    {
      runtime: 'mediapipe',
      modelType: DETECTOR_CONFIG.MODEL_TYPE,
      maxHands: DETECTOR_CONFIG.MAX_HANDS,
      solutionPath: DETECTOR_CONFIG.SOLUTION_PATH
    }
  );
  return detector;
}

// Run the detection loop
function detectHands(video, canvas, detector) {
  let animationId;
  const ctx = canvas.getContext('2d');

  // Performance monitoring state
  let frameCount = 0;
  let lastFpsTime = 0;
  let currentFps = 0;
  let detectionInterval = 0;
  let lastDetectionTime = 0;

  // Measure the frame rate
  function updateFps(timestamp) {
    frameCount++;
    if (timestamp - lastFpsTime >= PERFORMANCE_CONFIG.FPS_CHECK_INTERVAL) {
      currentFps = Math.round((frameCount * 1000) / (timestamp - lastFpsTime));
      frameCount = 0;
      lastFpsTime = timestamp;
      // Dynamically adjust the detection interval: throttle detection when
      // the frame rate drops, detect more often when there is headroom
      if (currentFps < PERFORMANCE_CONFIG.MIN_FPS) {
        detectionInterval = Math.min(detectionInterval + 10, 100);
      } else if (currentFps > PERFORMANCE_CONFIG.MAX_FPS) {
        detectionInterval = Math.max(detectionInterval - 10, 0);
      }
    }
  }

  // Per-frame detection
  async function detect(timestamp) {
    try {
      updateFps(timestamp);
      // Only run the model once the detection interval has elapsed
      if (timestamp - lastDetectionTime >= detectionInterval) {
        // Detect hands
        const hands = await detector.estimateHands(video, {
          flipHorizontal: DETECTOR_CONFIG.FLIP_HORIZONTAL
        });
        clearCanvas(ctx, canvas);
        if (hands.length > 0) {
          drawHands(ctx, hands);
          analyzeGesture(hands[0]);
          updateInteraction(hands[0], canvas, currentGesture, handPosition);
        } else {
          currentGesture = 'No gesture detected';
          updateInteractionStatus(currentGesture);
        }
        updateStatus(`Detected ${hands.length} hand(s) | FPS: ${currentFps}`);
        lastDetectionTime = timestamp;
      }
    } catch (error) {
      console.error('Hand detection error:', error);
    }
    animationId = requestAnimationFrame(detect);
  }

  // Kick off the loop. Note that requestAnimationFrame returns a new ID on
  // every frame, so the ID returned here goes stale; a more robust design
  // would hand back a stop() function or a mutable handle instead.
  animationId = requestAnimationFrame(detect);
  return animationId;
}

// Classify the current gesture
function analyzeGesture(hand) {
  // Wrist keypoint
  const wrist = hand.keypoints.find(k => k.name === 'wrist');
  // Track the hand position
  if (wrist) {
    handPosition.x = wrist.x;
    handPosition.y = wrist.y;
  }
  // Fingertip keypoints
  const thumbTip = hand.keypoints.find(k => k.name === 'thumb_tip');
  const indexTip = hand.keypoints.find(k => k.name === 'index_finger_tip');
  const middleTip = hand.keypoints.find(k => k.name === 'middle_finger_tip');
  const ringTip = hand.keypoints.find(k => k.name === 'ring_finger_tip');
  const pinkyTip = hand.keypoints.find(k => k.name === 'pinky_tip');
  // Knuckle (MCP) keypoints
  const indexMcp = hand.keypoints.find(k => k.name === 'index_finger_mcp');
  const middleMcp = hand.keypoints.find(k => k.name === 'middle_finger_mcp');
  const ringMcp = hand.keypoints.find(k => k.name === 'ring_finger_mcp');
  const pinkyMcp = hand.keypoints.find(k => k.name === 'pinky_mcp');
  const thumbMcp = hand.keypoints.find(k => k.name === 'thumb_mcp');

  // Gesture classification
  if (thumbTip && indexTip && middleTip && ringTip && pinkyTip &&
      indexMcp && middleMcp && ringMcp && pinkyMcp && thumbMcp && wrist) {
    // Distance between thumb tip and index fingertip
    const thumbIndexDistance = Math.sqrt(
      Math.pow(thumbTip.x - indexTip.x, 2) +
      Math.pow(thumbTip.y - indexTip.y, 2)
    );
    // Per-finger bend values
    const indexBent = calculateFingerBent(indexTip, indexMcp, wrist);
    const middleBent = calculateFingerBent(middleTip, middleMcp, wrist);
    const ringBent = calculateFingerBent(ringTip, ringMcp, wrist);
    const pinkyBent = calculateFingerBent(pinkyTip, pinkyMcp, wrist);
    const thumbBent = calculateFingerBent(thumbTip, thumbMcp, wrist);
    const avgBent = (indexBent + middleBent + ringBent + pinkyBent) / 4;

    // Classify. The more specific OK check must come before the grab check:
    // OK_THRESHOLD (30) is smaller than GRAB_THRESHOLD (40), so testing
    // grab first would make the OK branch unreachable.
    if (avgBent > GESTURE_CONFIG.FIST_THRESHOLD && thumbBent > GESTURE_CONFIG.FIST_THRESHOLD) {
      currentGesture = 'Fist gesture';
    }
    else if (indexBent < 0.3 && middleBent > 0.5 && ringBent > 0.5 && pinkyBent > 0.5) {
      currentGesture = 'Thumbs-up gesture';
    }
    else if (thumbIndexDistance < GESTURE_CONFIG.OK_THRESHOLD &&
             indexBent < 0.3 && middleBent < 0.3 && ringBent < 0.3 && pinkyBent < 0.3) {
      currentGesture = 'OK gesture';
    }
    else if (thumbIndexDistance < GESTURE_CONFIG.GRAB_THRESHOLD) {
      currentGesture = 'Grab gesture';
    }
    else if (indexBent < 0.3 && middleBent < 0.3 && ringBent < 0.3 && pinkyBent < 0.3) {
      currentGesture = 'Open-hand gesture';
    }
    else {
      currentGesture = 'No gesture detected';
    }
  }
  else {
    currentGesture = 'No gesture detected';
  }
  updateInteractionStatus(currentGesture);
}

// Helper: estimate how bent a finger is. Applies the law of cosines to the
// wrist-MCP-tip triangle to get the angle at the wrist between the
// wrist-to-MCP and wrist-to-tip directions, normalized to 0-1.
function calculateFingerBent(tip, mcp, wrist) {
  const fingerLength = Math.sqrt(
    Math.pow(tip.x - mcp.x, 2) +
    Math.pow(tip.y - mcp.y, 2)
  );
  const wristToTip = Math.sqrt(
    Math.pow(tip.x - wrist.x, 2) +
    Math.pow(tip.y - wrist.y, 2)
  );
  const wristToMcp = Math.sqrt(
    Math.pow(mcp.x - wrist.x, 2) +
    Math.pow(mcp.y - wrist.y, 2)
  );
  const cosAngle = (wristToTip * wristToTip + wristToMcp * wristToMcp - fingerLength * fingerLength) /
                   (2 * wristToTip * wristToMcp);
  const angle = Math.acos(Math.max(-1, Math.min(1, cosAngle)));
  return angle / Math.PI; // Normalize to the 0-1 range
}
```
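`stopDetection` is imported by `main.js` but not shown in the article. Given the loop above, a minimal sketch (an assumed implementation, subject to the stale-ID caveat noted in the code) would be:

```javascript
// Cancel the pending animation frame so the detect loop stops
function stopDetection(animationId) {
  if (animationId) cancelAnimationFrame(animationId);
}
```

As a quick sanity check of the bend metric, feeding `calculateFingerBent` synthetic keypoints gives the expected ordering (illustrative values, not model output):

```javascript
// Straight finger: the tip lies on the wrist-to-MCP ray, so the wrist angle is 0
calculateFingerBent({ x: 0, y: 200 }, { x: 0, y: 100 }, { x: 0, y: 0 }); // ≈ 0
// Curled finger: the tip swings off the ray, so the value rises
calculateFingerBent({ x: 80, y: 100 }, { x: 0, y: 100 }, { x: 0, y: 0 }); // ≈ 0.21
```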
### 3. Rendering Hand Keypoints

`renderer.js` - draws the hand keypoints and the connecting lines between them.

```javascript
import { VISUAL_CONFIG } from './config.js';

// Clear the canvas
function clearCanvas(ctx, canvas) {
  ctx.clearRect(0, 0, canvas.width, canvas.height);
}

// Draw hand keypoints and connections
function drawHands(ctx, hands) {
  for (const hand of hands) {
    // Draw the hand outline
    drawHandOutline(ctx, hand.keypoints);
    // Draw the connections
    drawConnections(ctx, hand);
    // Draw each keypoint
    for (const keypoint of hand.keypoints) {
      const { x, y } = keypoint;
      ctx.beginPath();
      ctx.arc(x, y, VISUAL_CONFIG.KEYPOINT_RADIUS, 0, 2 * Math.PI);
      // Color-code keypoints by finger
      if (keypoint.name === 'wrist') {
        ctx.fillStyle = 'purple';
      } else if (keypoint.name.includes('thumb')) {
        ctx.fillStyle = 'red';
      } else if (keypoint.name.includes('index')) {
        ctx.fillStyle = 'blue';
      } else if (keypoint.name.includes('middle')) {
        ctx.fillStyle = 'green';
      } else if (keypoint.name.includes('ring')) {
        ctx.fillStyle = 'orange';
      } else if (keypoint.name.includes('pinky')) {
        ctx.fillStyle = 'yellow';
      } else {
        ctx.fillStyle = 'white';
      }
      ctx.fill();
      ctx.strokeStyle = 'black';
      ctx.lineWidth = 1;
      ctx.stroke();
    }
  }
}

// Draw the connections between keypoints
function drawConnections(ctx, hand) {
  ctx.strokeStyle = 'rgba(255, 255, 255, 0.7)';
  ctx.lineWidth = VISUAL_CONFIG.CONNECTION_WIDTH;
  ctx.lineCap = 'round';
  // Use the connection pairs attached to the hand object, if any.
  // Note: detection results do not normally carry a connections field,
  // so this guard may never pass; see the note below the listing.
  if (hand.connections) {
    for (const connection of hand.connections) {
      const startPoint = hand.keypoints[connection[0]];
      const endPoint = hand.keypoints[connection[1]];
      if (startPoint && endPoint) {
        ctx.beginPath();
        ctx.moveTo(startPoint.x, startPoint.y);
        ctx.lineTo(endPoint.x, endPoint.y);
        ctx.stroke();
      }
    }
  }
}
```
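Two gaps in this module are worth flagging. `drawHandOutline` is called above but not listed in the article; below is a minimal sketch (my assumption, not the project's actual code) that traces a translucent polygon through the wrist and the MCP knuckles. Also, as far as I know the hand objects returned by `@tensorflow-models/hand-pose-detection` do not carry a `connections` field, so the guard in `drawConnections` may never pass; one option is to fall back to the `HAND_CONNECTIONS` index pairs exported by `@mediapipe/hands`.

```javascript
import { HAND_CONNECTIONS } from '@mediapipe/hands';
import { VISUAL_CONFIG } from './config.js';

// Sketch: suggest the palm with a translucent polygon through the wrist
// and MCP joints (keypoint names follow the MediaPipe Hands model)
function drawHandOutline(ctx, keypoints) {
  const names = ['wrist', 'thumb_mcp', 'index_finger_mcp',
                 'middle_finger_mcp', 'ring_finger_mcp', 'pinky_mcp'];
  const points = names
    .map(name => keypoints.find(k => k.name === name))
    .filter(Boolean);
  if (points.length < 3) return;
  ctx.beginPath();
  ctx.moveTo(points[0].x, points[0].y);
  for (const p of points.slice(1)) ctx.lineTo(p.x, p.y);
  ctx.closePath();
  ctx.fillStyle = `rgba(255, 255, 255, ${VISUAL_CONFIG.HAND_OUTLINE_OPACITY})`;
  ctx.fill();
}

// Inside drawConnections, the fallback would then read:
//   const pairs = hand.connections ?? HAND_CONNECTIONS;
```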
### 4. Object Interaction Logic

`interaction.js` - handles grabbing, moving, rotating, and scaling objects.

```javascript
import { initSpeechSynthesis } from './voice.js';
import { INTERACTION_CONFIG, VISUAL_CONFIG } from './config.js';

let voice = initSpeechSynthesis();
let grabbedObject = null;
let grabOffset = { x: 0, y: 0 };
let objectTransform = { rotation: 0, scale: 1 };
let rotationReference = null;

// Update the interaction state for the current frame
function updateInteraction(hand, canvas, currentGesture, handPosition) {
  const interactionArea = document.querySelector('.interaction-area');
  const rect = interactionArea.getBoundingClientRect();
  // Map the hand position from canvas coordinates to the interaction area
  const mappedX = (handPosition.x / canvas.width) * rect.width;
  const mappedY = (handPosition.y / canvas.height) * rect.height;
  // Show the virtual cursor
  showCursor(interactionArea, mappedX, mappedY, currentGesture);
  // Gesture-driven interactions
  if (currentGesture === 'Grab gesture' && !grabbedObject) {
    tryGrabObject(mappedX, mappedY);
  }
  else if (currentGesture === 'Open-hand gesture' && grabbedObject) {
    releaseObject();
  }
  else if (currentGesture === 'OK gesture' && grabbedObject) {
    rotateObject(hand, mappedX, mappedY);
  }
  else if (currentGesture === 'Thumbs-up gesture' && grabbedObject) {
    scaleObject(hand, mappedX, mappedY);
  }
  // Reset the rotation reference once the OK gesture ends, so the next
  // rotation starts fresh instead of jumping from a stale point
  if (currentGesture !== 'OK gesture') {
    rotationReference = null;
  }
  // Move the grabbed object
  if (grabbedObject && currentGesture === 'Grab gesture') {
    moveObject(mappedX, mappedY);
  }
}

// Show the virtual cursor
function showCursor(area, x, y, currentGesture) {
  let cursor = document.getElementById('hand-cursor');
  if (!cursor) {
    cursor = document.createElement('div');
    cursor.id = 'hand-cursor';
    cursor.style.position = 'absolute';
    cursor.style.width = `${INTERACTION_CONFIG.CURSOR_SIZE}px`;
    cursor.style.height = `${INTERACTION_CONFIG.CURSOR_SIZE}px`;
    cursor.style.borderRadius = '50%';
    cursor.style.pointerEvents = 'none';
    cursor.style.zIndex = '10';
    cursor.style.transform = 'translate(-50%, -50%)';
    cursor.style.transition = 'all 0.1s ease';
    area.appendChild(cursor);
  }
  // Style the cursor according to the current gesture
  let cursorColor;
  let cursorSize = INTERACTION_CONFIG.CURSOR_SIZE;
  switch (currentGesture) {
    case 'Grab gesture':
      cursorColor = VISUAL_CONFIG.CURSOR_COLOR_GRAB;
      cursorSize = INTERACTION_CONFIG.CURSOR_SIZE * 1.2;
      break;
    case 'Open-hand gesture':
      cursorColor = VISUAL_CONFIG.CURSOR_COLOR_OPEN;
      break;
    default:
      cursorColor = VISUAL_CONFIG.CURSOR_COLOR_OTHER;
      break;
  }
  cursor.style.left = x + 'px';
  cursor.style.top = y + 'px';
  cursor.style.background = cursorColor;
  cursor.style.width = `${cursorSize}px`;
  cursor.style.height = `${cursorSize}px`;
}

// Try to grab an object near the cursor
function tryGrabObject(cursorX, cursorY) {
  const objects = document.querySelectorAll('.object');
  const interactionArea = document.querySelector('.interaction-area');
  for (const obj of objects) {
    const objRect = obj.getBoundingClientRect();
    const areaRect = interactionArea.getBoundingClientRect();
    const objLeft = objRect.left - areaRect.left;
    const objTop = objRect.top - areaRect.top;
    const objCenterX = objLeft + objRect.width / 2;
    const objCenterY = objTop + objRect.height / 2;
    // Distance between cursor and object center
    const distance = Math.sqrt(
      Math.pow(cursorX - objCenterX, 2) +
      Math.pow(cursorY - objCenterY, 2)
    );
    // Grab the object if the cursor is close enough
    if (distance < INTERACTION_CONFIG.GRAB_DISTANCE) {
      // Voice prompt for the grab
      voice.synthesizeSpeechSentenceBySentence('Grabbed ' + obj.innerHTML);
      // Remember the cursor's offset inside the object
      grabOffset = {
        x: cursorX - objLeft,
        y: cursorY - objTop
      };
      grabbedObject = obj;
      obj.classList.add('grabbing');
      break;
    }
  }
}

// Move the grabbed object
function moveObject(cursorX, cursorY) {
  if (grabbedObject) {
    const newLeft = cursorX - grabOffset.x;
    const newTop = cursorY - grabOffset.y;
    // Keep the object inside the interaction area
    const interactionArea = document.querySelector('.interaction-area');
    const maxX = interactionArea.offsetWidth - grabbedObject.offsetWidth;
    const maxY = interactionArea.offsetHeight - grabbedObject.offsetHeight;
    // Update the object position
    grabbedObject.style.left = Math.max(0, Math.min(newLeft, maxX)) + 'px';
    grabbedObject.style.top = Math.max(0, Math.min(newTop, maxY)) + 'px';
  }
}

// Rotate the grabbed object
function rotateObject(hand, cursorX, cursorY) {
  if (!grabbedObject) return;
  // On the first OK-gesture frame there is no reference yet: record it and
  // wait for the next frame (otherwise rotation would never start)
  if (!rotationReference) {
    rotationReference = { x: cursorX, y: cursorY };
    return;
  }
  const deltaX = cursorX - rotationReference.x;
  objectTransform.rotation += deltaX * INTERACTION_CONFIG.ROTATE_SPEED;
  applyTransform();
  rotationReference = { x: cursorX, y: cursorY };
}

// Scale the grabbed object
function scaleObject(hand, cursorX, cursorY) {
  if (grabbedObject) {
    const indexTip = hand.keypoints.find(k => k.name === 'index_finger_tip');
    const middleTip = hand.keypoints.find(k => k.name === 'middle_finger_tip');
    if (indexTip && middleTip) {
      const fingerDistance = Math.sqrt(
        Math.pow(indexTip.x - middleTip.x, 2) +
        Math.pow(indexTip.y - middleTip.y, 2)
      );
      // 50px is the neutral finger spread: wider grows, narrower shrinks
      const scaleFactor = 1 + (fingerDistance - 50) * INTERACTION_CONFIG.SCALE_SPEED * 0.01;
      objectTransform.scale = Math.max(0.5, Math.min(objectTransform.scale * scaleFactor, 2));
      applyTransform();
    }
  }
}

// Apply the accumulated transform
function applyTransform() {
  if (grabbedObject) {
    grabbedObject.style.transform = `rotate(${objectTransform.rotation}deg) scale(${objectTransform.scale})`;
  }
}

// Release the grabbed object
function releaseObject() {
  if (grabbedObject) {
    grabbedObject.classList.remove('grabbing');
    grabbedObject = null;
    grabOffset = { x: 0, y: 0 };
    rotationReference = null;
  }
}
```
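The interaction code assumes markup the article never shows: a positioned `.interaction-area` container holding absolutely positioned `.object` elements. A hypothetical setup (class names taken from the code above, the contents invented for illustration) could be created like this:

```javascript
// Hypothetical: populate the interaction area with a few grabbable objects
const area = document.querySelector('.interaction-area');
['🍎', '📦', '⚽'].forEach((emoji, i) => {
  const obj = document.createElement('div');
  obj.className = 'object';
  obj.textContent = emoji;          // read back via obj.innerHTML in tryGrabObject
  obj.style.position = 'absolute';  // required for the style.left/top positioning
  obj.style.left = `${60 + i * 120}px`;
  obj.style.top = '80px';
  area.appendChild(obj);
});
```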
## Project Structure and Configuration

### Configuration File Design

`config.js` - centralizes the project's constants and configuration options.

```javascript
// Camera configuration
export const CAMERA_CONFIG = {
  WIDTH: 640,
  HEIGHT: 480,
  FACING_MODE: 'user'
};

// Hand detection model configuration
export const DETECTOR_CONFIG = {
  MODEL_TYPE: 'full',
  MAX_HANDS: 2,
  FLIP_HORIZONTAL: true,
  SOLUTION_PATH: "node_modules/@mediapipe/hands/"
};

// Gesture recognition thresholds (pixel distances and normalized bends)
export const GESTURE_CONFIG = {
  GRAB_THRESHOLD: 40,
  FIST_THRESHOLD: 0.5,
  OK_THRESHOLD: 30,
  LIKE_THRESHOLD: 0.8
};

// Interaction configuration
export const INTERACTION_CONFIG = {
  CURSOR_SIZE: 20,
  GRAB_DISTANCE: 50,
  MOVE_SPEED: 1,
  ROTATE_SPEED: 0.01,
  SCALE_SPEED: 0.01
};

// Visual configuration
export const VISUAL_CONFIG = {
  KEYPOINT_RADIUS: 5,
  CONNECTION_WIDTH: 3,
  HAND_OUTLINE_OPACITY: 0.5,
  CURSOR_COLOR_OPEN: 'green',
  CURSOR_COLOR_GRAB: 'red',
  CURSOR_COLOR_OTHER: 'yellow'
};

// Performance configuration
export const PERFORMANCE_CONFIG = {
  MAX_FPS: 30,
  MIN_FPS: 10,
  FPS_CHECK_INTERVAL: 1000,
  PERFORMANCE_THRESHOLD: 0.8
};

// Voice configuration
export const VOICE_CONFIG = {
  RATE: 1.2,
  PITCH: 1.0,
  VOLUME: 1.0,
  TIMEOUT: 1000
};
```
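`voice.js` is not listed in the article either. Given the `VOICE_CONFIG` values above and the `synthesizeSpeechSentenceBySentence` calls elsewhere, a minimal sketch on top of the Web Speech API might look like this (one possible implementation, not necessarily the project's):

```javascript
import { VOICE_CONFIG } from './config.js';

export function initSpeechSynthesis() {
  // Speak one chunk of text with the configured rate, pitch, and volume
  function speak(text) {
    const utterance = new SpeechSynthesisUtterance(text);
    utterance.rate = VOICE_CONFIG.RATE;
    utterance.pitch = VOICE_CONFIG.PITCH;
    utterance.volume = VOICE_CONFIG.VOLUME;
    window.speechSynthesis.speak(utterance);
  }

  return {
    // Split long prompts on sentence punctuation and queue one
    // utterance per sentence, as the method name suggests
    synthesizeSpeechSentenceBySentence(text) {
      if (!('speechSynthesis' in window)) return; // graceful no-op
      text.split(/(?<=[.!?。!?])/)
        .map(s => s.trim())
        .filter(Boolean)
        .forEach(speak);
    }
  };
}
```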
## Usage

- Start the app: click the "Start camera" button and grant camera access
- Gesture controls:
  - Open hand: move the virtual cursor
  - Bring thumb and index fingertip together: grab an object
  - Move the hand while grabbing: move the object
  - OK gesture: rotate the object
  - Thumbs-up gesture: scale the object
  - Open the hand again: release the object
- Voice feedback: spoken prompts accompany the operations
## Extension Ideas

- More gestures: extend recognition to gestures such as the victory sign or rock-paper-scissors
- Richer object interaction: add collision detection and physics-engine support
- Two-hand collaboration: manipulate different objects with both hands at once
- Custom objects: let users add, remove, and customize the virtual objects
- Scene saving and loading: persist the current object layout and restore it on the next visit
- AR mode: combine with the WebXR API for augmented-reality gesture interaction
- Model optimization: try a custom-trained model to improve accuracy in specific scenarios
## Summary

This project demonstrates how to build a real-time gesture interaction application with TensorFlow.js and the MediaPipe Hands model. Through a modular design and a clear code structure, it implements hand detection, gesture recognition, virtual cursor control, object grabbing and movement, and voice feedback.

The project is easy to extend with additional gestures and interactions, and its dynamic performance tuning keeps it running smoothly across devices.

Gesture interaction is a natural, intuitive input method with broad prospects in future web applications. I hope this project offers developers a solid starting point and sparks more ideas and use cases.
Through this project we walked the full pipeline of bringing a machine learning model into the web front end, from hand detection to gesture recognition to object interaction. I hope this article is useful to you; discussion and extensions are welcome!