Python OpenCV实践 - 用mediapipe做一个手势鼠标(简单版)

使用mediapipe的手势识别模块,封装一个HandDetector,然后基于这个类做一个手势虚拟鼠标。

手势用法:一只手(hand0)的食指做鼠标移动,另一只手(hand1)的食指做点击操作:当hand1的食指指尖在画面中低于食指第二关节超过一定阈值时(例如弯下食指),触发一次左键点击。

纯练手项目,只简单做了左键点击功能,也没有做左右手的判断。hand0和hand1的顺序会受到哪只手先被检测出来影响,可以再优化。

鼠标的操作使用了pynput,直接用pip install pynput安装即可

获取屏幕的分辨率使用了tkinter,python内置,无需特别安装

废话不多说,上代码:

import cv2 as cv
import math
import mediapipe as mp
import time
import ctypes
#使用pynput做鼠标控制,安装使用pip install pynput
from pynput import mouse
#使用tkinter获得Windows显示器的分辨率
import tkinter as tk

# On Windows with display scaling enabled, pynput's click handling can misbehave
# unless the process declares itself DPI aware. See:
# https://blog.csdn.net/qq_33303386/article/details/133746983
# https://ask.csdn.net/questions/7471494
# NOTE(review): in the Windows PROCESS_DPI_AWARENESS enum the value 1 is
# PROCESS_SYSTEM_DPI_AWARE and per-monitor awareness is 2. The name and value
# are kept unchanged here to preserve behavior -- confirm which mode is intended.
PROCESS_PER_MONITOR_DPI_AWARE = 1
# Screen scaling factor in percent. It is informational only (nothing below
# uses it); 100 is the fallback when the Windows shcore API is unavailable.
# If the DPI setting above still does not yield the real resolution, consider
# deriving it from this factor or setting the system scaling to 100%.
screenScale = 100
try:
    _shcore = ctypes.windll.shcore
except (AttributeError, OSError):
    # ctypes.windll only exists on Windows, and loading shcore can fail there too.
    _shcore = None
if _shcore is not None:
    _shcore.SetProcessDpiAwareness(PROCESS_PER_MONITOR_DPI_AWARE)
    screenScale = _shcore.GetScaleFactorForDevice(0)

print("Screen Scaling Factor:", screenScale)

class HandDetector():
    """Wrapper around the MediaPipe Hands solution.

    Call findHands() on each frame to run detection, then findHandPositions()
    to read the pixel coordinates of one detected hand's landmarks.
    """

    def __init__(self, mode=False,
                 maxNumHands=2,
                 modelComplexity=1,
                 minDetectionConfidence=0.5,
                 minTrackingConfidence=0.5):
        """Create the MediaPipe hands detector and drawing helper.

        Args:
            mode: Forwarded as MediaPipe's static_image_mode.
            maxNumHands: Forwarded as max_num_hands.
            modelComplexity: Forwarded as model_complexity.
            minDetectionConfidence: Forwarded as min_detection_confidence.
            minTrackingConfidence: Forwarded as min_tracking_confidence.
        """
        self.mode = mode
        self.maxNumHands = maxNumHands
        self.modelComplexity = modelComplexity
        self.minDetectionConfidence = minDetectionConfidence
        self.minTrackingConfidence = minTrackingConfidence
        # MediaPipe hands solution module and detector instance.
        self.mpHands = mp.solutions.hands
        # Keyword arguments keep the call correct even if the positional
        # order of Hands() parameters differs between MediaPipe releases.
        self.handsDetector = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxNumHands,
            model_complexity=self.modelComplexity,
            min_detection_confidence=self.minDetectionConfidence,
            min_tracking_confidence=self.minTrackingConfidence)
        # MediaPipe drawing helper used to render landmarks and connections.
        self.mpDrawUtils = mp.solutions.drawing_utils
        # Result of the most recent findHands() call; None until it has run.
        self.results = None

    def findHands(self, img, drawOnImage=True):
        """Run hand detection on a BGR frame and optionally draw the hands.

        Args:
            img: Frame in OpenCV's default BGR channel order.
            drawOnImage: When true, draw landmarks and connections onto img.

        Returns:
            The same img object (annotated in place when drawOnImage is true).
        """
        # The detector expects RGB input while OpenCV frames are BGR.
        imgRGB = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        self.results = self.handsDetector.process(imgRGB)

        # multi_hand_landmarks is only populated when at least one hand was found.
        if drawOnImage and self.results.multi_hand_landmarks:
            for handLandmarks in self.results.multi_hand_landmarks:
                self.mpDrawUtils.draw_landmarks(img, handLandmarks, self.mpHands.HAND_CONNECTIONS)
        return img

    def findHandPositions(self, img, handID=0, drawOnImage=True, drawColor=(0,255,0)):
        """Return the landmark pixel positions of one hand from the last detection.

        Args:
            img: Frame the detection ran on; its shape scales the normalized
                landmark coordinates, and circles are drawn on it if requested.
            handID: Index into the detected hands list.
            drawOnImage: When true, draw a circle at every landmark.
            drawColor: Color of the landmark circles.

        Returns:
            A list of [landmarkIndex, x, y] entries in pixel coordinates. The
            list is empty when findHands() has not run yet, no hand was
            detected, or handID is beyond the number of detected hands.
        """
        landmarkList = []
        # getattr keeps this safe even on an instance that never ran detection.
        results = getattr(self, "results", None)
        hands = results.multi_hand_landmarks if results is not None else None
        if not hands or handID >= len(hands):
            return landmarkList

        # Only height and width are needed, so 2-D (grayscale) frames work too.
        h, w = img.shape[:2]
        for index, landmark in enumerate(hands[handID].landmark):
            # landmark.x / landmark.y are ratios of the frame size; convert to pixels.
            centerX, centerY = int(landmark.x * w), int(landmark.y * h)
            landmarkList.append([index, centerX, centerY])
            if drawOnImage:
                cv.circle(img, (centerX, centerY), 8, drawColor)
        return landmarkList

def DisplayFPS(img, preTime):
    """Draw the frame rate on img and return the current timestamp.

    Args:
        img: Frame the "FPS:<n>" label is drawn on.
        preTime: Timestamp returned by the previous call.

    Returns:
        The current time.time() value, to be passed in as preTime next time.
        Nothing is drawn when no time has elapsed since preTime.
    """
    now = time.time()
    elapsed = now - preTime
    if elapsed != 0:
        label = "FPS:" + str(int(1 / elapsed))
        cv.putText(img, label, (10,70), cv.FONT_HERSHEY_PLAIN, 3, (0,255,0), 3)
    return now

def MouseMoveRel(mouseController, relX,relY):
    """Move the pointer by (relX, relY) through the controller's move()."""
    offset = (relX, relY)
    mouseController.move(*offset)

def MouseMoveAbs(mouseController, x, y):
    """Place the pointer at (x, y) by assigning the controller's position."""
    target = (x, y)
    mouseController.position = target

def MouseButtonDown(mouseController, button):
    """Press (and hold) the given button through the controller's press()."""
    press = mouseController.press
    press(button)

def MouseButtonUp(mouseController, button):
    """Release the given button through the controller's release()."""
    release = mouseController.release
    release(button)

def GetScreenSize():
    """Return the screen size as a (width, height) tuple.

    A temporary Tk root is created only to query the screen dimensions and is
    destroyed again before returning.
    """
    window = tk.Tk()
    size = (window.winfo_screenwidth(), window.winfo_screenheight())
    window.destroy()
    return size

# Map a point in the video frame to screen coordinates, using the virtual
# mouse rectangle as the active area.
def FrameXY2ScreenXY(frameX, frameY, vMouseRectInfo, screenW, screenH):
    """Convert frame coordinates into screen coordinates.

    Args:
        frameX: X coordinate of the point in the video frame.
        frameY: Y coordinate of the point in the video frame.
        vMouseRectInfo: Tuple (x1, y1, x2, y2, w, h) describing the virtual
            mouse rectangle; only the top-left corner and the size are used.
        screenW: Screen width.
        screenH: Screen height.

    Returns:
        (screenX, screenY). Points outside the rectangle are clamped to its
        edges, so the result always lies within [0, screenW] x [0, screenH].
    """
    left, top, _, _, rectW, rectH = vMouseRectInfo

    # Offset from the rectangle's top-left corner, clamped to the rectangle.
    offsetX = min(max(frameX - left, 0), rectW)
    offsetY = min(max(frameY - top, 0), rectH)

    # Normalize to 0..1 inside the rectangle, then scale up to the screen.
    ratioX = offsetX / rectW
    ratioY = offsetY / rectH
    return (ratioX * screenW, ratioY * screenH)

# Jitter suppression: ignore movements that stay inside a small circle
# centered on the previous mouse position.
def MouseDebounce(curX, curY, lastX, lastY, radius):
    """Return the position to use after filtering out small movements.

    Args:
        curX: Newly computed X position.
        curY: Newly computed Y position.
        lastX: Previous X position.
        lastY: Previous Y position.
        radius: Radius of the dead zone around the previous position.

    Returns:
        (curX, curY) when the new point is farther than radius from the
        previous one, otherwise (lastX, lastY).
    """
    moved = math.hypot(curX - lastX, curY - lastY)
    return (curX, curY) if moved > radius else (lastX, lastY)

# Size the annotated frame is resized to before being displayed.
videoW, videoH = 640, 480
# When true, each camera frame is flipped horizontally before processing.
videoFlipX = True
# A click fires when the index fingertip is more than this many pixels below
# the index finger's second joint (landmark 6) in the frame.
clickEventThreshold = 15

def main():
    """Run the webcam loop that turns hand gestures into mouse input.

    hand0's index fingertip drives the pointer: its position inside a centered
    rectangle of the camera frame is mapped onto the whole screen. hand1's
    index finger triggers a left click when its tip drops below its second
    joint by more than clickEventThreshold pixels. Press 'q' in the preview
    window to quit. The camera and all OpenCV windows are released on exit,
    including when an exception interrupts the loop.
    """
    # A recorded clip can be used instead of the webcam by opening a file,
    # e.g. cv.VideoCapture('../../SampleVideos/mouseMove.mp4').
    video = cv.VideoCapture(0)
    windowName = 'Virtual Hand Mouse'
    try:
        if not video.isOpened():
            print("Failed to open the camera.")
            return

        # Timestamp of the previous frame, used for the FPS overlay.
        preTime = 0
        handDetector = HandDetector(minDetectionConfidence=0.7)
        mouseController = mouse.Controller()
        # Negative values mean "no previous pointer position yet".
        mouseLastX = -1
        mouseLastY = -1
        screenW,screenH = GetScreenSize()
        print("Screen Size: " + str(screenW) + "x" + str(screenH))

        # Width and height of the frames delivered by the camera.
        frameW = int(video.get(cv.CAP_PROP_FRAME_WIDTH))
        frameH = int(video.get(cv.CAP_PROP_FRAME_HEIGHT))
        print("Camera Frame Resolution:", frameW,frameH)
        # The virtual mouse area is a rectangle centered in the camera frame;
        # its size is a fraction (2/3 here) of the frame size.
        vMouseMoveAreaRatio = 2/3
        vMouseRectX,vMouseRectY = frameW / 2,frameH / 2
        vMouseRectW,vMouseRectH = frameW * vMouseMoveAreaRatio, frameH * vMouseMoveAreaRatio
        vMouseRectTopLeftX, vMouseRectTopLeftY = int(vMouseRectX - vMouseRectW / 2), int(vMouseRectY - vMouseRectH / 2)
        vMouseRectBtmRightX,vMouseRectBtmRightY = int(vMouseRectX + vMouseRectW / 2), int(vMouseRectY + vMouseRectH / 2)
        vMouseRectInfo = (vMouseRectTopLeftX, vMouseRectTopLeftY, vMouseRectBtmRightX, vMouseRectBtmRightY, vMouseRectW, vMouseRectH)
        # True while the click gesture is held, so one gesture yields one click.
        mouseButtonDown = False
        # The always-on-top property only needs to be applied once.
        windowPinned = False
        while True:
            ret,frame = video.read()
            if not ret:
                break
            if videoFlipX:
                frame = cv.flip(frame, 1)
            frame = handDetector.findHands(frame, drawOnImage=True)
            # hand0's index finger moves the pointer, hand1's index finger clicks.
            hand0Landmarks = handDetector.findHandPositions(frame, handID=0)
            hand1Landmarks = handDetector.findHandPositions(frame, handID=1)
            if hand0Landmarks:
                # Landmark 8 is the index fingertip.
                indexFingerX,indexFingerY = hand0Landmarks[8][1],hand0Landmarks[8][2]
                # Highlight it with a filled circle.
                cv.circle(frame, (indexFingerX,indexFingerY), 18, (0,120,255), cv.FILLED)

                mouseX,mouseY = FrameXY2ScreenXY(indexFingerX, indexFingerY, vMouseRectInfo, screenW, screenH)
                if mouseLastX >= 0:
                    # Ignore movements within 10 pixels of the last position.
                    mouseX, mouseY = MouseDebounce(mouseX, mouseY, mouseLastX, mouseLastY, 10)
                MouseMoveAbs(mouseController, mouseX, mouseY)
                mouseLastX = mouseX
                mouseLastY = mouseY
                cv.putText(frame, "Mouse XY:(" + str(int(mouseX)) + "," + str(int(mouseY)) + ")", (indexFingerX, indexFingerY), cv.FONT_HERSHEY_PLAIN,
                  3, (0,255,0), 3)

            if hand1Landmarks:
                # Take the index fingertip (8) and the index finger's second joint (6).
                fingerTipX,fingerTipY = hand1Landmarks[8][1],hand1Landmarks[8][2]
                fingerJointX,fingerJointY = hand1Landmarks[6][1],hand1Landmarks[6][2]
                cv.circle(frame, (fingerTipX,fingerTipY), 18, (0,255,0), cv.FILLED)
                cv.circle(frame, (fingerJointX,fingerJointY), 18, (0,120,255), cv.FILLED)
                # Image Y grows downwards, so a larger Y means the tip is below
                # the joint; past the threshold this counts as a click gesture.
                if fingerTipY > fingerJointY + clickEventThreshold:
                    if not mouseButtonDown:
                        mouseButtonDown = True
                        mouseController.click(mouse.Button.left)
                        print("Mouse Button Clicked!")
                else:
                    mouseButtonDown = False

            preTime = DisplayFPS(frame, preTime)
            # Outline the virtual mouse movement area.
            cv.rectangle(frame, (vMouseRectTopLeftX, vMouseRectTopLeftY), (vMouseRectBtmRightX, vMouseRectBtmRightY), (0,255,0), 2, cv.FILLED)
            frame = cv.resize(frame, (videoW, videoH))
            cv.imshow(windowName, frame)
            if not windowPinned:
                # Keep the preview window on top of other windows.
                cv.setWindowProperty(windowName, cv.WND_PROP_TOPMOST, 1)
                windowPinned = True
            if cv.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        video.release()
        cv.destroyAllWindows()

if __name__ == "__main__":
    main()

运行结果:

视频参考我的B站:

Python Opencv练手 - mediapipe手势鼠标_哔哩哔哩_bilibili

相关推荐
coberup5 分钟前
django Forbidden (403)错误解决方法
python·django·403错误
龙哥说跨境36 分钟前
如何利用指纹浏览器爬虫绕过Cloudflare的防护?
服务器·网络·python·网络爬虫
小白学大数据1 小时前
正则表达式在Kotlin中的应用:提取图片链接
开发语言·python·selenium·正则表达式·kotlin
flashman9111 小时前
python在word中插入图片
python·microsoft·自动化·word
菜鸟的人工智能之路1 小时前
桑基图在医学数据分析中的更复杂应用示例
python·数据分析·健康医疗
懒大王爱吃狼2 小时前
Python教程:python枚举类定义和使用
开发语言·前端·javascript·python·python基础·python编程·python书籍
秃头佛爷3 小时前
Python学习大纲总结及注意事项
开发语言·python·学习
浮生如梦_4 小时前
Halcon基于laws纹理特征的SVM分类
图像处理·人工智能·算法·支持向量机·计算机视觉·分类·视觉检测
深度学习lover4 小时前
<项目代码>YOLOv8 苹果腐烂识别<目标检测>
人工智能·python·yolo·目标检测·计算机视觉·苹果腐烂识别
API快乐传递者5 小时前
淘宝反爬虫机制的主要手段有哪些?
爬虫·python