Python OpenCV实践 - 用mediapipe做一个手势鼠标（简单版）

使用mediapipe的手势识别模块，封装一个HandDetector，然后基于这个类做一个手势虚拟鼠标。
手势用法：一只手（hand0）的食指指尖控制鼠标移动；另一只手（hand1）的食指负责点击——当食指指尖低于食指第二关节超过设定阈值（即弯曲食指）时，触发一次左键单击。
纯练手项目，只简单做了左键点击功能，也没有做左右手的判断。hand0和hand1的顺序会受到哪只手先被检测出来影响，可以再优化。
鼠标的操作使用了pynput，直接用pip install pynput安装即可
获取屏幕的分辨率使用了tkinter，python内置，无需特别安装
废话不多说，上代码：
复制代码
import cv2 as cv
import math
import mediapipe as mp
import time
import ctypes
#使用pynput做鼠标控制，安装使用pip install pynput
from pynput import mouse
#使用tkinter获得Windows显示器的分辨率
import tkinter as tk

# When Windows display scaling is enabled, pynput's click misbehaves unless
# the process declares itself DPI-aware; the call below is the workaround.
# https://blog.csdn.net/qq_33303386/article/details/133746983
# https://ask.csdn.net/questions/7471494
PROCESS_PER_MONITOR_DPI_AWARE = 1
if hasattr(ctypes, "windll"):
    # ctypes.windll is only defined on Windows, so the shell-scaling calls
    # are skipped elsewhere instead of failing with AttributeError at import.
    ctypes.windll.shcore.SetProcessDpiAwareness(PROCESS_PER_MONITOR_DPI_AWARE)
    # Display scale factor in percent. It is informational only: if the
    # reported screen size is still wrong after the DPI call above, use it to
    # compute the real resolution or set the system scaling to 100%.
    screenScale = ctypes.windll.shcore.GetScaleFactorForDevice(0)
else:
    # No Windows shell API available: report 100% (no scaling).
    screenScale = 100

print("Screen Scaling Factor:", screenScale)

class HandDetector():
    """Wrapper around mediapipe's ``solutions.hands`` detector.

    Call findHands() on each frame to run detection, then
    findHandPositions() to read one hand's landmarks in pixel coordinates.
    """

    # Result of the most recent findHands() call. It is None until the first
    # call so that findHandPositions() can be used before any detection ran.
    results = None

    def __init__(self, mode=False,
                 maxNumHands=2,
                 modelComplexity=1,
                 minDetectionConfidence=0.5,
                 minTrackingConfidence=0.5):
        """Create the mediapipe hands detector and its drawing helper.

        Args:
            mode: forwarded to mediapipe as ``static_image_mode``.
            maxNumHands: forwarded as ``max_num_hands``.
            modelComplexity: forwarded as ``model_complexity``.
            minDetectionConfidence: forwarded as ``min_detection_confidence``.
            minTrackingConfidence: forwarded as ``min_tracking_confidence``.
        """
        self.mode = mode
        self.maxNumHands = maxNumHands
        self.modelComplexity = modelComplexity
        self.minDetectionConfidence = minDetectionConfidence
        self.minTrackingConfidence = minTrackingConfidence
        self.mpHands = mp.solutions.hands
        # Keyword arguments keep every value bound to the intended parameter
        # regardless of the positional order of the installed mediapipe.
        self.handsDetector = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxNumHands,
            model_complexity=self.modelComplexity,
            min_detection_confidence=self.minDetectionConfidence,
            min_tracking_confidence=self.minTrackingConfidence)
        # mediapipe drawing helper used to render landmarks and connections.
        self.mpDrawUtils = mp.solutions.drawing_utils

    def findHands(self, img, drawOnImage=True):
        """Run hand detection on a BGR frame and store the result.

        Args:
            img: frame in OpenCV's BGR channel order.
            drawOnImage: when True, draw every detected hand's landmarks and
                connections onto ``img`` in place.

        Returns:
            The same ``img`` object that was passed in.
        """
        # The detector is fed RGB, while OpenCV frames are BGR.
        imgRGB = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        self.results = self.handsDetector.process(imgRGB)

        # multi_hand_landmarks is only truthy when at least one hand was found.
        if drawOnImage and self.results.multi_hand_landmarks:
            for handLandmarks in self.results.multi_hand_landmarks:
                self.mpDrawUtils.draw_landmarks(img, handLandmarks, self.mpHands.HAND_CONNECTIONS)
        return img

    def findHandPositions(self, img, handID=0, drawOnImage=True, drawColor=(0,255,0)):
        """Return the pixel positions of one detected hand's landmarks.

        Args:
            img: the frame the last findHands() call ran on; its shape is used
                to scale the landmarks and it is drawn on when requested.
            handID: index of the hand in the last detection result.
            drawOnImage: when True, draw a circle on ``img`` at each landmark.
            drawColor: colour passed to ``cv.circle`` for those circles.

        Returns:
            A list of ``[landmarkIndex, x, y]`` entries in pixels. The list is
            empty when findHands() has not run yet, no hand was detected, or
            ``handID`` is outside ``0 .. detectedHands - 1``.
        """
        hands = self.results.multi_hand_landmarks if self.results is not None else None
        # Negative ids are rejected too, so they cannot wrap around to the
        # last detected hand through Python's negative indexing.
        if not hands or not 0 <= handID < len(hands):
            return []

        # Only height and width are needed, which also works for 2-D images.
        h, w = img.shape[:2]
        landmarkList = []
        for index, landmark in enumerate(hands[handID].landmark):
            # landmark.x / landmark.y are fractions of the frame size; scale
            # them to pixel coordinates of this frame.
            centerX, centerY = int(landmark.x * w), int(landmark.y * h)
            landmarkList.append([index, centerX, centerY])
            if drawOnImage:
                cv.circle(img, (centerX, centerY), 8, drawColor)
        return landmarkList

def DisplayFPS(img, preTime):
    """Draw the current frame rate on ``img`` and return the current time.

    Args:
        img: frame to annotate in place.
        preTime: timestamp returned by the previous call.

    Returns:
        The ``time.time()`` value taken at the start of this call. Nothing is
        drawn when no time has elapsed since ``preTime``.
    """
    now = time.time()
    elapsed = now - preTime
    if elapsed != 0:
        label = "FPS:" + str(int(1 / elapsed))
        cv.putText(img, label, (10,70), cv.FONT_HERSHEY_PLAIN, 3, (0,255,0), 3)
    return now

def MouseMoveRel(mouseController, relX,relY):
    """Move the pointer by the offset ``(relX, relY)`` via ``mouseController.move``."""
    offset = (relX, relY)
    mouseController.move(*offset)

def MouseMoveAbs(mouseController, x, y):
    """Place the pointer at ``(x, y)`` by assigning ``mouseController.position``."""
    target = (x, y)
    mouseController.position = target

def MouseButtonDown(mouseController, button):
    """Press (and hold) ``button`` through ``mouseController.press``."""
    pressButton = mouseController.press
    pressButton(button)

def MouseButtonUp(mouseController, button):
    """Release ``button`` through ``mouseController.release``."""
    releaseButton = mouseController.release
    releaseButton(button)

def GetScreenSize():
    """Return the screen size reported by Tk as ``(width, height)``.

    A temporary Tk root is created only for the query and destroyed before
    returning.
    """
    tkRoot = tk.Tk()
    size = (tkRoot.winfo_screenwidth(), tkRoot.winfo_screenheight())
    tkRoot.destroy()
    return size

def FrameXY2ScreenXY(frameX, frameY, vMouseRectInfo, screenW, screenH):
    """Map a point in the video frame to screen coordinates.

    The point is taken relative to the top-left corner of the virtual mouse
    rectangle, clamped to that rectangle, normalised to 0..1 and scaled to
    the screen size.

    Args:
        frameX, frameY: point in frame coordinates.
        vMouseRectInfo: ``(x1, y1, x2, y2, w, h)`` of the virtual mouse
            rectangle; only the top-left corner and the size are used.
        screenW, screenH: screen size to scale to.

    Returns:
        ``(screenX, screenY)``, with each value in ``0..screenW`` and
        ``0..screenH`` respectively.
    """
    left, top, _right, _bottom, rectW, rectH = vMouseRectInfo
    # Offset inside the rectangle, clamped so points outside map to its edge.
    offsetX = min(max(frameX - left, 0), rectW)
    offsetY = min(max(frameY - top, 0), rectH)
    # Normalise to the rectangle size, then scale to the screen.
    return (offsetX / rectW * screenW, offsetY / rectH * screenH)

def MouseDebounce(curX, curY, lastX, lastY, radius):
    """Suppress pointer jitter around the previous position.

    Returns the current position only when it lies strictly outside the
    circle of ``radius`` centred on the previous position; otherwise the
    previous position is returned unchanged.
    """
    movedFar = math.hypot(curX - lastX, curY - lastY) > radius
    return (curX, curY) if movedFar else (lastX, lastY)

# Size (width, height) the annotated frame is resized to before it is shown.
videoW, videoH = 640, 480
# When True, each camera frame is flipped horizontally before processing.
videoFlipX = True
# Number of pixels the index fingertip must be below the index finger's
# second joint (larger image Y) before a click is triggered.
clickEventThreshold = 15

def main():
    """Run the virtual mouse until the video ends or 'q' is pressed.

    Each frame is optionally mirrored and passed to the hand detector.
    hand0's index fingertip drives the pointer: its position inside a centred
    rectangle of the frame is mapped to the whole screen. hand1's index
    finger triggers one left click each time its tip drops below its second
    joint by more than ``clickEventThreshold`` pixels.

    The camera and all OpenCV windows are released even if the loop raises.
    """
    windowName = 'Virtual Hand Mouse'
    # Landmark indices used below: index fingertip and its second joint.
    indexTipId = 8
    indexJointId = 6

    # Pass a video file path instead of 0 to replay a recording.
    video = cv.VideoCapture(0)
    try:
        if not video.isOpened():
            # Without this check the loop would just end silently on the
            # first failed read.
            print("Unable to open the camera (device 0).")
            return

        # Timestamp of the previous frame, used for the FPS overlay.
        preTime = 0
        handDetector = HandDetector(minDetectionConfidence=0.7)
        mouseController = mouse.Controller()
        # Negative values mean "no previous pointer position yet".
        mouseLastX = -1
        mouseLastY = -1
        screenW,screenH = GetScreenSize()
        print("Screen Size: " + str(screenW) + "x" + str(screenH))

        # Width and height of the frames delivered by the capture.
        frameW = int(video.get(cv.CAP_PROP_FRAME_WIDTH))
        frameH = int(video.get(cv.CAP_PROP_FRAME_HEIGHT))
        print("Camera Frame Resolution:", frameW,frameH)

        # The pointer is controlled inside a rectangle centred in the frame;
        # the ratio sets its size relative to the frame (2/3 here).
        vMouseMoveAreaRatio = 2/3
        vMouseRectX,vMouseRectY = frameW / 2,frameH / 2
        vMouseRectW,vMouseRectH = frameW * vMouseMoveAreaRatio, frameH * vMouseMoveAreaRatio
        vMouseRectTopLeftX, vMouseRectTopLeftY = int(vMouseRectX - vMouseRectW / 2), int(vMouseRectY - vMouseRectH / 2)
        vMouseRectBtmRightX,vMouseRectBtmRightY = int(vMouseRectX + vMouseRectW / 2), int(vMouseRectY + vMouseRectH / 2)
        vMouseRectInfo = (vMouseRectTopLeftX, vMouseRectTopLeftY, vMouseRectBtmRightX, vMouseRectBtmRightY, vMouseRectW, vMouseRectH)

        # True while hand1's finger is in the "clicked" pose, so that one
        # bend produces exactly one click.
        mouseButtonDown = False
        while True:
            ret,frame = video.read()
            if not ret:
                break
            if videoFlipX:
                frame = cv.flip(frame, 1)
            frame = handDetector.findHands(frame, drawOnImage=True)
            # hand0's index finger moves the pointer, hand1's clicks.
            hand0Landmarks = handDetector.findHandPositions(frame, handID=0)
            hand1Landmarks = handDetector.findHandPositions(frame, handID=1)

            if hand0Landmarks:
                indexFingerX,indexFingerY = hand0Landmarks[indexTipId][1],hand0Landmarks[indexTipId][2]
                # Highlight the controlling fingertip with a filled circle.
                cv.circle(frame, (indexFingerX,indexFingerY), 18, (0,120,255), cv.FILLED)

                mouseX,mouseY = FrameXY2ScreenXY(indexFingerX, indexFingerY, vMouseRectInfo, screenW, screenH)
                if mouseLastX >= 0:
                    # Ignore movements of at most 10 screen pixels (jitter).
                    mouseX, mouseY = MouseDebounce(mouseX, mouseY, mouseLastX, mouseLastY, 10)
                MouseMoveAbs(mouseController, mouseX, mouseY)
                mouseLastX = mouseX
                mouseLastY = mouseY
                cv.putText(frame, "Mouse XY:(" + str(int(mouseX)) + "," + str(int(mouseY)) + ")", (indexFingerX, indexFingerY), cv.FONT_HERSHEY_PLAIN,
                  3, (0,255,0), 3)

            if hand1Landmarks:
                fingerTipX,fingerTipY = hand1Landmarks[indexTipId][1],hand1Landmarks[indexTipId][2]
                fingerJointX,fingerJointY = hand1Landmarks[indexJointId][1],hand1Landmarks[indexJointId][2]
                cv.circle(frame, (fingerTipX,fingerTipY), 18, (0,255,0), cv.FILLED)
                cv.circle(frame, (fingerJointX,fingerJointY), 18, (0,120,255), cv.FILLED)
                # Image Y grows downwards: the finger counts as bent when the
                # tip is more than the threshold below the second joint.
                fingerBent = fingerTipY > fingerJointY + clickEventThreshold
                if fingerBent and not mouseButtonDown:
                    mouseController.click(mouse.Button.left)
                    print("Mouse Button Clicked!")
                mouseButtonDown = fingerBent

            preTime = DisplayFPS(frame, preTime)
            # Outline the virtual mouse area. Thickness is 2; the original
            # passed cv.FILLED as a sixth positional argument, which is the
            # lineType parameter, so the default line type is used instead.
            cv.rectangle(frame, (vMouseRectTopLeftX, vMouseRectTopLeftY), (vMouseRectBtmRightX, vMouseRectBtmRightY), (0,255,0), 2)
            frame = cv.resize(frame, (videoW, videoH))
            cv.imshow(windowName, frame)
            # Keep the preview window above other windows.
            cv.setWindowProperty(windowName, cv.WND_PROP_TOPMOST, 1)
            if cv.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        video.release()
        cv.destroyAllWindows()

# Start the virtual mouse only when this file is executed as a script.
if __name__ == "__main__":
    main()
运行结果：
视频参考我的B站：
Python Opencv练手 - mediapipe手势鼠标_哔哩哔哩_bilibili