Hi everyone, I'm 半亩花海. Smart devices are everywhere these days, yet finding more convenient ways to interact with them remains an open problem. This article walks through an OpenCV-based gesture recognition project that controls screen brightness and system volume with hand gestures, offering a new way to interact with your computer.
Table of Contents
[1. Importing the Required Libraries](#1-importing-the-required-libraries)
[2. Initializing the Hand Keypoint Detector](#2-initializing-the-hand-keypoint-detector)
[3. Converting the Data Format](#3-converting-the-data-format)
[4. Drawing the Hand Landmarks](#4-drawing-the-hand-landmarks)
[5. Buffering the Gesture State](#5-buffering-the-gesture-state)
[6. Drawing a Line](#6-drawing-a-line)
[7. Controlling Screen Brightness and Volume](#7-controlling-screen-brightness-and-volume)
[8. Initializing the Camera and Hand Keypoint Detector](#8-initializing-the-camera-and-hand-keypoint-detector)
[9. Pygame Interface Initialization and Event Handling](#9-pygame-interface-initialization-and-event-handling)
[1. Brightness (light)](#1-brightness-light)
[2. Volume (voice)](#2-volume-voice)
[3. Menu (menu)](#3-menu-menu)
I. Code Walkthrough
1. Importing the Required Libraries
Before diving into the implementation details, we first import the libraries the project depends on:
- OpenCV: image and video processing.
- MediaPipe: hand landmark detection and tracking.
- Pygame: the graphical interface that displays the camera feed.
- WMI: adjusting the screen brightness on Windows.
- pycaw: controlling the system volume on Windows.
```python
# import the required libraries
import math
import sys
import numpy as np
import cv2
import pygame
import wmi
import mediapipe as mp
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
import warnings  # suppress warnings

warnings.filterwarnings("ignore")
```
2. Initializing the Hand Keypoint Detector
We start with a HandKeyPoint class that wraps MediaPipe's hand landmark detector and exposes a method for processing a single image; a short usage sketch follows the class definition.
```python
# hand keypoint detector class
class HandKeyPoint:
    def __init__(self,
                 static_image_mode=False,
                 max_num_hands=2,
                 model_complexity=1,
                 min_detection_confidence=0.5,
                 min_tracking_confidence=0.5):
        # MediaPipe hands API
        self.mp_hands = mp.solutions.hands
        # instantiate the hand detector
        self.hands = self.mp_hands.Hands(static_image_mode=static_image_mode,
                                         max_num_hands=max_num_hands,
                                         model_complexity=model_complexity,
                                         min_detection_confidence=min_detection_confidence,
                                         min_tracking_confidence=min_tracking_confidence)

    def process(self, image):
        # convert BGR to RGB
        img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # detect hands in the image and return the results
        results = self.hands.process(img)
        # the same data as a numpy array
        np_arr = landmarks_to_numpy(results)
        return results, np_arr
```
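To make the interface concrete, here is a minimal smoke test (my addition, not part of the original project): it grabs a single frame from the default webcam and prints the shape of the NumPy output. It assumes the `landmarks_to_numpy` helper from the next section is already defined.

```python
# Minimal usage sketch for HandKeyPoint (assumes landmarks_to_numpy exists).
import cv2

hand = HandKeyPoint()
cap = cv2.VideoCapture(0)
ok, frame = cap.read()              # one BGR frame from the default webcam
if ok:
    results, arr = hand.process(frame)
    print(arr.shape)                # (2, 21, 3): [left/right, landmark, x/y/z]
cap.release()
```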
3. Converting the Data Format
Next we convert the detection results (MediaPipe's landmarks format) into a NumPy array, which is much easier to slice and analyze in the rest of the pipeline.
```python
# convert landmarks-format data into a numpy array
def landmarks_to_numpy(results):
    """
    Convert landmarks-format data into a numpy array.
    numpy shape: (2, 21, 3)
    :param results: output of MediaPipe Hands
    :return: array of shape (2, 21, 3), ordered [left hand, right hand]
    """
    shape = (2, 21, 3)
    landmarks = results.multi_hand_landmarks
    if landmarks is None:
        # no hand detected
        return np.zeros(shape)
    elif len(landmarks) == 1:
        # one hand detected; check whether it is the left or the right hand
        label = results.multi_handedness[0].classification[0].label
        hand = landmarks[0]
        # print(label)
        if label == "Left":
            return np.array(
                [np.array([[hand.landmark[i].x, hand.landmark[i].y, hand.landmark[i].z] for i in range(21)]),
                 np.zeros((21, 3))])
        else:
            return np.array([np.zeros((21, 3)),
                             np.array(
                                 [[hand.landmark[i].x, hand.landmark[i].y, hand.landmark[i].z] for i in range(21)])])
    elif len(landmarks) == 2:
        # print(results.multi_handedness)
        lh_idx = 0
        rh_idx = 0
        for idx, hand_type in enumerate(results.multi_handedness):
            label = hand_type.classification[0].label
            if label == 'Left':
                lh_idx = idx
            if label == 'Right':
                rh_idx = idx
        lh = np.array(
            [[landmarks[lh_idx].landmark[i].x, landmarks[lh_idx].landmark[i].y, landmarks[lh_idx].landmark[i].z]
             for i in range(21)])
        rh = np.array(
            [[landmarks[rh_idx].landmark[i].x, landmarks[rh_idx].landmark[i].y, landmarks[rh_idx].landmark[i].z]
             for i in range(21)])
        return np.array([lh, rh])
    else:
        return np.zeros((2, 21, 3))
```
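As a quick orientation for the array layout (an illustrative snippet, not project code): axis 0 selects the hand, axis 1 the MediaPipe landmark index, and axis 2 the normalized coordinate.

```python
import numpy as np

arr = np.zeros((2, 21, 3))   # placeholder; normally returned by process()
right_index_tip = arr[1, 8]  # (x, y, z) of the right index fingertip (landmark 8)
left_wrist_y = arr[0, 0, 1]  # normalized y of the left wrist (landmark 0)
```

Coordinates are normalized to [0, 1] relative to the image width and height, so they must be multiplied by the frame size before drawing, as `draw_line` does below.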
4. Drawing the Hand Landmarks
```python
# draw the hand landmarks onto the frame
def draw_landmark(img, results):
    if results.multi_hand_landmarks:
        for hand_landmark in results.multi_hand_landmarks:
            mp.solutions.drawing_utils.draw_landmarks(
                img,
                hand_landmark,
                mp.solutions.hands.HAND_CONNECTIONS,
                mp.solutions.drawing_styles.get_default_hand_landmarks_style(),
                mp.solutions.drawing_styles.get_default_hand_connections_style())
    return img
```
5. Buffering the Gesture State
To smooth out fluctuations in the detected gesture state, we implement a Buffer class that debounces state changes and exposes methods for adding positive and negative samples; a short demo after the class shows the effect.
```python
# buffer (debouncer) for a boolean gesture state
class Buffer:
    def __init__(self, volume=20):
        self.__positive = 0
        self.state = False
        self.__negative = 0
        self.__volume = volume
        self.__count = 0

    def add_positive(self):
        self.__count += 1
        if self.__positive >= self.__volume:
            # enough positive samples: switch the state to True
            self.state = True
            self.__negative = 0
            self.__count = 0
        else:
            self.__positive += 1
            if self.__count > self.__volume:
                # state still undecided after `volume` operations: reset
                self.__positive = 0
                self.__count = 0

    def add_negative(self):
        self.__count += 1
        if self.__negative >= self.__volume:
            # enough negative samples: switch the state to False
            self.state = False
            self.__positive = 0
        else:
            self.__negative += 1
            if self.__count > self.__volume:
                # state still undecided after `volume` operations: reset
                self.__positive = 0
                self.__count = 0
        # print(f"pos:{self.__positive} neg:{self.__negative} count:{self.__count}")

    def clear(self):
        self.__positive = 0
        self.state = False
        self.__negative = 0
        self.__count = 0
```
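The following self-contained demo (my own toy example, assuming only the `Buffer` class above) shows the debouncing effect: the state flips to True only after enough positive samples have accumulated, so a single noisy frame cannot toggle it.

```python
buf = Buffer(volume=3)
for _ in range(3):
    buf.add_positive()
print(buf.state)    # False: still accumulating positive samples
buf.add_positive()
print(buf.state)    # True: the positive count reached the capacity
buf.add_negative()
print(buf.state)    # True: one negative sample is not enough to flip back
```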
6. Drawing a Line
```python
# draw a line segment between two normalized points
def draw_line(frame, p1, p2, color=(255, 127, 0), thickness=3):
    """
    Draw a straight line on the frame.
    :param p1: first point, normalized (x, y) coordinates
    :param p2: second point, normalized (x, y) coordinates
    :return: the frame with the line drawn
    """
    return cv2.line(frame, (int(p1[0] * CAM_W), int(p1[1] * CAM_H)),
                    (int(p2[0] * CAM_W), int(p2[1] * CAM_H)), color, thickness)
```
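A small usage sketch (illustrative values, not project code): since MediaPipe landmarks are normalized, `draw_line` multiplies them by the global `CAM_W` and `CAM_H` to get pixel coordinates.

```python
import numpy as np

CAM_W, CAM_H = 640, 480                              # frame size in pixels
frame = np.zeros((CAM_H, CAM_W, 3), dtype=np.uint8)  # stand-in black frame
p1, p2 = (0.25, 0.5), (0.75, 0.5)                    # normalized endpoints
frame = draw_line(frame, p1, p2)                     # drawn from (160, 240) to (480, 240)
```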
7. Controlling Screen Brightness and Volume
```python
# set the screen brightness
def screen_change(percent):  # `percent` is the brightness percentage (0-100)
    SCREEN = wmi.WMI(namespace='root/WMI')
    a = SCREEN.WmiMonitorBrightnessMethods()[0]
    a.WmiSetBrightness(Brightness=percent, Timeout=500)


# initialize the volume controller
def init_voice():
    devices = AudioUtilities.GetSpeakers()
    interface = devices.Activate(
        IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
    volume = cast(interface, POINTER(IAudioEndpointVolume))
    volume.SetMute(0, None)
    volume_range = volume.GetVolumeRange()
    min_volume = volume_range[0]
    max_volume = volume_range[1]
    return (min_volume, max_volume), volume
```
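For orientation, here is a hedged usage sketch (Windows only; assumes the two functions above and their imports): it maps a 0-100 percentage onto pycaw's volume range and applies both settings. Note that `GetVolumeRange` returns decibels, so a linear mapping is an approximation rather than a perceptually uniform one.

```python
import numpy as np

(min_vol, max_vol), volume = init_voice()
target_db = np.interp(50, [0, 100], [min_vol, max_vol])  # 50% -> dB value
volume.SetMasterVolumeLevel(target_db, None)             # set system volume
screen_change(50)                                        # 50% screen brightness
```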
8. Initializing the Camera and Hand Keypoint Detector
During initialization we create the camera instance and the hand keypoint detector instance, which the main loop uses to recognize and process gestures.
```python
# open the camera
cap = cv2.VideoCapture(0)
CAM_W = 640
CAM_H = 480
CAM_SCALE = CAM_W / CAM_H

# create the hand keypoint detector
hand = HandKeyPoint()
```
9. Pygame Interface Initialization and Event Handling
To visualize the gesture control and provide an interactive interface, we use Pygame. During initialization we create a window and set its caption; we also poll for events so the program can be closed cleanly.
Concretely, the camera frames are drawn into the Pygame window on every iteration, and Pygame's event queue is checked for keyboard input: pressing "q" exits the program. The user can therefore control brightness and volume by gesture while watching the recognition results live.
```python
# initialize pygame
pygame.init()
# create an 800x600 window
screen = pygame.display.set_mode((800, 600))
pygame.display.set_caption("virtual_control_screen")
# current window size
window_size = list(screen.get_size())

# main loop
while True:
    # ... per-frame processing (see the complete code in Part III) ...

    # event handling: quit when 'q' is pressed
    for event in pygame.event.get():
        if event.type == pygame.KEYDOWN:
            if event.key == pygame.K_q:
                sys.exit(0)
```
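For completeness, this is how a processed OpenCV frame ends up on the Pygame window in the full code (Part III): `pygame.surfarray` expects RGB data with the axes transposed relative to OpenCV's row-major layout.

```python
# frame is already RGB here (converted right after cap.read())
frame = cv2.resize(frame, (int(window_size[1] * CAM_SCALE), window_size[1]))
frame = cv2.transpose(frame)                   # swap axes for surfarray
surface = pygame.surfarray.make_surface(frame)
screen.blit(surface, (0, 0))                   # draw at the top-left corner
pygame.display.flip()                          # present the frame
```

The blit offset in the full code centers the image horizontally; `(0, 0)` here is a simplification.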
II. Demonstration
1. Brightness (light)
If **20 < angle < 90**, the program enters the **"light ready"** state and the gesture controls the screen brightness.
2. Volume (voice)
If **-20 > angle > -50**, the program enters the **"voice ready"** state and the gesture controls the volume.
3. Menu (menu)
In every other case the program stays in the **"menu"** state. The snippet below shows how `angle` is computed.
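The `angle` that selects between these states measures the tilt of the left hand; this snippet is lifted from the main loop in Part III, where `arr` is the (2, 21, 3) landmark array.

```python
import numpy as np

# slope between the left wrist (landmark 0) and middle fingertip (landmark 12)
tan = (arr[0, 0, 1] - arr[0, 12, 1]) / (arr[0, 0, 0] - arr[0, 12, 0])
angle = np.arctan(tan) * 180 / np.pi   # tilt of the left hand in degrees
```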
The demo makes the behavior clear: the farther apart the index fingertip and thumb tip appear on screen, the higher the brightness (or volume); the closer together, the lower. This gives smooth gesture control over both settings.
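The mapping behind this behavior is a single `np.interp` call; here is a worked example with illustrative numbers (the `s` factor normalizes for how large the hand appears on camera):

```python
import numpy as np

s = 0.1      # example hand-size scale factor
dis = 175    # example thumb-index distance in pixels
light = np.interp(dis, [int(500 * s), int(3000 * s)], (0, 100))
print(light)  # 50.0 -> brightness set to 50%
```

Distances at or below `500 * s` map to 0 and at or above `3000 * s` map to 100, since `np.interp` clamps outside the input range.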
III. Complete Code
```python
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
@Project : virtual
@File    : virtual_control.py
@IDE     : PyCharm
@Author  : 半亩花海
@Date    : 2024:02:06 18:01
"""
# imports
import math
import sys
import numpy as np
import cv2
import pygame
import wmi
import mediapipe as mp
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
import warnings  # suppress warnings

warnings.filterwarnings("ignore")

# hand keypoint detector class
class HandKeyPoint:
    def __init__(self,
                 static_image_mode=False,
                 max_num_hands=2,
                 model_complexity=1,
                 min_detection_confidence=0.5,
                 min_tracking_confidence=0.5):
        # MediaPipe hands API
        self.mp_hands = mp.solutions.hands
        # instantiate the hand detector
        self.hands = self.mp_hands.Hands(static_image_mode=static_image_mode,
                                         max_num_hands=max_num_hands,
                                         model_complexity=model_complexity,
                                         min_detection_confidence=min_detection_confidence,
                                         min_tracking_confidence=min_tracking_confidence)

    def process(self, image):
        # convert BGR to RGB
        img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # detect hands in the image and return the results
        results = self.hands.process(img)
        # the same data as a numpy array
        np_arr = landmarks_to_numpy(results)
        return results, np_arr

# convert landmarks-format data into a numpy array
def landmarks_to_numpy(results):
    """
    Convert landmarks-format data into a numpy array.
    numpy shape: (2, 21, 3)
    :param results: output of MediaPipe Hands
    :return: array of shape (2, 21, 3), ordered [left hand, right hand]
    """
    shape = (2, 21, 3)
    landmarks = results.multi_hand_landmarks
    if landmarks is None:
        # no hand detected
        return np.zeros(shape)
    elif len(landmarks) == 1:
        # one hand detected; check whether it is the left or the right hand
        label = results.multi_handedness[0].classification[0].label
        hand = landmarks[0]
        # print(label)
        if label == "Left":
            return np.array(
                [np.array([[hand.landmark[i].x, hand.landmark[i].y, hand.landmark[i].z] for i in range(21)]),
                 np.zeros((21, 3))])
        else:
            return np.array([np.zeros((21, 3)),
                             np.array(
                                 [[hand.landmark[i].x, hand.landmark[i].y, hand.landmark[i].z] for i in range(21)])])
    elif len(landmarks) == 2:
        # print(results.multi_handedness)
        lh_idx = 0
        rh_idx = 0
        for idx, hand_type in enumerate(results.multi_handedness):
            label = hand_type.classification[0].label
            if label == 'Left':
                lh_idx = idx
            if label == 'Right':
                rh_idx = idx
        lh = np.array(
            [[landmarks[lh_idx].landmark[i].x, landmarks[lh_idx].landmark[i].y, landmarks[lh_idx].landmark[i].z]
             for i in range(21)])
        rh = np.array(
            [[landmarks[rh_idx].landmark[i].x, landmarks[rh_idx].landmark[i].y, landmarks[rh_idx].landmark[i].z]
             for i in range(21)])
        return np.array([lh, rh])
    else:
        return np.zeros((2, 21, 3))

# draw the hand landmarks onto the frame
def draw_landmark(img, results):
    if results.multi_hand_landmarks:
        for hand_landmark in results.multi_hand_landmarks:
            mp.solutions.drawing_utils.draw_landmarks(
                img,
                hand_landmark,
                mp.solutions.hands.HAND_CONNECTIONS,
                mp.solutions.drawing_styles.get_default_hand_landmarks_style(),
                mp.solutions.drawing_styles.get_default_hand_connections_style())
    return img

# buffer (debouncer) for a boolean gesture state
class Buffer:
    def __init__(self, volume=20):
        self.__positive = 0
        self.state = False
        self.__negative = 0
        self.__volume = volume
        self.__count = 0

    def add_positive(self):
        self.__count += 1
        if self.__positive >= self.__volume:
            # enough positive samples: switch the state to True
            self.state = True
            self.__negative = 0
            self.__count = 0
        else:
            self.__positive += 1
            if self.__count > self.__volume:
                # state still undecided after `volume` operations: reset
                self.__positive = 0
                self.__count = 0

    def add_negative(self):
        self.__count += 1
        if self.__negative >= self.__volume:
            # enough negative samples: switch the state to False
            self.state = False
            self.__positive = 0
        else:
            self.__negative += 1
            if self.__count > self.__volume:
                # state still undecided after `volume` operations: reset
                self.__positive = 0
                self.__count = 0
        # print(f"pos:{self.__positive} neg:{self.__negative} count:{self.__count}")

    def clear(self):
        self.__positive = 0
        self.state = False
        self.__negative = 0
        self.__count = 0

# draw a line segment between two normalized points
def draw_line(frame, p1, p2, color=(255, 127, 0), thickness=3):
    """
    Draw a straight line on the frame.
    :param p1: first point, normalized (x, y) coordinates
    :param p2: second point, normalized (x, y) coordinates
    :return: the frame with the line drawn
    """
    return cv2.line(frame, (int(p1[0] * CAM_W), int(p1[1] * CAM_H)),
                    (int(p2[0] * CAM_W), int(p2[1] * CAM_H)), color, thickness)

# set the screen brightness
def screen_change(percent):  # `percent` is the brightness percentage (0-100)
    SCREEN = wmi.WMI(namespace='root/WMI')
    a = SCREEN.WmiMonitorBrightnessMethods()[0]
    a.WmiSetBrightness(Brightness=percent, Timeout=500)


# initialize the volume controller
def init_voice():
    devices = AudioUtilities.GetSpeakers()
    interface = devices.Activate(
        IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
    volume = cast(interface, POINTER(IAudioEndpointVolume))
    volume.SetMute(0, None)
    volume_range = volume.GetVolumeRange()
    min_volume = volume_range[0]
    max_volume = volume_range[1]
    return (min_volume, max_volume), volume

# open the camera
cap = cv2.VideoCapture(0)
CAM_W = 640
CAM_H = 480
CAM_SCALE = CAM_W / CAM_H

# create the hand keypoint detector
hand = HandKeyPoint()

# initialize pygame
pygame.init()
# create an 800x600 window
screen = pygame.display.set_mode((800, 600))
pygame.display.set_caption("virtual_control_screen")
# current window size
window_size = list(screen.get_size())

# state buffers (debouncers)
buffer_light = Buffer(10)
buffer_voice = Buffer(10)
last_y = 0
last_2_y = 1
last_2_x = 0

# initialize volume control
voice_range, volume = init_voice()

# brightness bar parameters
bright_bar_length = 300
bright_bar_height = 20
bright_bar_x = 50
bright_bar_y = 100

# volume bar parameters
vol_bar_length = 300
vol_bar_height = 20
vol_bar_x = 50
vol_bar_y = 50

# main loop: each iteration processes one camera frame
while True:
    img_menu = None
    lh_index = -1
    # read a frame from the camera
    success, frame = cap.read()
    if not success:
        # skip frames that failed to read
        continue
    # convert OpenCV's BGR format to regular RGB
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # mirror the frame horizontally
    frame = cv2.flip(frame, 1)
    # run hand detection
    res, arr = hand.process(frame)
    frame = draw_landmark(frame, res)
    # apparent size of the left index fingertip segment (landmarks 7-8),
    # used to scale the menu icon
    scale = math.hypot((arr[0, 7, 0] - arr[0, 8, 0]),
                       (arr[0, 7, 1] - arr[0, 8, 1]),
                       (arr[0, 7, 2] - arr[0, 8, 2]))
    # slope between the left wrist (0) and middle fingertip (12)
    tan = (arr[0, 0, 1] - arr[0, 12, 1]) / (arr[0, 0, 0] - arr[0, 12, 0])
    # convert to an angle in degrees
    angle = np.arctan(tan) * 180 / np.pi
    # print(angle)
    if 20 < angle < 90:
        path = 'resources/menu/light.png'
        buffer_light.add_positive()
        buffer_voice.add_negative()
        # show the brightness bar and its reading
        show_brightness = True
        show_volume = False
    elif -20 > angle > -50:
        path = 'resources/menu/voice.png'
        buffer_voice.add_positive()
        buffer_light.add_negative()
        # show the volume bar and its reading
        show_brightness = False
        show_volume = True
    else:
        path = 'resources/menu/menu.png'
        buffer_light.add_negative()
        buffer_voice.add_negative()
        # hide both bars
        show_brightness = False
        show_volume = False
    # pixel distance between the right thumb tip (4) and index fingertip (8)
    dis = math.hypot(int((arr[1, 4, 0] - arr[1, 8, 0]) * CAM_W), int((arr[1, 4, 1] - arr[1, 8, 1]) * CAM_H))
    # hand-size scale factor for the right hand (landmarks 5-9)
    s = math.hypot((arr[1, 5, 0] - arr[1, 9, 0]), (arr[1, 5, 1] - arr[1, 9, 1]), (arr[1, 5, 2] - arr[1, 9, 2]))
    # brightness control
    if buffer_light.state:
        frame = cv2.putText(frame, 'light ready', (10, 35), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 127, 0))
        frame = draw_line(frame, arr[1, 4], arr[1, 8], thickness=5, color=(255, 188, 66))
        if dis != 0:
            # linear interpolation: map a value from one interval onto another
            light = np.interp(dis, [int(500 * s), int(3000 * s)], (0, 100))
            # apply the brightness
            screen_change(light)
    # volume control
    elif buffer_voice.state:
        frame = cv2.putText(frame, 'voice ready', (10, 35), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 127, 0))
        frame = draw_line(frame, arr[1, 4], arr[1, 8], thickness=5, color=(132, 134, 248))
        if dis != 0:
            vol = np.interp(dis, [int(500 * s), int(3000 * s)], voice_range)
            # apply the volume
            volume.SetMasterVolumeLevel(vol, None)
    # resize the frame to fit the window
    frame = cv2.resize(frame, (int(window_size[1] * CAM_SCALE), window_size[1]))
    frame = cv2.transpose(frame)
    # render the frame
    frame = pygame.surfarray.make_surface(frame)
    screen.blit(frame, (int(0.5 * (CAM_W - CAM_H * CAM_SCALE)), 0))
    img_menu = pygame.image.load(path).convert_alpha()
    img_w, img_h = img_menu.get_size()
    img_menu = pygame.transform.scale(img_menu, (int(img_w * scale * 5), int(img_h * scale * 5)))
    x = (arr[0][9][0] + arr[0][13][0] + arr[0][0][0]) / 3
    y = (arr[0][9][1] + arr[0][13][1] + arr[0][0][1]) / 3
    x = int(x * window_size[0] - window_size[0] * scale * 3.5)
    y = int(y * window_size[1] - window_size[1] * scale * 12)
    # print(x, y)
    screen.blit(img_menu, (x, y))
    # draw the outline of the volume / brightness bar
    if show_volume:
        pygame.draw.rect(screen, (255, 255, 255), (vol_bar_x, vol_bar_y, vol_bar_length, vol_bar_height), 3)
    elif show_brightness:
        pygame.draw.rect(screen, (255, 255, 255), (bright_bar_x, bright_bar_y, bright_bar_length, bright_bar_height), 3)
    # compute the filled portion of the bar and draw it
    if show_volume:
        vol = volume.GetMasterVolumeLevel()
        vol_range = voice_range[1] - voice_range[0]
        vol_bar_fill_length = int((vol - voice_range[0]) / vol_range * vol_bar_length)
        pygame.draw.rect(screen, (0, 255, 0), (vol_bar_x, vol_bar_y, vol_bar_fill_length, vol_bar_height))
        # show the current volume as a percentage
        vol_text = f"Volume: {int((vol - voice_range[0]) / vol_range * 100)}%"
        vol_text_surface = pygame.font.SysFont(None, 24).render(vol_text, True, (255, 255, 255))
        screen.blit(vol_text_surface, (vol_bar_x + vol_bar_length + 10, vol_bar_y))
    elif show_brightness:
        brightness = wmi.WMI(namespace='root/WMI').WmiMonitorBrightness()[0].CurrentBrightness
        bright_bar_fill_length = int(brightness / 100 * bright_bar_length)
        pygame.draw.rect(screen, (255, 255, 0), (bright_bar_x, bright_bar_y, bright_bar_fill_length, bright_bar_height))
        # show the current brightness as a percentage
        bright_text = f"Brightness: {brightness}%"
        bright_text_surface = pygame.font.SysFont(None, 24).render(bright_text, True, (255, 255, 255))
        screen.blit(bright_text_surface, (bright_bar_x + bright_bar_length + 10, bright_bar_y))
    pygame.display.flip()
    # event handling: quit when 'q' is pressed
    for event in pygame.event.get():
        if event.type == pygame.KEYDOWN:
            if event.key == pygame.K_q:
                sys.exit(0)
```