YOLO11 实现智能健身应用

界面预览

实现原理

我们使用的模型为 yolo11n-pose.pt,能实时检测出人体17个关键点。

环境准备

首先我们要安装ultralytics,千万不能遗漏torch库的安装。

bash 复制代码
# Install the CUDA 11.8 build of PyTorch first. ultralytics depends on torch, so
# installing ultralytics beforehand would pull in a default torch build and the
# CUDA wheels below would then be skipped as "requirement already satisfied".
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
pip install ultralytics

代码实现

python 复制代码
import cv2

from ultralytics import solutions

cap = cv2.VideoCapture("demov4.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

# Video writer: annotated frames are saved with the input's frame size and frame rate
video_writer = cv2.VideoWriter("workouts.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Init AIGym
gym = solutions.AIGym(
    show=True,  # Display the frame
    kpts=[6, 8, 10],  # keypoints index of person for monitoring specific exercise, by default it's for pushup
    model="yolo11n-pose.pt",  # Path to the YOLO11 pose estimation model file
    line_width=2,  # Adjust the line width for bounding boxes and text display
    up_angle=135,  # Angle threshold for the "up" position
    down_angle=70,  # Angle threshold for the "down" position
)

# Process video
try:
    while cap.isOpened():
        success, im0 = cap.read()
        if not success:
            print("Video frame is empty or video processing has been successfully completed.")
            break
        im0 = gym.monitor(im0)
        video_writer.write(im0)
finally:
    # Always release the capture, the writer and any display windows, even if
    # processing a frame raises, so the output file is finalized and no handle leaks.
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()

源码解读

上面代码其实都基于源码类 AIGym 实现,我们需要精读这个类,方便日后定制自己的 AIGym 类,实现业务功能。

python 复制代码
# Ultralytics YOLO 🚀, AGPL-3.0 license

from ultralytics.solutions.solutions import BaseSolution
from ultralytics.utils.plotting import Annotator


class AIGym(BaseSolution):
    """
    A class to manage gym steps of people in a real-time video stream based on their poses.

    This class extends BaseSolution to monitor workouts using YOLO pose estimation models. It tracks and counts
    repetitions of exercises based on predefined angle thresholds for up and down positions.

    Attributes:
        count (List[int]): Repetition counts for each detected person.
        angle (List[float]): Current angle of the tracked body part for each person.
        stage (List[str]): Current exercise stage ('up', 'down', or '-') for each person.
        initial_stage (str | None): Initial stage of the exercise.
        up_angle (float): Angle threshold for considering the 'up' position of an exercise.
        down_angle (float): Angle threshold for considering the 'down' position of an exercise.
        kpts (List[int]): Indices of keypoints used for angle calculation.
        annotator (Annotator): Object for drawing annotations on the image.

    Methods:
        monitor: Processes a frame to detect poses, calculate angles, and count repetitions.

    Examples:
        >>> gym = AIGym(model="yolo11n-pose.pt")
        >>> image = cv2.imread("gym_scene.jpg")
        >>> processed_image = gym.monitor(image)
        >>> cv2.imshow("Processed Image", processed_image)
        >>> cv2.waitKey(0)
    """

    def __init__(self, **kwargs):
        """
        Initializes AIGym for workout monitoring using pose estimation and predefined angles.

        Args:
            **kwargs: Settings forwarded to BaseSolution. If "model" is missing, or its value does not contain
                "-pose", it is replaced by "yolo11n-pose.pt" before forwarding.
        """
        # Fall back to the default pose model when no model is given or the given name does not contain "-pose"
        if "-pose" not in kwargs.get("model", ""):
            kwargs["model"] = "yolo11n-pose.pt"

        super().__init__(**kwargs)
        self.count = []  # List for counts, necessary where there are multiple objects in frame
        self.angle = []  # List for angle, necessary where there are multiple objects in frame
        self.stage = []  # List for stage, necessary where there are multiple objects in frame

        # Extract details from CFG single time for usage later
        self.initial_stage = None
        self.up_angle = float(self.CFG["up_angle"])  # Pose up predefined angle to consider up pose
        self.down_angle = float(self.CFG["down_angle"])  # Pose down predefined angle to consider down pose
        self.kpts = self.CFG["kpts"]  # User selected kpts of workouts storage for further usage

    def monitor(self, im0):
        """
        Monitors workouts using Ultralytics YOLO Pose Model.

        This function processes an input image to track and analyze human poses for workout monitoring. It uses
        the YOLO Pose model to detect keypoints, estimate angles, and count repetitions based on predefined
        angle thresholds. A repetition is counted each time a person's angle drops below `down_angle` while
        that person's stage is 'up'.

        Args:
            im0 (ndarray): Input image for processing.

        Returns:
            (ndarray): Processed image with annotations for workout monitoring.

        Examples:
            >>> gym = AIGym()
            >>> image = cv2.imread("workout.jpg")
            >>> processed_image = gym.monitor(image)
        """
        # Extract tracks
        tracks = self.model.track(source=im0, persist=True, classes=self.CFG["classes"])[0]

        if tracks.boxes.id is not None:
            # The per-person state lists only ever grow: pad them with defaults when more
            # people are tracked in this frame than there are state slots
            if len(tracks) > len(self.count):
                new_human = len(tracks) - len(self.count)
                self.angle += [0] * new_human
                self.count += [0] * new_human
                self.stage += ["-"] * new_human

            # Initialize annotator
            self.annotator = Annotator(im0, line_width=self.line_width)

            # Enumerate over keypoints
            # NOTE(review): state slots are addressed by enumeration position over the reversed
            # keypoints, not by tracker ID - confirm the ordering is stable across frames
            for ind, k in enumerate(reversed(tracks.keypoints.data)):
                # Get keypoints and estimate the angle
                kpts = [k[int(self.kpts[i])].cpu() for i in range(3)]
                self.angle[ind] = self.annotator.estimate_pose_angle(*kpts)
                im0 = self.annotator.draw_specific_points(k, self.kpts, radius=self.line_width * 3)

                # Determine stage and count logic based on angle thresholds
                if self.angle[ind] < self.down_angle:
                    # Only an 'up' -> 'down' transition completes a repetition
                    if self.stage[ind] == "up":
                        self.count[ind] += 1
                    self.stage[ind] = "down"
                elif self.angle[ind] > self.up_angle:
                    self.stage[ind] = "up"

                # Display angle, count, and stage text
                self.annotator.plot_angle_and_count_and_stage(
                    angle_text=self.angle[ind],  # angle text for display
                    count_text=self.count[ind],  # count text for workouts
                    stage_text=self.stage[ind],  # stage position text
                    center_kpt=k[int(self.kpts[1])],  # center keypoint for display
                )

        self.display_output(im0)  # Display output image, if environment support display
        return im0  # return an image for writing or further usage

最主要的是 AIGym 类,用于实时视频流中基于人体姿态来监测和计数健身动作。类的主要属性:

  • count: 记录每个检测到的人的运动重复次数

  • angle: 记录每个人当前动作的角度

  • stage: 记录每个人当前的运动阶段("up"上升、"down"下降或"-"初始状态)

  • up_angle/down_angle: 定义动作上升和下降的角度阈值

  • kpts: 用于计算角度的关键点索引

这么设计是为了开发者方便定制。

核心方法 monitor 中包含了计数逻辑:

python 复制代码
    # Repetition counting for person `ind`, driven by the current angle and the two thresholds
    if self.angle[ind] < self.down_angle:

        if self.stage[ind] == "up":  # going from "up" to "down" completes one repetition, so add 1 to the count

            self.count[ind] += 1

        self.stage[ind] = "down"

    elif self.angle[ind] > self.up_angle:

        self.stage[ind] = "up"

不管怎么说,这个类的设计很适合用于健身房场景,可以同时追踪多人的运动状态,并为每个人独立计数。我们可以通过预定义的角度阈值来判断动作的完成情况,它是一个实用的运动姿态分析工具。

功能拓展

分析完源码后,我们便可以在 AIGym 类的基础上加上画面推流功能,实现多个终端收看实时画面。

首先安装 ffmpeg,我们可以通过在终端中输入 ffmpeg -version 来检查 FFmpeg 是否已安装以及其版本信息。

然后安装流媒体服务器 mediamtx,如果是 Windows 系统,我们只要下载 mediamtx.exe ,然后在终端中运行即可。

下面是拓展后的代码。

python 复制代码
import cv2
import subprocess as sp
from AIGym import AIGym
cap = cv2.VideoCapture("test.mp4")
assert cap.isOpened(), "Error reading video file"

# Video properties, read once; they describe the raw frames piped to FFmpeg below
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Init AIGym
gym = AIGym(
    show=True,  # Display the frame
    kpts=[6, 8, 10],  # keypoints index of person for monitoring specific exercise, by default it's for pushup
    model="yolo11n-pose.pt",  # Path to the YOLO11 pose estimation model file
    line_width=2,  # Adjust the line width for bounding boxes and text display
    up_angle=135,  # Angle threshold for the "up" position
    down_angle=70,  # Angle threshold for the "down" position
)

# RTSP publish URL
rtsp_url = 'rtsp://127.0.0.1:8554/channels001'

# Build the FFmpeg command: raw BGR frames in on stdin, H.264 over RTSP out
command = [
    'ffmpeg',
    '-y',  # overwrite output without asking
    '-f', 'rawvideo',  # input format is raw video
    '-vcodec', 'rawvideo',  # input codec is raw video
    '-pix_fmt', 'bgr24',  # input pixel format
    '-s', f'{width}x{height}',  # frame size
    '-r', str(fps),  # frame rate
    '-i', '-',  # read input from the pipe (stdin)
    '-c:v', 'libx264',  # encode with libx264
    '-pix_fmt', 'yuv420p',  # output pixel format
    '-preset', 'ultrafast',  # encoding speed preset
    '-f', 'rtsp',  # output format is RTSP
    rtsp_url,  # RTSP publish URL
]

# Start the FFmpeg child process; annotated frames are written to its stdin
pipe = sp.Popen(command, stdin=sp.PIPE)

# Process video
try:
    while cap.isOpened():
        success, im0 = cap.read()
        if not success:
            print("Video frame is empty or video processing has been successfully completed.")
            break
        im0 = gym.monitor(im0)
        try:
            pipe.stdin.write(im0.tobytes())
        except OSError:
            # The write fails once FFmpeg has exited (e.g. the RTSP server is not
            # reachable); stop streaming instead of crashing with an unhandled error.
            print("FFmpeg pipe is closed, stopping the stream.")
            break
finally:
    # Release resources even if processing a frame raises
    cap.release()
    try:
        pipe.stdin.close()  # closing flushes buffered data and signals end of input to FFmpeg
    except OSError:
        pass  # FFmpeg already exited, so there is nothing left to flush
    pipe.wait()
    cv2.destroyAllWindows()

此时我们打开 http://127.0.0.1:8888/channels001/

便可以在浏览器上看到推送的画面。

相关推荐
love530love26 分钟前
Windows避坑部署CosyVoice多语言大语言模型
人工智能·windows·python·语言模型·自然语言处理·pycharm
985小水博一枚呀1 小时前
【AI大模型学习路线】第二阶段之RAG基础与架构——第七章(【项目实战】基于RAG的PDF文档助手)技术方案与架构设计?
人工智能·学习·语言模型·架构·大模型
白熊1881 小时前
【图像生成大模型】Wan2.1:下一代开源大规模视频生成模型
人工智能·计算机视觉·开源·文生图·音视频
weixin_514548891 小时前
一种开源的高斯泼溅实现库——gsplat: An Open-Source Library for Gaussian Splatting
人工智能·计算机视觉·3d
四口鲸鱼爱吃盐2 小时前
BMVC2023 | 多样化高层特征以提升对抗迁移性
人工智能·深度学习·cnn·vit·对抗攻击·迁移攻击
Echo``2 小时前
3:OpenCV—视频播放
图像处理·人工智能·opencv·算法·机器学习·视觉检测·音视频
Douglassssssss2 小时前
【深度学习】使用块的网络(VGG)
网络·人工智能·深度学习
okok__TXF2 小时前
SpringBoot3+AI
java·人工智能·spring
SAP工博科技2 小时前
如何提升新加坡SAP实施成功率?解答中企出海的“税务合规密码” | 工博科技SAP金牌服务商
人工智能·科技·制造
闭月之泪舞3 小时前
OpenCv高阶(八)——摄像头调用、摄像头OCR
人工智能·opencv·ocr