C# OpenCVSharp实现Hand Pose Estimation Mediapipe

OpenCV 官方模型库(opencv_zoo)中有一个手部姿态估计(手势识别)的示例,是 Python 版本的,路径为 opencv_zoo-main\models\handpose_estimation_mediapipe。本文将该示例改写为 C#,用 OpenCvSharp 实现,不依赖其他库,只需要安装 OpenCvSharp 即可。所需的模型文件可以从 GitHub 上的 OpenCV 官方模型库下载。

官方效果:

需要用到两个模型:一个是手掌检测模型,用于定位手掌并确定手部方向,以便把手部图像旋转摆正;另一个是手部关键点检测模型。

代码如下:

///PalmDetector.cs

using OpenCvSharp;

using OpenCvSharp.Dnn;

using System;

using System.Collections.Generic;

using System.Linq;

using System.Runtime.InteropServices;

using Point = OpenCvSharp.Point;

using Scalar = OpenCvSharp.Scalar;

using Size = OpenCvSharp.Size;

namespace yolo_world_opencvsharp_net4._8

{

public class PalmDetector : IDisposable

{

// Path of the ONNX model handed to the constructor.
private string model_path;

// Overlap threshold passed to CvDnn.NMSBoxes.
private float nms_threshold;

// Minimum sigmoid score a candidate needs to be kept.
private float score_threshold;

// Top-K limit supplied by the caller.
private int topK;

// Numeric values of the backend / target enums chosen at construction.
private int backend_id;

private int target_id;

// Network input resolution (192 x 192).
private Size input_size;

private Net model;

// Anchor points from MPPalmAnchors.LoadAnchors(); indexed by candidate row during decoding.
private List<Point2f> anchors;

private bool _disposed = false;

/// <summary>
/// Loads the palm-detection network and its anchor table.
/// </summary>
/// <param name="modelPath">Path to the ONNX model file.</param>
/// <param name="nmsThreshold">Overlap threshold used by non-maximum suppression.</param>
/// <param name="scoreThreshold">Minimum score for a detection to be reported.</param>
/// <param name="topK">Top-K limit stored for post-processing.</param>
/// <param name="backend">Preferred DNN backend.</param>
/// <param name="target">Preferred DNN target device.</param>
/// <exception cref="ArgumentException">The path is blank or the model could not be loaded.</exception>
public PalmDetector(string modelPath, float nmsThreshold = 0.3f, float scoreThreshold = 0.5f,
    int topK = 5000, Backend backend = Backend.OPENCV, Target target = Target.CPU)
{
    if (string.IsNullOrWhiteSpace(modelPath))
    {
        throw new ArgumentException("Model path must not be null or empty", nameof(modelPath));
    }

    model_path = modelPath;
    nms_threshold = nmsThreshold;
    score_threshold = scoreThreshold;
    this.topK = topK;
    // Record the selection the same way MPHandPose does; these fields were previously never set.
    backend_id = (int)backend;
    target_id = (int)target;
    input_size = new Size(192, 192);

    // Load the model. ReadNetFromOnnx may hand back null, so test that before calling Empty().
    model = CvDnn.ReadNetFromOnnx(model_path);
    if (model == null || model.Empty())
    {
        model?.Dispose();
        model = null;
        throw new ArgumentException($"Failed to load model from {modelPath}");
    }

    try
    {
        model.SetPreferableBackend(backend);
        model.SetPreferableTarget(target);
        anchors = MPPalmAnchors.LoadAnchors();
    }
    catch
    {
        // Do not leave the native network alive when construction fails half-way.
        model.Dispose();
        model = null;
        throw;
    }

    Console.WriteLine($"PalmDetector initialized with {anchors?.Count ?? 0} anchors");
}

/// <summary>
/// Runs palm detection on one image.
/// </summary>
/// <param name="image">Input image; must be non-null and non-empty.</param>
/// <returns>
/// One list per detection, as produced by Postprocess:
/// box corners, seven landmark (x, y) pairs, then the score.
/// </returns>
/// <exception cref="ArgumentException">The image is null or empty.</exception>
public List<List<float>> Infer(Mat image)
{
    if (image == null || image.Empty())
    {
        throw new ArgumentException("Input image is null or empty");
    }

    var (preprocessed_image, pad_bias) = Preprocess(image);
    Mat[] outputs = null;

    try
    {
        model.SetInput(preprocessed_image);

        var outputNames = model.GetUnconnectedOutLayersNames();
        outputs = outputNames.Select(_ => new Mat()).ToArray();
        model.Forward(outputs, outputNames);

        // Postprocess returns plain managed lists, so the native outputs can be released afterwards.
        return Postprocess(outputs, image.Size(), pad_bias);
    }
    finally
    {
        // The output Mats were previously never disposed, leaking native memory on every call.
        if (outputs != null)
        {
            foreach (Mat output in outputs)
            {
                output?.Dispose();
            }
        }

        preprocessed_image?.Dispose();
    }
}

/// <summary>
/// Draws detections onto a copy of <paramref name="image"/> and optionally prints them.
/// </summary>
/// <param name="image">Source image; it is cloned, not modified.</param>
/// <param name="results">Detections: 4 box values, 7 landmark (x, y) pairs, then the score.</param>
/// <param name="print_results">When true, each drawn detection is also written to the console.</param>
/// <param name="fps">Frame rate to overlay; nothing is drawn unless it is positive.</param>
/// <returns>The annotated copy.</returns>
public Mat Visualize(Mat image, List<List<float>> results, bool print_results = false, float fps = 0.0f)
{
    const int LandmarkCount = 7;

    Scalar boxColor = new Scalar(0, 255, 0);
    Scalar white = new Scalar(255, 255, 255);

    // One colour per landmark (BGR order).
    Scalar[] palette =
    {
        new Scalar(255, 0, 0),     // Blue
        new Scalar(0, 255, 255),   // Yellow
        new Scalar(255, 0, 255),   // Magenta
        new Scalar(0, 255, 0),     // Green
        new Scalar(255, 255, 0),   // Cyan
        new Scalar(128, 0, 128),   // Purple
        new Scalar(0, 165, 255)    // Orange
    };

    Mat canvas = image.Clone();

    if (fps > 0)
    {
        Cv2.PutText(canvas, $"FPS: {fps:F1}", new Point(10, 30),
            HersheyFonts.HersheySimplex, 0.7, new Scalar(0, 0, 255), 2);
    }

    int palmNumber = 0;
    foreach (List<float> palm in results)
    {
        // The printed number counts skipped entries too.
        palmNumber++;

        // 4 box + 14 landmark values + 1 score
        if (palm.Count < 19)
        {
            continue;
        }

        float score = palm[palm.Count - 1];
        Point topLeft = new Point((int)palm[0], (int)palm[1]);
        Point bottomRight = new Point((int)palm[2], (int)palm[3]);

        // Bounding box and score label
        Cv2.Rectangle(canvas, topLeft, bottomRight, boxColor, 2);
        Cv2.PutText(canvas, $"{score:F3}", new Point(topLeft.X, topLeft.Y - 10),
            HersheyFonts.HersheySimplex, 0.5, boxColor, 1);

        // Landmarks: coloured disc, white core, 1-based index label
        for (int k = 0; k < LandmarkCount; k++)
        {
            int at = 4 + k * 2;
            Point dot = new Point((int)palm[at], (int)palm[at + 1]);

            Cv2.Circle(canvas, dot, 4, palette[k], -1);
            Cv2.Circle(canvas, dot, 2, white, -1);
            Cv2.PutText(canvas, $"{k + 1}", new Point(dot.X + 6, dot.Y - 6),
                HersheyFonts.HersheySimplex, 0.4, white, 1);
        }

        if (!print_results)
        {
            continue;
        }

        Console.WriteLine($"----------- Palm {palmNumber} -----------");
        Console.WriteLine($"Score: {score:F4}");
        Console.WriteLine($"Box: [{palm[0]:F0}, {palm[1]:F0}, {palm[2]:F0}, {palm[3]:F0}]");
        Console.WriteLine($"Box Size: {palm[2] - palm[0]:F0} x {palm[3] - palm[1]:F0}");
        Console.WriteLine("Landmarks:");
        for (int k = 0; k < LandmarkCount; k++)
        {
            Console.WriteLine($" Point {k + 1}: ({palm[4 + k * 2]:F0}, {palm[4 + k * 2 + 1]:F0})");
        }
        Console.WriteLine();
    }

    return canvas;
}

/// <summary>
/// Letterboxes the image to the network input size and packs it as a 1 x H x W x 3 float blob.
/// </summary>
/// <param name="image">Input image; converted with BGR2RGB, so a 3-channel BGR image is expected.</param>
/// <returns>
/// The blob (caller disposes it) and the left/top padding expressed in original-image pixels.
/// </returns>
private (Mat, Point) Preprocess(Mat image)
{
    Point pad_bias = new Point(0, 0);
    float ratio = Math.Min((float)input_size.Width / image.Width, (float)input_size.Height / image.Height);

    Mat processed_image = null;
    try
    {
        if (image.Height != input_size.Height || image.Width != input_size.Width)
        {
            // Resize keeping the aspect ratio; never let a side truncate to zero pixels.
            Size ratio_size = new Size(
                Math.Max(1, (int)(image.Width * ratio)),
                Math.Max(1, (int)(image.Height * ratio)));

            processed_image = new Mat();
            Cv2.Resize(image, processed_image, ratio_size, 0, 0, InterpolationFlags.Linear);

            // Centre the resized image on a black canvas of the input size.
            int pad_h = input_size.Height - ratio_size.Height;
            int pad_w = input_size.Width - ratio_size.Width;
            pad_bias.X = pad_w / 2;
            pad_bias.Y = pad_h / 2;

            Cv2.CopyMakeBorder(processed_image, processed_image, pad_bias.Y, pad_h - pad_bias.Y,
                pad_bias.X, pad_w - pad_bias.X, BorderTypes.Constant, new Scalar(0, 0, 0));
        }
        else
        {
            // Allocate only here: the old code created an empty Mat first and then orphaned it.
            processed_image = image.Clone();
        }

        // Scale to [0, 1] floats and switch the channel order.
        processed_image.ConvertTo(processed_image, MatType.CV_32FC3, 1.0 / 255.0);
        Cv2.CvtColor(processed_image, processed_image, ColorConversionCodes.BGR2RGB);

        // Copy the pixels out and wrap them as a 4-D NHWC blob.
        float[] imgData = new float[processed_image.Total() * processed_image.Channels()];
        Marshal.Copy(processed_image.Data, imgData, 0, imgData.Length);

        int[] dims = new int[] { 1, input_size.Height, input_size.Width, 3 };
        Mat blob = new Mat(dims, MatType.CV_32FC1, imgData);

        // Express the padding offset in original-image coordinates.
        pad_bias.X = (int)(pad_bias.X / ratio);
        pad_bias.Y = (int)(pad_bias.Y / ratio);

        return (blob, pad_bias);
    }
    finally
    {
        // Released on success and on failure alike.
        processed_image?.Dispose();
    }
}

/// <summary>
/// Decodes the raw network outputs into palm detections in original-image coordinates.
/// </summary>
/// <param name="output_blobs">
/// Network outputs: element 0 holds 18 regression values per candidate, element 1 one score per candidate.
/// </param>
/// <param name="original_size">Size of the image given to Infer.</param>
/// <param name="pad_bias">Left/top letterbox offset in original-image pixels.</param>
/// <returns>
/// One list per detection kept by NMS: [x1, y1, x2, y2, 7 landmark (x, y) pairs, score].
/// Empty when fewer than two outputs are supplied or nothing passes the filters.
/// </returns>
/// <exception cref="InvalidOperationException">The anchor table or box output has fewer rows than the score output.</exception>
private List<List<float>> Postprocess(Mat[] output_blobs, Size original_size, Point pad_bias)
{
    const int ValuesPerCandidate = 18; // 4 box values + 7 landmark (x, y) pairs
    const int LandmarkCount = 7;

    var results = new List<List<float>>();
    if (output_blobs == null || output_blobs.Length < 2)
    {
        return results;
    }

    var score_vec = new List<float>();
    var boxes_vec = new List<Rect2f>();
    var landmarks_vec = new List<List<Point2f>>();

    float scale = Math.Max(original_size.Height, original_size.Width);
    float maxX = original_size.Width - 1;
    float maxY = original_size.Height - 1;

    // Keeps a coordinate inside [0, upper].
    float Clamp(float value, float upper) => Math.Max(0, Math.Min(upper, value));

    // The reshaped views are released even when decoding throws.
    using (Mat scores = output_blobs[1].Reshape(1, (int)(output_blobs[1].Total())))
    using (Mat boxes = output_blobs[0].Reshape(1, (int)(output_blobs[0].Total() / ValuesPerCandidate)))
    {
        if (anchors == null || anchors.Count < scores.Rows || boxes.Rows < scores.Rows)
        {
            throw new InvalidOperationException(
                $"Palm detector output has {scores.Rows} candidates but only {anchors?.Count ?? 0} anchors and {boxes.Rows} box rows are available");
        }

        for (int i = 0; i < scores.Rows; i++)
        {
            float score = 1.0f / (1.0f + (float)Math.Exp(-scores.At<float>(i, 0)));
            if (score < score_threshold)
            {
                continue;
            }

            Point2f anchor = anchors[i];

            // Read the row directly; the old Row(i).ColRange(...) calls created
            // several native Mat headers per detection that were never disposed.
            float cx = boxes.At<float>(i, 0) / input_size.Width;
            float cy = boxes.At<float>(i, 1) / input_size.Height;
            float w = boxes.At<float>(i, 2) / input_size.Width;
            float h = boxes.At<float>(i, 3) / input_size.Height;

            // Decode the corners, undo the letterbox offset, clip to the image.
            float x1 = Clamp((cx - w / 2 + anchor.X) * scale - pad_bias.X, maxX);
            float y1 = Clamp((cy - h / 2 + anchor.Y) * scale - pad_bias.Y, maxY);
            float x2 = Clamp((cx + w / 2 + anchor.X) * scale - pad_bias.X, maxX);
            float y2 = Clamp((cy + h / 2 + anchor.Y) * scale - pad_bias.Y, maxY);

            // Keep only boxes more than 20 px wide and tall.
            if (!((x2 - x1) > 20 && (y2 - y1) > 20))
            {
                continue;
            }

            var landmarks = new List<Point2f>(LandmarkCount);
            for (int j = 0; j < LandmarkCount; j++)
            {
                float lx = boxes.At<float>(i, 4 + j * 2) / input_size.Width + anchor.X;
                float ly = boxes.At<float>(i, 4 + j * 2 + 1) / input_size.Height + anchor.Y;

                lx = Clamp(lx * scale - pad_bias.X, maxX);
                ly = Clamp(ly * scale - pad_bias.Y, maxY);

                landmarks.Add(new Point2f(lx, ly));
            }

            score_vec.Add(score);
            boxes_vec.Add(new Rect2f(x1, y1, x2 - x1, y2 - y1));
            landmarks_vec.Add(landmarks);
        }
    }

    if (score_vec.Count == 0)
    {
        return results;
    }

    // Non-maximum suppression on integer boxes; topK is now actually passed through.
    List<Rect> boxes_int = boxes_vec
        .Select(box => new Rect((int)box.X, (int)box.Y, (int)box.Width, (int)box.Height))
        .ToList();

    CvDnn.NMSBoxes(boxes_int, score_vec, score_threshold, nms_threshold, out int[] indices, 1.0f, topK);

    foreach (int idx in indices)
    {
        Rect2f box = boxes_vec[idx];
        var result = new List<float>(4 + LandmarkCount * 2 + 1)
        {
            box.X,
            box.Y,
            box.X + box.Width,
            box.Y + box.Height
        };

        foreach (Point2f point in landmarks_vec[idx])
        {
            result.Add(point.X);
            result.Add(point.Y);
        }

        result.Add(score_vec[idx]);
        results.Add(result);
    }

    return results;
}

/// <summary>
/// Releases the network and suppresses finalization.
/// </summary>
public void Dispose()
{
    Dispose(disposing: true);
    GC.SuppressFinalize(this);
}

/// <summary>
/// Core cleanup routine; does its work only on the first call.
/// </summary>
/// <param name="disposing">True when called from Dispose(); the network is disposed only in that case.</param>
protected virtual void Dispose(bool disposing)
{
    if (_disposed)
    {
        return;
    }

    if (disposing && model != null)
    {
        model.Dispose();
    }

    _disposed = true;
}

/// <summary>
/// Finalizer; runs the cleanup routine with disposing set to false.
/// </summary>
~PalmDetector()
{
    Dispose(disposing: false);
}

}

}

///MPHandPose.cs

using OpenCvSharp;

using OpenCvSharp.Dnn;

using System;

using System.Collections.Generic;

using System.Linq;

namespace yolo_world_opencvsharp_net4._8

{

public class MPHandPose : IDisposable

{

// Path of the ONNX model handed to the constructor.
private string model_path;

// Minimum confidence for a hand pose to be reported.
private float conf_threshold;

// Numeric values of the currently selected backend / target enums.
private int backend_id;

private int target_id;

// Network input resolution (224 x 224).
private Size input_size;

private Net model;

private bool _disposed = false;

// Constants from the Python version

// Not referenced by the methods of this class shown here.
private readonly int[] PALM_LANDMARK_IDS = { 0, 5, 9, 13, 17, 1, 2 };

// Indices into the seven palm landmarks; the line between these two points sets the rotation angle.
private const int PALM_LANDMARKS_INDEX_OF_PALM_BASE = 0;

private const int PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2;

// Shift (as a fraction of box width/height) and enlargement used for the pre-rotation crop.
private readonly double[] PALM_BOX_PRE_SHIFT_VECTOR = { 0, 0 };

private const double PALM_BOX_PRE_ENLARGE_FACTOR = 4;

// Shift and enlargement used for the final crop taken from the rotated image.
private readonly double[] PALM_BOX_SHIFT_VECTOR = { 0, -0.4 };

private const double PALM_BOX_ENLARGE_FACTOR = 3;

// Shift and enlargement applied to the landmark bounding box reported to the caller.
private readonly double[] HAND_BOX_SHIFT_VECTOR = { 0, -0.1 };

private const double HAND_BOX_ENLARGE_FACTOR = 1.65;

/// <summary>
/// Loads the hand-pose network.
/// </summary>
/// <param name="modelPath">Path to the ONNX model file.</param>
/// <param name="confThreshold">Minimum confidence for Infer to return a result.</param>
/// <param name="backend">Preferred DNN backend.</param>
/// <param name="target">Preferred DNN target device.</param>
/// <exception cref="ArgumentException">The path is blank or the model could not be loaded.</exception>
public MPHandPose(string modelPath, float confThreshold = 0.8f, Backend backend = Backend.OPENCV, Target target = Target.CPU)
{
    if (string.IsNullOrWhiteSpace(modelPath))
    {
        throw new ArgumentException("Model path must not be null or empty", nameof(modelPath));
    }

    model_path = modelPath;
    conf_threshold = confThreshold;
    backend_id = (int)backend;
    target_id = (int)target;
    input_size = new Size(224, 224);

    // ReadNetFromOnnx may hand back null, so test that before calling Empty().
    model = CvDnn.ReadNetFromOnnx(model_path);
    if (model == null || model.Empty())
    {
        model?.Dispose();
        model = null;
        throw new ArgumentException($"Failed to load hand pose model from {modelPath}");
    }

    try
    {
        model.SetPreferableBackend(backend);
        model.SetPreferableTarget(target);
    }
    catch
    {
        // Do not leave the native network alive when construction fails half-way.
        model.Dispose();
        model = null;
        throw;
    }

    Console.WriteLine($"MPHandPose initialized with input size: {input_size}");
}

/// <summary>
/// Switches the network to another backend / target pair and records the choice.
/// </summary>
/// <param name="backend">Preferred DNN backend.</param>
/// <param name="target">Preferred DNN target device.</param>
public void SetBackendAndTarget(Backend backend, Target target)
{
    // Remember the selection first, then apply it to the network.
    (backend_id, target_id) = ((int)backend, (int)target);

    model.SetPreferableBackend(backend);
    model.SetPreferableTarget(target);
}

/// <summary>
/// Shifts and enlarges a palm box, crops that region from the image and pads the crop to a square with black.
/// </summary>
/// <param name="image">Image to crop from; it is not modified.</param>
/// <param name="palmBbox">Palm box in <paramref name="image"/> coordinates.</param>
/// <param name="forRotation">
/// True selects the pre-rotation shift/enlarge constants and pads to the crop's diagonal length,
/// so corners survive a later rotation; false selects the final-crop constants and pads to the longer side.
/// </param>
/// <returns>
/// The padded crop (a new Mat the caller must dispose), the clipped enlarged box,
/// and the offset of the padded crop's origin in <paramref name="image"/> coordinates.
/// </returns>
/// <exception cref="ArgumentException">The image is null or empty.</exception>
private (Mat, Rect, Point) CropAndPadFromPalm(Mat image, Rect palmBbox, bool forRotation = false)
{
    if (image == null || image.Empty())
    {
        throw new ArgumentException("Input image is null or empty", nameof(image));
    }

    double[] shiftVector = forRotation ? PALM_BOX_PRE_SHIFT_VECTOR : PALM_BOX_SHIFT_VECTOR;
    double enlargeScale = forRotation ? PALM_BOX_PRE_ENLARGE_FACTOR : PALM_BOX_ENLARGE_FACTOR;

    // Shift the box by a fraction of its own size.
    int shiftX = (int)(palmBbox.Width * shiftVector[0]);
    int shiftY = (int)(palmBbox.Height * shiftVector[1]);

    // Enlarge around the shifted centre.
    int centerX = palmBbox.X + shiftX + palmBbox.Width / 2;
    int centerY = palmBbox.Y + shiftY + palmBbox.Height / 2;
    int halfW = (int)(palmBbox.Width * enlargeScale / 2);
    int halfH = (int)(palmBbox.Height * enlargeScale / 2);

    int x1 = centerX - halfW;
    int y1 = centerY - halfH;
    int x2 = centerX + halfW;
    int y2 = centerY + halfH;

    // Clip every edge on its own. The previous code clamped only the origin and kept the
    // full width/height, which pushed the right/bottom edge outwards whenever the left/top
    // edge was clipped. At least one pixel is always kept so the crop is never empty.
    x1 = Math.Max(0, Math.Min(image.Width - 1, x1));
    y1 = Math.Max(0, Math.Min(image.Height - 1, y1));
    x2 = Math.Max(x1 + 1, Math.Min(image.Width, x2));
    y2 = Math.Max(y1 + 1, Math.Min(image.Height, y2));

    var enlargedBbox = new Rect(x1, y1, x2 - x1, y2 - y1);

    // Pad so that corner pixels are not lost.
    int sideLen = forRotation
        ? (int)Math.Sqrt(enlargedBbox.Width * enlargedBbox.Width + enlargedBbox.Height * enlargedBbox.Height)
        : Math.Max(enlargedBbox.Width, enlargedBbox.Height);

    int padH = sideLen - enlargedBbox.Height;
    int padW = sideLen - enlargedBbox.Width;
    int left = padW / 2;
    int top = padH / 2;
    int right = padW - left;
    int bottom = padH - top;

    Mat padded = new Mat();
    try
    {
        using (Mat roi = new Mat(image, enlargedBbox))
        {
            // Isolated: without it OpenCV fills the border of a ROI with the parent image's
            // neighbouring pixels instead of the constant colour. Writing into a separate Mat
            // also guarantees the result never aliases the caller's image.
            Cv2.CopyMakeBorder(roi, padded, top, bottom, left, right,
                BorderTypes.Constant | BorderTypes.Isolated, Scalar.Black);
        }
    }
    catch
    {
        padded.Dispose();
        throw;
    }

    var bias = new Point(enlargedBbox.X - left, enlargedBbox.Y - top);
    return (padded, enlargedBbox, bias);
}

/// <summary>
/// Builds the network input for one palm: crop around the palm, rotate the hand upright,
/// crop again around the rotated palm landmarks, then resize and normalise.
/// </summary>
/// <param name="image">Full input image (converted with BGR2RGB after cropping).</param>
/// <param name="palm">Palm detection: [x1, y1, x2, y2, 7 landmark (x, y) pairs, ...]; at least 18 values.</param>
/// <returns>
/// The 1 x H x W x 3 float blob, the final crop box in rotated-crop coordinates, the rotation
/// angle in degrees, the 2 x 3 rotation matrix (CV_64FC1) and the offset of the first crop in
/// the original image. The caller owns and must dispose both returned Mats.
/// </returns>
/// <exception cref="ArgumentException">The palm list has fewer than 18 values.</exception>
private (Mat, Rect, double, Mat, Point) Preprocess(Mat image, List<float> palm)
{
    if (palm.Count < 18)
    {
        throw new ArgumentException("Palm data must contain at least 18 elements");
    }

    // Palm bounding box as x, y, width, height
    var palmBbox = new Rect(
        (int)palm[0], (int)palm[1],
        (int)(palm[2] - palm[0]),
        (int)(palm[3] - palm[1]));

    // Crop and pad the image to the region of interest
    var (croppedImage, rotatedPalmBbox, bias) = CropAndPadFromPalm(image, palmBbox, true);

    Mat rotationMatrix = null;
    Mat rotatedImage = null;
    Mat finalCrop = null;
    Mat scaled = null;

    try
    {
        Cv2.CvtColor(croppedImage, croppedImage, ColorConversionCodes.BGR2RGB);

        // Palm landmarks relative to the crop
        var palmLandmarks = new List<Point2f>(7);
        for (int i = 0; i < 7; i++)
        {
            palmLandmarks.Add(new Point2f(
                palm[4 + i * 2] - bias.X,
                palm[4 + i * 2 + 1] - bias.Y));
        }

        // Angle that makes the palm-base -> middle-finger-base direction point up,
        // wrapped into [-pi, pi)
        var p1 = palmLandmarks[PALM_LANDMARKS_INDEX_OF_PALM_BASE];
        var p2 = palmLandmarks[PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE];
        double radians = Math.PI / 2 - Math.Atan2(-(p2.Y - p1.Y), p2.X - p1.X);
        radians = radians - 2 * Math.PI * Math.Floor((radians + Math.PI) / (2 * Math.PI));
        double angle = radians * (180.0 / Math.PI);

        // Rotate about the centre of the crop box
        var centerPalmBbox = new Point2f(
            rotatedPalmBbox.X + rotatedPalmBbox.Width / 2.0f - bias.X,
            rotatedPalmBbox.Y + rotatedPalmBbox.Height / 2.0f - bias.Y);

        // The returned matrix is CV_64FC1, hence the double accessors below.
        rotationMatrix = Cv2.GetRotationMatrix2D(centerPalmBbox, angle, 1.0);

        rotatedImage = new Mat();
        Cv2.WarpAffine(croppedImage, rotatedImage, rotationMatrix, new Size(croppedImage.Width, croppedImage.Height));

        // Read the matrix once instead of once per landmark.
        double r00 = rotationMatrix.At<double>(0, 0);
        double r01 = rotationMatrix.At<double>(0, 1);
        double r02 = rotationMatrix.At<double>(0, 2);
        double r10 = rotationMatrix.At<double>(1, 0);
        double r11 = rotationMatrix.At<double>(1, 1);
        double r12 = rotationMatrix.At<double>(1, 2);

        // Bounding box of the rotated palm landmarks
        float minX = float.MaxValue, minY = float.MaxValue;
        float maxX = float.MinValue, maxY = float.MinValue;
        foreach (var landmark in palmLandmarks)
        {
            float rotatedX = (float)(r00 * landmark.X + r01 * landmark.Y + r02);
            float rotatedY = (float)(r10 * landmark.X + r11 * landmark.Y + r12);

            minX = Math.Min(minX, rotatedX);
            minY = Math.Min(minY, rotatedY);
            maxX = Math.Max(maxX, rotatedX);
            maxY = Math.Max(maxY, rotatedY);
        }

        var rotatedPalmBboxFinal = new Rect((int)minX, (int)minY, (int)(maxX - minX), (int)(maxY - minY));

        // Final crop
        var finalResult = CropAndPadFromPalm(rotatedImage, rotatedPalmBboxFinal);
        finalCrop = finalResult.Item1;
        Rect finalBbox = finalResult.Item2;

        // Resize to the model input size and normalise to [0, 1]
        scaled = new Mat();
        Cv2.Resize(finalCrop, scaled, input_size, 0, 0, InterpolationFlags.Area);
        scaled.ConvertTo(scaled, MatType.CV_32FC3, 1.0 / 255.0);

        // Add the batch dimension; the reshaped Mat is the one handed to the caller.
        Mat blob = scaled.Reshape(1, new int[] { 1, input_size.Height, input_size.Width, 3 });

        // Hand the matrix to the caller so the finally block leaves it alone.
        Mat matrixForCaller = rotationMatrix;
        rotationMatrix = null;

        return (blob, finalBbox, angle, matrixForCaller, bias);
    }
    finally
    {
        // Intermediate images were previously never disposed (several native Mats per hand per frame).
        rotationMatrix?.Dispose();
        scaled?.Dispose();
        finalCrop?.Dispose();
        rotatedImage?.Dispose();
        croppedImage?.Dispose();
    }
}

/// <summary>
/// Estimates the hand pose for one detected palm.
/// </summary>
/// <param name="image">Full input image; must be non-null and non-empty.</param>
/// <param name="palm">One palm detection; Preprocess requires at least 18 values.</param>
/// <returns>The values assembled by Postprocess, or null when Postprocess rejects the result.</returns>
/// <exception cref="ArgumentException">The image is null or empty, or the palm list is too short.</exception>
/// <exception cref="ArgumentNullException">The palm list is null.</exception>
public List<float> Infer(Mat image, List<float> palm)
{
    if (image == null || image.Empty())
    {
        throw new ArgumentException("Input image is null or empty");
    }

    if (palm == null)
    {
        throw new ArgumentNullException(nameof(palm));
    }

    // Preprocess
    var (inputBlob, rotatedPalmBbox, angle, rotationMatrix, padBias) = Preprocess(image, palm);
    Mat[] outputs = null;

    try
    {
        // Forward
        model.SetInput(inputBlob);

        var outputNames = model.GetUnconnectedOutLayersNames();
        outputs = outputNames.Select(_ => new Mat()).ToArray();
        model.Forward(outputs, outputNames);

        // Postprocess
        return Postprocess(outputs, rotatedPalmBbox, angle, rotationMatrix, padBias);
    }
    finally
    {
        // Clean up on every path; the rotation matrix was previously never released.
        if (outputs != null)
        {
            foreach (Mat output in outputs)
            {
                output?.Dispose();
            }
        }

        rotationMatrix?.Dispose();
        inputBlob?.Dispose();
    }
}

/// <summary>
/// Converts the raw network outputs into a hand-pose result in original-image coordinates.
/// </summary>
/// <param name="outputs">
/// Network outputs in the order: screen landmarks (63 values), confidence, handedness,
/// world landmarks (63 values).
/// </param>
/// <param name="rotatedPalmBbox">Final crop box returned by Preprocess.</param>
/// <param name="angle">Rotation angle in degrees returned by Preprocess.</param>
/// <param name="rotationMatrix">2 x 3 CV_64FC1 rotation matrix returned by Preprocess; not disposed here.</param>
/// <param name="padBias">Offset of the first crop in the original image.</param>
/// <returns>
/// 132 values: [0..3] hand box x1, y1, x2, y2; [4..66] screen landmarks (x, y, z) x 21;
/// [67..129] world landmarks (x, y, z) x 21; [130] handedness; [131] confidence.
/// Null when fewer than four outputs are supplied or the confidence is below the threshold.
/// </returns>
private List<float> Postprocess(Mat[] outputs, Rect rotatedPalmBbox, double angle, Mat rotationMatrix, Point padBias)
{
    const int LandmarkCount = 21;

    if (outputs == null || outputs.Length < 4)
    {
        return null;
    }

    // Check the confidence before any native view is created.
    float conf = outputs[1].At<float>(0, 0);
    if (conf < conf_threshold)
    {
        return null;
    }

    float handedness = outputs[2].At<float>(0, 0);

    // Copy both landmark sets into managed arrays so the reshaped views can be
    // released immediately and the network outputs are left untouched.
    var screen = new float[LandmarkCount, 3];
    var world = new float[LandmarkCount, 3];
    using (Mat landmarks = outputs[0].Reshape(1, LandmarkCount, 3))
    using (Mat landmarksWorld = outputs[3].Reshape(1, LandmarkCount, 3))
    {
        for (int i = 0; i < LandmarkCount; i++)
        {
            for (int c = 0; c < 3; c++)
            {
                screen[i, c] = landmarks.At<float>(i, c);
                world[i, c] = landmarksWorld.At<float>(i, c);
            }
        }
    }

    // Scale from network-input pixels to crop pixels, centred on the crop.
    float maxScale = Math.Max(
        rotatedPalmBbox.Width / (float)input_size.Width,
        rotatedPalmBbox.Height / (float)input_size.Height);
    float halfInputW = input_size.Width / 2.0f;
    float halfInputH = input_size.Height / 2.0f;

    // Pure rotation by -angle (matrix is CV_64FC1).
    double cr00, cr01, cr10, cr11;
    using (Mat coordsRotationMatrix = Cv2.GetRotationMatrix2D(new Point2f(0, 0), -angle, 1.0))
    {
        cr00 = coordsRotationMatrix.At<double>(0, 0);
        cr01 = coordsRotationMatrix.At<double>(0, 1);
        cr10 = coordsRotationMatrix.At<double>(1, 0);
        cr11 = coordsRotationMatrix.At<double>(1, 1);
    }

    for (int i = 0; i < LandmarkCount; i++)
    {
        float x = (screen[i, 0] - halfInputW) * maxScale;
        float y = (screen[i, 1] - halfInputH) * maxScale;
        screen[i, 0] = (float)(cr00 * x + cr01 * y);
        screen[i, 1] = (float)(cr10 * x + cr11 * y);
        screen[i, 2] = screen[i, 2] * maxScale; // depth scaling

        // World landmarks get the same rotation (no scaling or translation), as in the
        // Python reference; previously they were returned still in the rotated frame.
        float wx = world[i, 0];
        float wy = world[i, 1];
        world[i, 0] = (float)(cr00 * wx + cr01 * wy);
        world[i, 1] = (float)(cr10 * wx + cr11 * wy);
    }

    // Invert the preprocessing transform [R | t]: the inverse is [R^T | -R^T t].
    double r00 = rotationMatrix.At<double>(0, 0);
    double r01 = rotationMatrix.At<double>(0, 1);
    double r02 = rotationMatrix.At<double>(0, 2);
    double r10 = rotationMatrix.At<double>(1, 0);
    double r11 = rotationMatrix.At<double>(1, 1);
    double r12 = rotationMatrix.At<double>(1, 2);

    double invTx = -(r00 * r02 + r10 * r12);
    double invTy = -(r01 * r02 + r11 * r12);

    // Map the crop-box centre back into the un-rotated crop.
    float centerX = rotatedPalmBbox.X + rotatedPalmBbox.Width / 2.0f;
    float centerY = rotatedPalmBbox.Y + rotatedPalmBbox.Height / 2.0f;
    float originalCenterX = (float)(r00 * centerX + r10 * centerY + invTx);
    float originalCenterY = (float)(r01 * centerX + r11 * centerY + invTy);

    // Move the landmarks into original-image coordinates and track their extent.
    float minX = float.MaxValue, minY = float.MaxValue, maxX = float.MinValue, maxY = float.MinValue;
    for (int i = 0; i < LandmarkCount; i++)
    {
        screen[i, 0] = screen[i, 0] + originalCenterX + padBias.X;
        screen[i, 1] = screen[i, 1] + originalCenterY + padBias.Y;

        minX = Math.Min(minX, screen[i, 0]);
        minY = Math.Min(minY, screen[i, 1]);
        maxX = Math.Max(maxX, screen[i, 0]);
        maxY = Math.Max(maxY, screen[i, 1]);
    }

    // Landmark bounding box, shifted and then enlarged about its centre.
    int boxX = (int)minX;
    int boxY = (int)minY;
    int boxW = (int)(maxX - minX);
    int boxH = (int)(maxY - minY);

    boxX += (int)(boxW * HAND_BOX_SHIFT_VECTOR[0]);
    boxY += (int)(boxH * HAND_BOX_SHIFT_VECTOR[1]);

    int boxCenterX = boxX + boxW / 2;
    int boxCenterY = boxY + boxH / 2;
    int newHalfW = (int)(boxW * HAND_BOX_ENLARGE_FACTOR / 2);
    int newHalfH = (int)(boxH * HAND_BOX_ENLARGE_FACTOR / 2);

    int x1 = boxCenterX - newHalfW;
    int y1 = boxCenterY - newHalfH;
    int x2 = x1 + newHalfW * 2;
    int y2 = y1 + newHalfH * 2;

    var result = new List<float>(4 + 2 * LandmarkCount * 3 + 2);

    // [0-3]: hand bounding box
    result.Add(x1);
    result.Add(y1);
    result.Add(x2);
    result.Add(y2);

    // [4-66]: screen landmarks (21 landmarks * 3 coordinates)
    for (int i = 0; i < LandmarkCount; i++)
    {
        result.Add(screen[i, 0]);
        result.Add(screen[i, 1]);
        result.Add(screen[i, 2]);
    }

    // [67-129]: world landmarks (21 landmarks * 3 coordinates)
    for (int i = 0; i < LandmarkCount; i++)
    {
        result.Add(world[i, 0]);
        result.Add(world[i, 1]);
        result.Add(world[i, 2]);
    }

    // [130]: handedness
    result.Add(handedness);

    // [131]: confidence
    result.Add(conf);

    return result;
}

/// <summary>
/// Releases the network and suppresses finalization.
/// </summary>
public void Dispose()
{
    Dispose(disposing: true);
    GC.SuppressFinalize(this);
}

/// <summary>
/// Core cleanup routine; does its work only on the first call.
/// </summary>
/// <param name="disposing">True when called from Dispose(); the network is disposed only in that case.</param>
protected virtual void Dispose(bool disposing)
{
    if (_disposed)
    {
        return;
    }

    if (disposing && model != null)
    {
        model.Dispose();
    }

    _disposed = true;
}

/// <summary>
/// Finalizer; runs the cleanup routine with disposing set to false.
/// </summary>
~MPHandPose()
{
    Dispose(disposing: false);
}

}

}

///HandPoseVisualizer.cs

using OpenCvSharp;

using System;

using System.Collections.Generic;

using System.Linq;

namespace yolo_world_opencvsharp_net4._8

{

public static class HandPoseVisualizer

{

/// <summary>
/// Renders hand-pose results: an annotated copy of the image plus a 400 x 400 panel
/// with four projections of the first hand's world landmarks.
/// </summary>
/// <param name="image">Source image; it is cloned, not modified.</param>
/// <param name="hands">Hand results of 132 values each; shorter entries are skipped.</param>
/// <param name="printResult">When true, each drawn hand is also written to the console.</param>
/// <returns>The annotated image and the projection panel; the caller disposes both.</returns>
public static (Mat, Mat) Visualize(Mat image, List<List<float>> hands, bool printResult = false)
{
    const int LandmarkCount = 21;

    var red = new Scalar(0, 0, 255);
    var white = new Scalar(255, 255, 255);

    Mat displayScreen = image.Clone();
    Mat display3d = new Mat(400, 400, MatType.CV_8UC3, Scalar.Black);

    // Split the panel into four quadrants and caption each one.
    Cv2.Line(display3d, new Point(200, 0), new Point(200, 400), white, 2);
    Cv2.Line(display3d, new Point(0, 200), new Point(400, 200), white, 2);

    var captions = new (string, Point)[]
    {
        ("Main View", new Point(0, 12)),
        ("Top View", new Point(200, 12)),
        ("Left View", new Point(0, 212)),
        ("Right View", new Point(200, 212))
    };
    foreach (var (caption, origin) in captions)
    {
        Cv2.PutText(display3d, caption, origin, HersheyFonts.HersheyDuplex, 0.5, red);
    }

    var classifier = new GestureClassification();
    bool worldViewDrawn = false; // only one hand is projected

    for (int handNumber = 1; handNumber <= hands.Count; handNumber++)
    {
        List<float> pose = hands[handNumber - 1];
        if (pose.Count < 132)
        {
            continue;
        }

        float conf = pose[131];
        float handedness = pose[130];
        string handednessText = handedness <= 0.5f ? "Left" : "Right";

        var bbox = new Rect(
            (int)pose[0], (int)pose[1],
            (int)(pose[2] - pose[0]),
            (int)(pose[3] - pose[1]));

        // Screen landmarks start at index 4, three values each (x, y, depth).
        var screenPoints = new List<Point>(LandmarkCount);
        var depths = new float[LandmarkCount];
        for (int k = 0, at = 4; k < LandmarkCount; k++, at += 3)
        {
            screenPoints.Add(new Point((int)pose[at], (int)pose[at + 1]));
            depths[k] = pose[at + 2];
        }

        string gesture = classifier.Classify(screenPoints);

        if (printResult)
        {
            Console.WriteLine($"-----------hand {handNumber}-----------");
            Console.WriteLine($"conf: {conf:F2}");
            Console.WriteLine($"handedness: {handednessText}");
            Console.WriteLine($"gesture: {gesture}");
            Console.WriteLine($"hand box: [{bbox.X}, {bbox.Y}, {bbox.X + bbox.Width}, {bbox.Y + bbox.Height}]");
            Console.WriteLine("hand landmarks: ");
            for (int k = 0; k < screenPoints.Count; k++)
            {
                Console.WriteLine($"\t[{screenPoints[k].X}, {screenPoints[k].Y}, {depths[k]:F2}]");
            }
        }

        // Box, handedness and gesture labels
        Cv2.Rectangle(displayScreen, bbox, new Scalar(0, 255, 0), 2);
        Cv2.PutText(displayScreen, handednessText, new Point(bbox.X, bbox.Y + 12),
            HersheyFonts.HersheyDuplex, 0.5, red);
        Cv2.PutText(displayScreen, gesture, new Point(bbox.X, bbox.Y + 30),
            HersheyFonts.HersheyDuplex, 0.5, red);

        // Skeleton lines first, then a dot per landmark whose radius shrinks as depth grows
        DrawHandSkeleton(displayScreen, screenPoints, false);
        for (int k = 0; k < screenPoints.Count; k++)
        {
            int radius = Math.Min(Math.Max(5 - (int)(depths[k] / 5), 0), 14);
            Cv2.Circle(displayScreen, screenPoints[k], radius, red, -1);
        }

        if (worldViewDrawn)
        {
            continue;
        }
        worldViewDrawn = true;

        // World landmarks start at index 67, three values each.
        List<Point3f> worldPoints = Enumerable.Range(0, LandmarkCount)
            .Select(k => new Point3f(pose[67 + k * 3], pose[67 + k * 3 + 1], pose[67 + k * 3 + 2]))
            .ToList();
        Draw3DViews(display3d, worldPoints);
    }

    return (displayScreen, display3d);
}

/// <summary>
/// Draws the hand skeleton: five chains of line segments that all start at landmark 0.
/// </summary>
/// <param name="image">Image to draw on (modified in place).</param>
/// <param name="landmarks">Landmark positions; segments with an index past the end are skipped.</param>
/// <param name="drawPoints">When true, a filled red dot is drawn on every landmark.</param>
/// <param name="thickness">Line thickness, also used as the dot radius.</param>
private static void DrawHandSkeleton(Mat image, List<Point> landmarks, bool drawPoints = true, int thickness = 2)
{
    // Each row is one finger, listed from landmark 0 outwards.
    int[][] chains =
    {
        new[] { 0, 1, 2, 3, 4 },       // Thumb
        new[] { 0, 5, 6, 7, 8 },       // Index finger
        new[] { 0, 9, 10, 11, 12 },    // Middle finger
        new[] { 0, 13, 14, 15, 16 },   // Ring finger
        new[] { 0, 17, 18, 19, 20 }    // Pinky finger
    };

    var lineColor = new Scalar(255, 255, 255);
    int available = landmarks.Count;

    foreach (int[] chain in chains)
    {
        for (int k = 0; k + 1 < chain.Length; k++)
        {
            int from = chain[k];
            int to = chain[k + 1];
            if (from >= available || to >= available)
            {
                continue;
            }

            Cv2.Line(image, landmarks[from], landmarks[to], lineColor, thickness);
        }
    }

    if (!drawPoints)
    {
        return;
    }

    var dotColor = new Scalar(0, 0, 255);
    for (int k = 0; k < available; k++)
    {
        Cv2.Circle(image, landmarks[k], thickness, dotColor, -1);
    }
}

/// <summary>
/// Draws four 2-D projections of the world landmarks, one per quadrant of the panel.
/// </summary>
/// <param name="display3d">Panel to draw on (modified in place).</param>
/// <param name="landmarksWorld">World landmarks; coordinates are multiplied by 1000 before drawing.</param>
private static void Draw3DViews(Mat display3d, List<Point3f> landmarksWorld)
{
    // Projects every landmark with the given mapping and draws the skeleton (lines only, thickness 5).
    void DrawView(Func<Point3f, Point> project)
    {
        List<Point> projected = landmarksWorld.Select(project).ToList();
        DrawHandSkeleton(display3d, projected, false, 5);
    }

    // Main view (XY plane), centred on (100, 100)
    DrawView(p => new Point((int)(p.X * 1000 + 100), (int)(p.Y * 1000 + 100)));

    // Top view (XZ plane, Z negated), centred on (300, 100)
    DrawView(p => new Point((int)(p.X * 1000 + 300), (int)(-p.Z * 1000 + 100)));

    // Left view (ZY plane, Z negated), centred on (100, 300)
    DrawView(p => new Point((int)(-p.Z * 1000 + 100), (int)(p.Y * 1000 + 300)));

    // Right view (ZY plane), centred on (300, 300)
    DrawView(p => new Point((int)(p.Z * 1000 + 300), (int)(p.Y * 1000 + 300)));
}

}

}

///GestureClassification.cs

using OpenCvSharp;

using System;

using System.Collections.Generic;

using System.Linq;

namespace yolo_world_opencvsharp_net4._8

{

public class GestureClassification

{

/// <summary>
/// Returns the unsigned angle between two 2-D vectors, in degrees (0 to 180).
/// </summary>
/// <param name="v1">First vector.</param>
/// <param name="v2">Second vector.</param>
/// <returns>The angle in degrees, or 0 when either vector has zero length.</returns>
private double Vector2Angle(Point v1, Point v2)
{
    double length1 = Math.Sqrt(v1.X * v1.X + v1.Y * v1.Y);
    double length2 = Math.Sqrt(v2.X * v2.X + v2.Y * v2.Y);

    if (!(length1 != 0 && length2 != 0))
    {
        return 0;
    }

    double dot = v1.X * v2.X + v1.Y * v2.Y;
    double cosine = dot / (length1 * length2);

    // Rounding can push the cosine just outside [-1, 1], where Acos is undefined.
    if (cosine > 1)
    {
        cosine = 1;
    }
    else if (cosine < -1)
    {
        cosine = -1;
    }

    return Math.Acos(cosine) * (180.0 / Math.PI);
}

/// <summary>
/// Computes one angle per finger, in the order thumb, index, middle, ring, pinky.
/// </summary>
/// <param name="hand">Hand landmarks; indices up to 20 are read.</param>
/// <returns>
/// Five angles in degrees. Each is the angle between the vector from a joint near the base of
/// the finger to landmark 0 and the vector from the fingertip to the joint before it.
/// </returns>
private List<double> HandAngle(List<Point> hand)
{
    // Per finger: { lower joint, joint before the tip, tip }
    int[,] joints =
    {
        { 2, 3, 4 },     // thumb
        { 6, 7, 8 },     // index
        { 10, 11, 12 },  // middle
        { 14, 15, 16 },  // ring
        { 18, 19, 20 }   // pinky
    };

    int fingerCount = joints.GetLength(0);
    var angles = new List<double>(fingerCount);

    for (int finger = 0; finger < fingerCount; finger++)
    {
        Point root = hand[0];
        Point lower = hand[joints[finger, 0]];
        Point beforeTip = hand[joints[finger, 1]];
        Point tip = hand[joints[finger, 2]];

        var towardsRoot = new Point(root.X - lower.X, root.Y - lower.Y);
        var towardsBeforeTip = new Point(beforeTip.X - tip.X, beforeTip.Y - tip.Y);

        angles.Add(Vector2Angle(towardsRoot, towardsBeforeTip));
    }

    return angles;
}

/// <summary>
/// Decides for each of the five fingers whether it counts as open.
/// </summary>
/// <param name="landmarks">Hand landmarks; indices up to 20 are read.</param>
/// <returns>
/// Five flags (thumb first). A flag is true when the finger's far landmark lies farther from
/// landmark 0 than its reference landmark does.
/// </returns>
private List<bool> FingerStatus(List<Point> landmarks)
{
    // Reference landmark and far landmark per finger, thumb first.
    int[] referenceIds = { 5, 6, 10, 14, 18 };
    int[] farIds = { 4, 8, 12, 16, 20 };

    Point origin = landmarks[0];

    // Euclidean distance from landmark 0.
    double DistanceFromOrigin(Point p) =>
        Math.Sqrt(Math.Pow(p.X - origin.X, 2) + Math.Pow(p.Y - origin.Y, 2));

    var open = new List<bool>(referenceIds.Length);
    for (int finger = 0; finger < referenceIds.Length; finger++)
    {
        double referenceDistance = DistanceFromOrigin(landmarks[referenceIds[finger]]);
        double farDistance = DistanceFromOrigin(landmarks[farIds[finger]]);
        open.Add(farDistance > referenceDistance);
    }

    return open;
}

/// <summary>
/// Maps hand landmarks to one of the labels "Zero" to "Five", or "Undefined" when no rule matches.
/// </summary>
/// <remarks>
/// Each rule combines the five angles from HandAngle (thumb, index, middle, ring, pinky) with the
/// five open flags from FingerStatus. Rules are tested in order and the first match wins.
/// </remarks>
/// <param name="hand">Hand landmarks, passed on to HandAngle and FingerStatus.</param>
/// <returns>The gesture label.</returns>
private string ClassifyGesture(List<Point> hand)

{

// Angle thresholds in degrees. In the rules below, "angle > thrAngle" (thrAngleThumb for the
// thumb) appears for fingers expected to be folded and "angle < thrAngleS" for fingers
// expected to be extended.
double thrAngle = 65.0;

double thrAngleThumb = 30.0;

double thrAngleS = 49.0;

// Result when none of the rules matches.
string gestureStr = "Undefined";

var angleList = HandAngle(hand);

var fingerStatus = FingerStatus(hand);

// Flags in the same finger order as angleList: thumb, index, middle, ring, pinky.
bool thumbOpen = fingerStatus[0];

bool firstOpen = fingerStatus[1];

bool secondOpen = fingerStatus[2];

bool thirdOpen = fingerStatus[3];

bool fourthOpen = fingerStatus[4];

// Number gestures

// Zero: all five angles above their thresholds, index to pinky not open (thumb flag ignored).
if (angleList[0] > thrAngleThumb && angleList[1] > thrAngle && angleList[2] > thrAngle &&

angleList[3] > thrAngle && angleList[4] > thrAngle &&

!firstOpen && !secondOpen && !thirdOpen && !fourthOpen)

{

gestureStr = "Zero";

}

// One: only the index finger is open (thumb flag ignored).
else if (angleList[0] > thrAngleThumb && angleList[1] < thrAngleS && angleList[2] > thrAngle &&

angleList[3] > thrAngle && angleList[4] > thrAngle &&

firstOpen && !secondOpen && !thirdOpen && !fourthOpen)

{

gestureStr = "One";

}

// Two: index and middle open; thumb, ring and pinky not open.
else if (angleList[0] > thrAngleThumb && angleList[1] < thrAngleS && angleList[2] < thrAngleS &&

angleList[3] > thrAngle && angleList[4] > thrAngle &&

!thumbOpen && firstOpen && secondOpen && !thirdOpen && !fourthOpen)

{

gestureStr = "Two";

}

// Three: index, middle and ring open; thumb and pinky not open.
else if (angleList[0] > thrAngleThumb && angleList[1] < thrAngleS && angleList[2] < thrAngleS &&

angleList[3] < thrAngleS && angleList[4] > thrAngle &&

!thumbOpen && firstOpen && secondOpen && thirdOpen && !fourthOpen)

{

gestureStr = "Three";

}

// Four: index to pinky open (thumb flag ignored).
// NOTE(review): the pinky test uses thrAngle here, whereas the other "extended" tests use
// thrAngleS - confirm this is intentional.
else if (angleList[0] > thrAngleThumb && angleList[1] < thrAngleS && angleList[2] < thrAngleS &&

angleList[3] < thrAngleS && angleList[4] < thrAngle &&

firstOpen && secondOpen && thirdOpen && fourthOpen)

{

gestureStr = "Four";

}

// Five: every angle below thrAngleS and every finger open.
else if (angleList[0] < thrAngleS && angleList[1] < thrAngleS && angleList[2] < thrAngleS &&

angleList[3] < thrAngleS && angleList[4] < thrAngleS &&

thumbOpen && firstOpen && secondOpen && thirdOpen && fourthOpen)

{

gestureStr = "Five";

}

// Add more gesture classifications as needed...

return gestureStr;

}

/// <summary>
/// Classifies a hand gesture from its screen landmarks.
/// </summary>
/// <param name="landmarks">Hand landmarks; at least 21 are required.</param>
/// <returns>The gesture label, or "Undefined" when fewer than 21 landmarks are supplied.</returns>
public string Classify(List<Point> landmarks)
{
    return landmarks.Count < 21
        ? "Undefined"
        : ClassifyGesture(landmarks);
}

}

}

使用方法示例:

/// <summary>
/// Demo handler: runs palm detection and hand-pose estimation on camera 0 and shows the
/// results until a key is pressed or no frame can be read.
/// </summary>
/// <remarks>
/// The loop runs on the calling thread and only returns when it ends.
/// </remarks>
private void button3_Click(object sender, EventArgs e)
{
    // Full hand-pose pipeline; adjust both model paths to the local copy of opencv_zoo.
    string palmModelPath = @"D:\opencvsharp\1\opencv_zoo-main\opencv_zoo-main\models\palm_detection_mediapipe\palm_detection_mediapipe_2023feb.onnx";
    string handPoseModelPath = @"D:\opencvsharp\1\opencv_zoo-main\opencv_zoo-main\models\handpose_estimation_mediapipe\handpose_estimation_mediapipe_2023feb.onnx";

    try
    {
        using (var palmDetector = new PalmDetector(palmModelPath, 0.3f, 0.5f))
        using (var handPoseDetector = new MPHandPose(handPoseModelPath, 0.7f))
        using (var capture = new VideoCapture(0))
        {
            while (true)
            {
                using (var frame = new Mat())
                {
                    capture.Read(frame);
                    if (frame.Empty())
                    {
                        break;
                    }

                    // Palm detection
                    var palms = palmDetector.Infer(frame);
                    var hands = new List<List<float>>();

                    // Pose estimation for every detected palm
                    foreach (var palm in palms)
                    {
                        var handPose = handPoseDetector.Infer(frame, palm);
                        if (handPose != null && handPose.Count >= 132)
                        {
                            hands.Add(handPose);
                        }
                    }

                    // Visualise; both images are released even if showing them fails.
                    var (displayScreen, display3d) = HandPoseVisualizer.Visualize(frame, hands);
                    using (displayScreen)
                    using (display3d)
                    {
                        Cv2.ImShow("Hand Pose Detection", displayScreen);
                        Cv2.ImShow("3D Hand Pose", display3d);
                    }

                    if (Cv2.WaitKey(1) >= 0)
                    {
                        break;
                    }
                }
            }
        }
    }
    finally
    {
        // Close the preview windows, which were previously left open after the loop ended.
        Cv2.DestroyAllWindows();
    }
}

相关推荐
集成显卡2 小时前
AI取名大师 | PM2 部署 Bun.js 应用及配置 Let‘s Encrypt 免费 HTTPS 证书
开发语言·javascript·人工智能
AI小云2 小时前
【Numpy数据运算】数组间运算
开发语言·python·numpy
baivfhpwxf20232 小时前
SQL Server 服务端如何在其他电脑连接
c#
是苏浙2 小时前
零基础入门C语言之C语言实现数据结构之单链表经典算法
c语言·开发语言·数据结构·算法
纵有疾風起2 小时前
C++—vector:vecor使用及模拟实现
开发语言·c++·经验分享·开源·stl·vector
Dm_dotnet2 小时前
WPF/C#:使用Microsoft Agent Framework框架创建一个带有审批功能的终端Agent
c#
Dm_dotnet3 小时前
WPF/C#:使用Stylet中的IWindowManager用于显示等待窗体、对话框与消息框
c#
Dm_dotnet3 小时前
OpenCVSharp:ArUco 标记检测与透视变换
opencv
任子菲阳3 小时前
学Java第四十三天——Map双列集合
java·开发语言