通过ML.Net调用yolov5的Onnx模型

本文介绍如何在.NET环境中调用YOLOv5模型：先把YOLOv5的pt模型转成ONNX模型，再通过ONNX Runtime的.NET库Microsoft.ML.OnnxRuntime调用该ONNX模型。

1.安装yolov5

复制代码

git clone https://github.com/ultralytics/yolov5.git
cd yolov5
python -m venv venv
.\venv\Scripts\activate
pip install -r requirements.txt

2.安装好环境以后运行示例

2.1下载yolov5模型放到项目根目录，地址：https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt

2.2 执行以下命令：
python detect.py --weights yolov5s.pt --source data/images/zidane.jpg

输出：

复制代码

data\images\zidane.jpg: 384x640 2 persons, 2 ties, 140.7ms
Speed: 1.0ms pre-process, 140.7ms inference, 1.0ms NMS per image at shape (1, 3, 640, 640)
Results saved to runs\detect\exp

运行结束后，可以在runs/detect/exp目录下查看输出结果。

3.把yolov5s.pt转成yolov5s.onnx，以下命令会在项目根目录输出一个yolov5s.onnx

python export.py --weights yolov5s.pt --img 640 --batch 1 --device cpu --include onnx

4.获取onnx模型输入输出参数，参数在.net项目会用到

复制代码

import onnx

# Load the ONNX model
model = onnx.load("yolov5s.onnx")


def print_tensor_info(title, tensors):
    """Print the name, element type and shape of every tensor in `tensors`.

    `tensors` is a sequence of graph value descriptions such as
    `model.graph.input` or `model.graph.output`.
    """
    print(title)
    for tensor in tensors:
        tensor_type = tensor.type.tensor_type
        # A dynamic axis carries a symbolic name in dim_param and leaves
        # dim_value at 0; show the name instead of a misleading 0.
        dims = [d.dim_param or d.dim_value for d in tensor_type.shape.dim]
        print(f"Name: {tensor.name}")
        print(f"  ElementType: {tensor_type.elem_type}")
        print(f"  Dimensions: {dims}")


print_tensor_info("=== 模型输入 ===", model.graph.input)
print_tensor_info("\n=== 模型输出 ===", model.graph.output)

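输出结果如下（原文此处为截图）：对于按上面命令默认导出的yolov5s.onnx，输入名为images，ElementType为1（即float），维度为[1, 3, 640, 640]；输出名为output0，ElementType为1（即float），维度为[1, 25200, 85]，其中85 = 4个坐标 + 1个目标置信度 + 80个类别得分。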

5.把onnx放到.Net项目，然后运行项目

下面给出代码：

复制代码

using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Linq;
using System.Threading.Channels;

namespace YoloV5SoftNMS
{
    /// <summary>
    /// Console sample: runs a YOLOv5 ONNX model on a single image through ONNX Runtime,
    /// filters the raw detections with Soft-NMS and saves an annotated copy of the image.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Runs pre-processing, inference, decoding, Soft-NMS and drawing.
        /// </summary>
        /// <param name="args">
        /// Optional overrides: <c>args[0]</c> is the model path, <c>args[1]</c> the image path.
        /// Without arguments the sample files "yolov5s.onnx" and "zidane.jpg" are used.
        /// </param>
        static void Main(string[] args)
        {
            string modelPath = args.Length > 0 ? args[0] : "yolov5s.onnx";
            string imagePath = args.Length > 1 ? args[1] : "zidane.jpg";

            int inputWidth = 640;
            int inputHeight = 640;
            float confThreshold = 0.20f; // low threshold so that small objects are kept
            float iouThreshold = 0.45f;  // Soft-NMS IoU threshold
            float sigma = 0.5f;          // Soft-NMS Gaussian decay parameter

            // Fail with a readable message instead of an opaque GDI+/ONNX Runtime exception.
            if (!System.IO.File.Exists(modelPath) || !System.IO.File.Exists(imagePath))
            {
                Console.Error.WriteLine($"Model or image file not found: '{modelPath}', '{imagePath}'");
                return;
            }

            // 1. Pre-processing: letterbox the image and convert it to a CHW float tensor.
            var (inputTensor, padX, padY, scale, origWidth, origHeight) = PreprocessImage(imagePath, inputWidth, inputHeight);

            // 2. ONNX inference.
            using var session = new InferenceSession(modelPath);
            var inputs = new List<NamedOnnxValue>
            {
                // "images" is the input name of the exported model.
                NamedOnnxValue.CreateFromTensor("images", inputTensor)
            };
            using var results = session.Run(inputs);
            // "output0" is the output name of the exported model.
            var outputTensor = results.First(r => r.Name == "output0").AsTensor<float>();

            // Every row is [cx, cy, w, h, objectness, class scores...], so the class count
            // follows from the last dimension; this also works for custom-trained models.
            var outputDims = outputTensor.Dimensions;
            int numClasses = outputDims[outputDims.Length - 1] - 5;

            var output = outputTensor.ToArray();

            // 3. Decode the raw output and drop low-confidence rows.
            var predictions = ParseOutputs(output, numClasses, confThreshold);
            PrintPredictions("=== NMS 前候选框 ===", predictions);

            // 4. Soft-NMS.
            var finalBoxes = SoftNMS(predictions, iouThreshold, sigma, confThreshold);
            PrintPredictions("=== Soft-NMS 结果 ===", finalBoxes);

            // 5. Map the boxes back to the original image and draw them.
            using var bitmap = new Bitmap(imagePath);
            using var g = Graphics.FromImage(bitmap);
            // One font for all labels; creating one per box leaked GDI handles.
            using var font = new Font("Arial", 12);
            foreach (var p in finalBoxes)
            {
                float x1 = ToImageCoord(p.X1, padX, scale, origWidth);
                float y1 = ToImageCoord(p.Y1, padY, scale, origHeight);
                float x2 = ToImageCoord(p.X2, padX, scale, origWidth);
                float y2 = ToImageCoord(p.Y2, padY, scale, origHeight);

                g.DrawRectangle(Pens.Red, x1, y1, x2 - x1, y2 - y1);
                // Keep the label inside the image when the box touches the top edge.
                g.DrawString($"ID:{p.ClassId} {p.Confidence:0.00}", font, Brushes.Yellow, x1, Math.Max(0f, y1 - 16));
            }

            // State the encoder explicitly so the file content always matches the .jpg extension.
            bitmap.Save("result.jpg", System.Drawing.Imaging.ImageFormat.Jpeg);
            Console.WriteLine("完成，结果保存为 result.jpg");

            // ReadKey throws when stdin is redirected (e.g. when run from a script).
            if (!Console.IsInputRedirected)
            {
                Console.ReadKey();
            }
        }

        /// <summary>
        /// Converts a coordinate from letterboxed network space back to the original image
        /// and clamps it to the valid pixel range [0, limit - 1].
        /// </summary>
        static float ToImageCoord(float value, float pad, float scale, int limit)
        {
            float mapped = (value - pad) / scale;
            return Math.Min(Math.Max(mapped, 0f), limit - 1);
        }

        /// <summary>
        /// Writes a title line followed by one line per prediction to the console.
        /// </summary>
        static void PrintPredictions(string title, List<YoloPrediction> items)
        {
            Console.WriteLine(title);
            foreach (var p in items)
            {
                Console.WriteLine($"Class:{p.ClassId} Conf:{p.Confidence:0.00} X1:{p.X1:0.} Y1:{p.Y1:0.} X2:{p.X2:0.} Y2:{p.Y2:0.}");
            }
        }

        /// <summary>
        /// Loads an image, letterboxes it (aspect-preserving resize plus centered padding) to the
        /// requested size and converts it to a normalized float tensor laid out as [1, 3, H, W].
        /// As a debugging aid the letterboxed image is also written to "resized_input.jpg".
        /// </summary>
        /// <param name="imagePath">Path of the image file to load.</param>
        /// <param name="targetWidth">Width of the network input in pixels; must be positive.</param>
        /// <param name="targetHeight">Height of the network input in pixels; must be positive.</param>
        /// <returns>
        /// The input tensor, the horizontal and vertical padding in network pixels, the resize
        /// scale, and the original image width and height.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">A target dimension is not positive.</exception>
        static (DenseTensor<float>, float, float, float, int, int) PreprocessImage(string imagePath, int targetWidth, int targetHeight)
        {
            if (targetWidth <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(targetWidth), "Target width must be positive.");
            }
            if (targetHeight <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(targetHeight), "Target height must be positive.");
            }

            using var bitmap = new Bitmap(imagePath);
            int origW = bitmap.Width;
            int origH = bitmap.Height;

            float scale = Math.Min(targetWidth / (float)origW, targetHeight / (float)origH);
            // Round (instead of truncating) so the resized size agrees with `scale`, which is
            // what the caller uses to map boxes back; clamp to keep the size valid.
            int newW = Math.Min(targetWidth, Math.Max(1, (int)Math.Round(origW * scale)));
            int newH = Math.Min(targetHeight, Math.Max(1, (int)Math.Round(origH * scale)));

            float padX = (targetWidth - newW) / 2f;
            float padY = (targetHeight - newH) / 2f;

            using var resized = new Bitmap(targetWidth, targetHeight);
            using (var g = Graphics.FromImage(resized))
            {
                // YOLOv5's own letterbox pads with gray (114, 114, 114); use the same value so
                // the padding matches what the model saw during training.
                g.Clear(Color.FromArgb(114, 114, 114));
                g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
                g.DrawImage(bitmap, padX, padY, newW, newH);
            }

            // Without an explicit format an in-memory bitmap is written with the PNG encoder,
            // which would put PNG data into a file named .jpg.
            resized.Save("resized_input.jpg", System.Drawing.Imaging.ImageFormat.Jpeg);

            int planeSize = targetWidth * targetHeight;
            float[] data = new float[3 * planeSize];
            for (int y = 0; y < targetHeight; y++)
            {
                for (int x = 0; x < targetWidth; x++)
                {
                    var color = resized.GetPixel(x, y);
                    int idx = y * targetWidth + x;
                    // Three consecutive planes (R, G, B), each scaled to [0, 1].
                    data[idx] = color.R / 255f;
                    data[planeSize + idx] = color.G / 255f;
                    data[2 * planeSize + idx] = color.B / 255f;
                }
            }

            // `data` is already in channel-major (CHW) order; the tensor wraps it with the
            // shape [batch = 1, channels = 3, height, width].
            var tensor = new DenseTensor<float>(data, new int[] { 1, 3, targetHeight, targetWidth });
            return (tensor, padX, padY, scale, origW, origH);
        }

        /// <summary>
        /// Decodes the flat model output into boxes and keeps those whose confidence
        /// (objectness multiplied by the best class score) exceeds the threshold.
        /// </summary>
        /// <param name="output">
        /// Flat buffer of rows, each <c>[cx, cy, w, h, objectness, score_0 .. score_(numClasses-1)]</c>.
        /// </param>
        /// <param name="numClasses">Number of class scores per row; must be positive.</param>
        /// <param name="confThreshold">Minimum confidence (exclusive) for a row to be kept.</param>
        /// <returns>The surviving boxes in corner form (X1, Y1, X2, Y2), in input order.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="output"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="numClasses"/> is not positive.</exception>
        /// <exception cref="ArgumentException">The buffer length is not a multiple of the row size.</exception>
        static List<YoloPrediction> ParseOutputs(float[] output, int numClasses, float confThreshold)
        {
            if (output == null)
            {
                throw new ArgumentNullException(nameof(output));
            }
            if (numClasses <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(numClasses), "The class count must be positive.");
            }

            int rowSize = 5 + numClasses;
            // A mismatch means the class count does not fit the model; decoding would yield garbage.
            if (output.Length % rowSize != 0)
            {
                throw new ArgumentException($"Output length {output.Length} is not a multiple of the row size {rowSize}.", nameof(output));
            }

            int numBoxes = output.Length / rowSize;
            var predictions = new List<YoloPrediction>();

            for (int i = 0; i < numBoxes; i++)
            {
                int offset = i * rowSize;
                float objConf = output[offset + 4];

                // With class scores in [0, 1] the product can never exceed objectness, so such
                // a row cannot pass. NOTE(review): assumes sigmoid-activated scores, as produced
                // by the standard YOLOv5 export; confirm for custom exports.
                if (objConf <= confThreshold)
                {
                    continue;
                }

                float maxClassConf = 0;
                int classId = 0;
                for (int c = 0; c < numClasses; c++)
                {
                    float classConf = output[offset + 5 + c];
                    if (classConf > maxClassConf)
                    {
                        maxClassConf = classConf;
                        classId = c;
                    }
                }

                float conf = objConf * maxClassConf;
                if (conf > confThreshold)
                {
                    float x = output[offset];
                    float y = output[offset + 1];
                    float halfW = output[offset + 2] / 2;
                    float halfH = output[offset + 3] / 2;

                    // Convert center/size form to corner form.
                    predictions.Add(new YoloPrediction
                    {
                        X1 = x - halfW,
                        Y1 = y - halfH,
                        X2 = x + halfW,
                        Y2 = y + halfH,
                        Confidence = conf,
                        ClassId = classId
                    });
                }
            }

            return predictions;
        }

        /// <summary>
        /// Runs Soft Non-Maximum Suppression: instead of discarding boxes that overlap a stronger
        /// box, their confidence is decayed by <c>exp(-iou² / sigma)</c>; a box is dropped only
        /// once its confidence is no longer above <paramref name="confThreshold"/>.
        /// The input list and its elements are left untouched; the result holds copies.
        /// </summary>
        /// <param name="boxes">Candidate boxes.</param>
        /// <param name="iouThreshold">Overlap (exclusive) above which a box is decayed.</param>
        /// <param name="sigma">Gaussian decay parameter; must be positive.</param>
        /// <param name="confThreshold">Confidence (exclusive) a decayed box must keep to survive.</param>
        /// <param name="classAgnostic">
        /// When false (default) only boxes of the same class suppress each other, so overlapping
        /// objects of different classes are all kept; when true all boxes compete.
        /// </param>
        /// <returns>The kept boxes, in the order they were selected.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="boxes"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="sigma"/> is not positive.</exception>
        static List<YoloPrediction> SoftNMS(List<YoloPrediction> boxes, float iouThreshold, float sigma, float confThreshold, bool classAgnostic = false)
        {
            if (boxes == null)
            {
                throw new ArgumentNullException(nameof(boxes));
            }
            if (!(sigma > 0))
            {
                throw new ArgumentOutOfRangeException(nameof(sigma), "Sigma must be positive.");
            }

            // Work on copies: the decay below rewrites Confidence and must not leak to the caller.
            var dets = boxes
                .Select(b => new YoloPrediction
                {
                    X1 = b.X1,
                    Y1 = b.Y1,
                    X2 = b.X2,
                    Y2 = b.Y2,
                    Confidence = b.Confidence,
                    ClassId = b.ClassId
                })
                .OrderByDescending(b => b.Confidence)
                .ToList();
            var keep = new List<YoloPrediction>();

            while (dets.Count > 0)
            {
                var maxBox = dets[0];
                keep.Add(maxBox);
                dets.RemoveAt(0);

                foreach (var other in dets)
                {
                    if (!classAgnostic && other.ClassId != maxBox.ClassId)
                    {
                        continue;
                    }

                    float iou = IoU(maxBox, other);
                    if (iou > iouThreshold)
                    {
                        other.Confidence *= (float)Math.Exp(-iou * iou / sigma);
                    }
                }

                // Decay can reorder the remaining boxes, so filter and sort again.
                dets = dets.Where(b => b.Confidence > confThreshold).OrderByDescending(b => b.Confidence).ToList();
            }

            return keep;
        }

        /// <summary>
        /// Computes the IoU (Intersection over Union) of two boxes.
        /// </summary>
        /// <param name="a">First box.</param>
        /// <param name="b">Second box.</param>
        /// <returns>Intersection area divided by union area, or 0 when the union has no area.</returns>
        static float IoU(YoloPrediction a, YoloPrediction b)
        {
            float x1 = Math.Max(a.X1, b.X1);
            float y1 = Math.Max(a.Y1, b.Y1);
            float x2 = Math.Min(a.X2, b.X2);
            float y2 = Math.Min(a.Y2, b.Y2);

            float interArea = Math.Max(0f, x2 - x1) * Math.Max(0f, y2 - y1);
            float unionArea = (a.X2 - a.X1) * (a.Y2 - a.Y1) + (b.X2 - b.X1) * (b.Y2 - b.Y1) - interArea;

            // Degenerate (zero-area) boxes would otherwise produce 0/0 = NaN.
            return unionArea > 0f ? interArea / unionArea : 0f;
        }

        /// <summary>
        /// One detection: box corners, the combined confidence and the index of the best class.
        /// </summary>
        sealed class YoloPrediction
        {
            public float X1 { get; set; }
            public float Y1 { get; set; }
            public float X2 { get; set; }
            public float Y2 { get; set; }
            public float Confidence { get; set; }
            public int ClassId { get; set; }
        }
    }
}

5.1 输出结果：程序运行后，控制台会依次打印NMS前的候选框和Soft-NMS之后的结果，画好检测框的图片保存为result.jpg

6.输出对应的类别

yolov5模型可以识别80种类别（COCO数据集），类别名称列表见yolov5仓库的data/coco.yaml。可以根据id找到对应的类别，把这份列表放到.NET项目里就可以了；例如id为0是person，id为27是tie。

7.关于训练属于自己业务数据的yolo模型

YOLO可以用自己的业务数据进行训练：准备好相关数据集，完成从标注、训练到识别的整个流程，具体可参考以下文章：

1.https://github.com/ultralytics/yolov5

2.https://blog.csdn.net/qq_45701791/article/details/113992622 这篇实测过确实是可以，哆啦A梦的数据集自己在百度搜的

8.聊点其他的

不同的业务对YOLO的使用方式不一样：比如商场、车站的人数统计以及害虫识别，这类业务只要把目标识别出来就行；而证件信息识别这类业务，可以先借助YOLO定位证件上各项信息的位置，再通过OCR识别对应区域的图片，从而快速获取身份证各个位置的信息，完成整个流程。