本文介绍如何在 .NET 环境中调用 YOLOv5 模型:先把 YOLOv5 的 pt 模型转成 ONNX 模型,再通过 Microsoft.ML.OnnxRuntime 调用转换得到的 ONNX 模型。
1.安装yolov5
git clone https://github.com/ultralytics/yolov5.git
cd yolov5
python -m venv venv
.\venv\Scripts\activate
pip install -r requirements.txt
2.安装好环境以后运行示例
2.1下载yolov5模型放到项目根目录,地址:https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt
2.2 执行以下命令:
python detect.py --weights yolov5s.pt --source data/images/zidane.jpg
输出:
data\images\zidane.jpg: 384x640 2 persons, 2 ties, 140.7ms
Speed: 1.0ms pre-process, 140.7ms inference, 1.0ms NMS per image at shape (1, 3, 640, 640)
Results saved to runs\detect\exp
在 runs/detect/exp 目录下可以查看输出结果

3.把yolov5s.pt转成yolov5s.onnx,以下命令会在根目录输出一个yolov5s.onnx
python export.py --weights yolov5s.pt --img 640 --batch 1 --device cpu --include onnx
4.获取onnx模型输入输出参数,参数在.net项目会用到
import onnx


def print_tensor_info(title, tensors):
    """Print the name, element type and dimensions of each graph tensor.

    Args:
        title: Heading line printed before the tensor list.
        tensors: Iterable of graph value-info entries, e.g. ``model.graph.input``
            or ``model.graph.output``.
    """
    print(title)
    for tensor in tensors:
        tensor_type = tensor.type.tensor_type
        # dim_value is read for every dimension of the declared shape.
        dims = [d.dim_value for d in tensor_type.shape.dim]
        print(f"Name: {tensor.name}")
        print(f" ElementType: {tensor_type.elem_type}")
        print(f" Dimensions: {dims}")


# Load the exported ONNX model and list its inputs and outputs; the tensor
# names printed here are the ones the .NET code must use.
model = onnx.load("yolov5s.onnx")
print_tensor_info("=== 模型输入 ===", model.graph.input)
print_tensor_info("\n=== 模型输出 ===", model.graph.output)
输出结果如下:

5.把onnx放到.Net项目,然后运行项目
下面给出代码:
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Linq;
using System.Threading.Channels;
namespace YoloV5SoftNMS
{
class Program
{
/// <summary>
/// Runs the demo end to end: preprocesses the input image, runs the YOLOv5 ONNX model,
/// decodes and filters the raw output, applies Soft-NMS, then draws the surviving boxes
/// on the original image and saves it as result.jpg.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string modelPath = "yolov5s.onnx";
    string imagePath = "zidane.jpg";
    int inputWidth = 640;
    int inputHeight = 640;
    float confThreshold = 0.20f; // low threshold so that small objects are kept
    float iouThreshold = 0.45f;  // Soft-NMS IoU threshold
    float sigma = 0.5f;          // Soft-NMS Gaussian decay parameter

    // 1. Preprocess. The original image size is returned as well but is not needed here.
    var (inputTensor, padX, padY, scale, _, _) = PreprocessImage(imagePath, inputWidth, inputHeight);

    // 2. ONNX inference.
    using var session = new InferenceSession(modelPath);
    var inputs = new List<NamedOnnxValue>
    {
        // "images" is the model input name reported by the ONNX inspection step.
        NamedOnnxValue.CreateFromTensor("images", inputTensor)
    };
    using var results = session.Run(inputs);
    // "output0" is the model output name reported by the ONNX inspection step.
    var outputTensor = results.First(r => r.Name == "output0").AsTensor<float>();
    var output = outputTensor.ToArray();

    // 3. Decode the raw output into candidate boxes.
    var predictions = ParseOutputs(output, 80, confThreshold);
    Console.WriteLine("=== NMS 前候选框 ===");
    foreach (var p in predictions)
    {
        Console.WriteLine($"Class:{p.ClassId} Conf:{p.Confidence:0.00} X1:{p.X1:0.} Y1:{p.Y1:0.} X2:{p.X2:0.} Y2:{p.Y2:0.}");
    }

    // 4. Soft-NMS.
    var finalBoxes = SoftNMS(predictions, iouThreshold, sigma, confThreshold);
    Console.WriteLine("=== Soft-NMS 结果 ===");
    foreach (var p in finalBoxes)
    {
        Console.WriteLine($"Class:{p.ClassId} Conf:{p.Confidence:0.00} X1:{p.X1:0.} Y1:{p.Y1:0.} X2:{p.X2:0.} Y2:{p.Y2:0.}");
    }

    // 5. Draw the boxes back onto the original image.
    using var bitmap = new Bitmap(imagePath);
    using var g = Graphics.FromImage(bitmap);
    // Font is disposable; create it once and dispose it instead of allocating one per box.
    using var labelFont = new Font("Arial", 12);
    foreach (var p in finalBoxes)
    {
        // Undo the padding offset and the resize scale to get original-image coordinates.
        float x1 = (p.X1 - padX) / scale;
        float y1 = (p.Y1 - padY) / scale;
        float x2 = (p.X2 - padX) / scale;
        float y2 = (p.Y2 - padY) / scale;
        g.DrawRectangle(Pens.Red, x1, y1, x2 - x1, y2 - y1);
        g.DrawString($"ID:{p.ClassId} {p.Confidence:0.00}", labelFont, Brushes.Yellow, x1, y1 - 16);
    }
    bitmap.Save("result.jpg");
    Console.WriteLine("完成,结果保存为 result.jpg");
    Console.ReadKey();
}
/// <summary>
/// Loads an image, resizes it with its aspect ratio preserved onto a black
/// targetWidth x targetHeight canvas (centered), and converts the canvas into a
/// [1, 3, H, W] float tensor with channel values scaled to the 0..1 range.
/// The canvas is also written to resized_input.jpg.
/// </summary>
/// <param name="imagePath">Path of the image file to load.</param>
/// <param name="targetWidth">Width of the model input, in pixels.</param>
/// <param name="targetHeight">Height of the model input, in pixels.</param>
/// <returns>
/// The input tensor, the horizontal and vertical padding offsets, the resize scale,
/// and the original image width and height.
/// </returns>
static (DenseTensor<float>, float, float, float, int, int) PreprocessImage(string imagePath, int targetWidth, int targetHeight)
{
    using var source = new Bitmap(imagePath);
    int sourceWidth = source.Width;
    int sourceHeight = source.Height;

    // Use the smaller of the two ratios so the whole image fits inside the target.
    float ratio = Math.Min(targetWidth / (float)sourceWidth, targetHeight / (float)sourceHeight);
    int scaledWidth = (int)(sourceWidth * ratio);
    int scaledHeight = (int)(sourceHeight * ratio);

    // Split the leftover space evenly so the resized image is centered.
    float offsetX = (targetWidth - scaledWidth) / 2f;
    float offsetY = (targetHeight - scaledHeight) / 2f;

    using var canvas = new Bitmap(targetWidth, targetHeight);
    using (var painter = Graphics.FromImage(canvas))
    {
        // NOTE(review): the padding is black here; upstream YOLOv5 letterboxing is
        // commonly described as padding with gray (114) - confirm whether that matters.
        painter.FillRectangle(Brushes.Black, 0, 0, targetWidth, targetHeight);
        painter.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
        painter.DrawImage(source, offsetX, offsetY, scaledWidth, scaledHeight);
    }
    canvas.Save("resized_input.jpg");

    // Fill three consecutive planes (R, then G, then B), each in row-major order.
    int planeSize = targetWidth * targetHeight;
    var planes = new float[3 * planeSize];
    for (int row = 0; row < targetHeight; row++)
    {
        int rowStart = row * targetWidth;
        for (int col = 0; col < targetWidth; col++)
        {
            Color pixel = canvas.GetPixel(col, row);
            int position = rowStart + col;
            planes[position] = pixel.R / 255f;
            planes[planeSize + position] = pixel.G / 255f;
            planes[2 * planeSize + position] = pixel.B / 255f;
        }
    }

    // The flat array above is already laid out plane by plane, so viewing it with the
    // dimensions [batch = 1, channels = 3, height, width] maps each plane to one channel.
    var shape = new int[] { 1, 3, targetHeight, targetWidth };
    var tensor = new DenseTensor<float>(planes, shape);
    return (tensor, offsetX, offsetY, ratio, sourceWidth, sourceHeight);
}
/// <summary>
/// Decodes the flat model output into boxes and keeps those whose confidence
/// (objectness multiplied by the best class score) is above the threshold.
/// Each row of the output holds 5 + numClasses values:
/// center x, center y, width, height, objectness, then one score per class.
/// </summary>
/// <param name="output">Flat model output, read as consecutive rows.</param>
/// <param name="numClasses">Number of class scores in each row.</param>
/// <param name="confThreshold">Minimum confidence (exclusive) for a box to be kept.</param>
/// <returns>The kept boxes, converted from center/size form to corner form.</returns>
static List<YoloPrediction> ParseOutputs(float[] output, int numClasses, float confThreshold)
{
    int rowLength = 5 + numClasses;
    int rowCount = output.Length / rowLength;
    var kept = new List<YoloPrediction>();

    for (int row = 0; row < rowCount; row++)
    {
        int start = row * rowLength;
        float x = output[start];
        float y = output[start + 1];
        float w = output[start + 2];
        float h = output[start + 3];
        float objConf = output[start + 4];

        // Find the best class score; the first class wins ties, and class 0 with
        // score 0 is the result when no score is greater than 0.
        int bestClass = 0;
        float bestScore = 0;
        int scoresStart = start + 5;
        for (int c = 0; c < numClasses; c++)
        {
            float score = output[scoresStart + c];
            if (score > bestScore)
            {
                bestScore = score;
                bestClass = c;
            }
        }

        float conf = objConf * bestScore;
        if (!(conf > confThreshold))
        {
            continue;
        }

        // Convert center/size to top-left and bottom-right corners.
        var prediction = new YoloPrediction
        {
            X1 = x - w / 2,
            Y1 = y - h / 2,
            X2 = x + w / 2,
            Y2 = y + h / 2,
            Confidence = conf,
            ClassId = bestClass
        };
        kept.Add(prediction);
    }

    return kept;
}
/// <summary>
/// Applies Soft Non-Maximum Suppression to a set of candidate boxes. The box with the
/// highest confidence is kept; every remaining box whose IoU with it exceeds
/// <paramref name="iouThreshold"/> has its confidence multiplied by exp(-iou^2 / sigma)
/// rather than being dropped outright. Boxes whose confidence is no longer above
/// <paramref name="confThreshold"/> are removed, and the process repeats until no
/// boxes remain.
/// </summary>
/// <param name="boxes">Candidate boxes. The list and its elements are not modified.</param>
/// <param name="iouThreshold">IoU above which a box's confidence is decayed.</param>
/// <param name="sigma">Divisor of the Gaussian decay exponent.</param>
/// <param name="confThreshold">Confidence a decayed box must stay above to remain a candidate.</param>
/// <returns>The kept boxes in selection order, as new objects carrying their final confidence.</returns>
static List<YoloPrediction> SoftNMS(List<YoloPrediction> boxes, float iouThreshold, float sigma, float confThreshold)
{
    // YoloPrediction is a reference type and confidences are decayed in place below,
    // so work on copies to leave the caller's objects untouched.
    var dets = boxes
        .Select(b => new YoloPrediction
        {
            X1 = b.X1,
            Y1 = b.Y1,
            X2 = b.X2,
            Y2 = b.Y2,
            Confidence = b.Confidence,
            ClassId = b.ClassId
        })
        .OrderByDescending(b => b.Confidence)
        .ToList();
    var keep = new List<YoloPrediction>();

    // NOTE(review): ClassId is not consulted, so boxes of different classes can decay
    // each other - confirm that class-agnostic suppression is intended.
    while (dets.Count > 0)
    {
        var maxBox = dets[0];
        keep.Add(maxBox);
        dets.RemoveAt(0);

        for (int i = 0; i < dets.Count; i++)
        {
            float iou = IoU(maxBox, dets[i]);
            if (iou > iouThreshold)
            {
                dets[i].Confidence *= (float)Math.Exp(-iou * iou / sigma);
            }
        }

        // Decay can reorder candidates, so filter and re-sort before the next pick.
        dets = dets.Where(b => b.Confidence > confThreshold).OrderByDescending(b => b.Confidence).ToList();
    }

    return keep;
}
/// <summary>
/// Computes the IoU (Intersection over Union) of two boxes given in corner form.
/// </summary>
/// <param name="a">First box.</param>
/// <param name="b">Second box.</param>
/// <returns>
/// Intersection area divided by union area, or 0 when the union area is not positive
/// (for example when both boxes have zero area).
/// </returns>
static float IoU(YoloPrediction a, YoloPrediction b)
{
    // Corners of the overlap rectangle; it is empty when x2 <= x1 or y2 <= y1.
    float x1 = Math.Max(a.X1, b.X1);
    float y1 = Math.Max(a.Y1, b.Y1);
    float x2 = Math.Min(a.X2, b.X2);
    float y2 = Math.Min(a.Y2, b.Y2);

    float interArea = Math.Max(0, x2 - x1) * Math.Max(0, y2 - y1);
    float unionArea = (a.X2 - a.X1) * (a.Y2 - a.Y1) + (b.X2 - b.X1) * (b.Y2 - b.Y1) - interArea;

    // Guard the division: 0 / 0 would otherwise produce NaN for degenerate boxes.
    if (unionArea <= 0)
    {
        return 0f;
    }

    return interArea / unionArea;
}
/// <summary>
/// One detection: a box in corner form plus its confidence and class index.
/// </summary>
class YoloPrediction
{
    // Top-left corner.
    public float X1;
    public float Y1;

    // Bottom-right corner.
    public float X2;
    public float Y2;

    // Detection confidence; mutable so it can be adjusted after creation.
    public float Confidence;

    // Index of the predicted class.
    public int ClassId;
}
}
}
5.输出结果

6.输出对应的类别
yolov5模型可以识别80种类别,可以根据id找到对应的类别,把这个类别放到.Net项目就可以了,可以看到id为0是person,id为27是tie

7.关于训练属于自己业务数据的yolo模型
yolo是可以训练自己的业务数据的,准备好相关数据集从标注到识别,具体可参考以下文章:
1.https://github.com/ultralytics/yolov5
2.https://blog.csdn.net/qq_45701791/article/details/113992622 这篇实测过确实是可以,哆啦A梦的数据集自己在百度搜的
8.聊点其他的
不同的业务对yolo的使用方式不一样:比如商场、车站的人数统计和害虫识别,这类业务只要把目标识别出来就行;而证件信息识别这类业务,可以先借助yolo定位证件上各项信息的位置,再通过OCR识别对应区域的图片,这样就能快速获取身份证各个位置的信息,完成整个流程。