yolov8使用 - 技术栈

导出 ONNX 的命令：`yolo export model=D:/zou/wjzz/workDoc/screwsDetech/runs/detect/train/weights/best.pt format=onnx opset=12 dynamic=True simplify=True`

内容	解释
蓝色 Conv	卷积层，提取特征
黑色 Add / Mul	做位置偏移、网格映射
绿色 Gather / Shape	从 shape 里提取宽、高等维度信息
红色 Concat / Reshape	拼接张量、变形输出
紫色 Softmax / Transpose	用于分类概率或格式变换

因为后处理（框解码、NMS）这部分如果放在 ONNX 图里：

很难理解 / 不好调试
不利于优化和加速
有时还不能使用 GPU 推理

所以很多人导出的时候会使用 `model.export(format="onnx", simplify=True, dynamic=False, nms=False)`，也就是不把 NMS 导出到模型里，再用自己写的代码（比如用 Python，或者下文的 C# 程序）做后处理。

✅ 什么是张量（Tensor）？

张量（Tensor）就是一个多维数组。你可以把它看作是：

维度	举例	类比（在 NumPy 中）
标量（0维）	`3.14`	`np.array(3.14)`
向量（1维）	`[1, 2, 3]`	`np.array([1, 2, 3])`
矩阵（2维）	`[[1,2],[3,4]]`	`np.array([[1,2],[3,4]])`
三维张量（3维）	彩色图像（H, W, C）	`img[height][width][channel]`
四维张量（4维）	多张图片：`[batch, channel, height, width]`	`img[batch][channel][H][W]`

所以你可以记住一句话： 张量 = "有维度的数组"，是用来装图片、特征图、模型输入输出的容器。

✅ NCHW 和 NHWC 是什么？

它们是 图像数据的维度排列方式，在模型输入输出中经常见到。

格式	含义	解释	举例（YOLO）

NCHW Batch, Channels, Height, Width PyTorch 默认格式 [1, 3, 640, 640]

NHWC Batch, Height, Width, Channels TensorFlow 默认格式 [1, 640, 640, 3]

比如，一张 RGB 图片输入模型时：

NCHW ：先通道、后高宽：[1, 3, 640, 640]

NHWC ：先高宽、后通道：[1, 640, 640, 3]

格式	含义	解释	举例（YOLO）
NCHW	Batch, Channels, Height, Width	PyTorch 默认格式	`[1, 3, 640, 640]`
NHWC	Batch, Height, Width, Channels	TensorFlow 默认格式	`[1, 640, 640, 3]`

假设你有 100 张图片要输入模型，但你的显卡内存有限，不能一次处理 100 张，那就可以每次处理 16 张，这个 16 就是 Batch Size（N）。

NCHW: (16, 3, 640, 640)

N = 16：一次处理 16 张图片

C = 3：每张图有 3 个通道（RGB）

H = 640：高度 640 像素

W = 640：宽度 640 像素

已经可以跑通的程序（尚未优化，没有 NMS）

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using OpenCvSharp;

/// <summary>
/// Minimal YOLOv8 ONNX inference demo without NMS: loads the model, runs a
/// single image through it, keeps every prediction whose score exceeds a
/// confidence threshold and draws the resulting boxes on the original image.
/// </summary>
class Program
{
    // Side length of the square image fed to the model; every input is resized to this.
    const int InputSize = 640;

    /// <summary>
    /// Entry point. Runs the whole pipeline once on a hard-coded image path and
    /// shows the annotated result in a window until a key is pressed.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    /// <exception cref="InvalidOperationException">
    /// The model output does not have the expected (1, C, N) layout with C &gt;= 5.
    /// </exception>
    static void Main(string[] args)
    {
        // 1. Load the model. Disposed on exit so the native session is released.
        using var session = new InferenceSession(@"D:\zou\wjzz\workDoc\screwsDetech\runs\detect\train\weights\best.onnx");

        // 2. Read the image.
        string imgPath = @"D:\zou\wjzz\workDoc\screwsDetech\test_images\screw_keshang_bad_130.png";
        using Mat img = Cv2.ImRead(imgPath);
        if (img.Empty())
        {
            // ImRead reports a missing/unreadable file as an empty Mat rather than throwing.
            Console.Error.WriteLine($"Failed to read image: {imgPath}");
            return;
        }

        using Mat originalImg = img.Clone();
        int h0 = img.Rows;
        int w0 = img.Cols;

        // 3. Preprocess: resize to InputSize x InputSize, BGR -> RGB, scale to [0, 1].
        using Mat imgResized = new Mat();
        Cv2.Resize(img, imgResized, new Size(InputSize, InputSize));
        Cv2.CvtColor(imgResized, imgResized, ColorConversionCodes.BGR2RGB);
        imgResized.ConvertTo(imgResized, MatType.CV_32FC3, 1.0 / 255);

        // 4. HWC -> CHW, laid out as a flat 1 x 3 x H x W buffer.
        int planeSize = InputSize * InputSize;
        var inputData = new float[3 * planeSize];
        for (int y = 0; y < InputSize; y++)
        {
            for (int x = 0; x < InputSize; x++)
            {
                Vec3f pixel = imgResized.At<Vec3f>(y, x);
                int offset = y * InputSize + x;
                inputData[offset] = pixel.Item0;                 // R plane starts at 0
                inputData[planeSize + offset] = pixel.Item1;     // G plane starts at H*W
                inputData[2 * planeSize + offset] = pixel.Item2; // B plane starts at 2*H*W
            }
        }
        var inputTensor = new DenseTensor<float>(inputData, new[] { 1, 3, InputSize, InputSize });

        // 5. Inference. The output tensor is only read while `results` is alive.
        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor(session.InputMetadata.Keys.First(), inputTensor)
        };
        using var results = session.Run(inputs);
        var output = results.First().AsTensor<float>();

        // 6. Validate the output layout: expected (1, 5, N), e.g. (1, 5, 8400),
        //    where rows 0..3 are cx, cy, w, h and row 4 is the score.
        var dims = output.Dimensions;
        if (dims.Length != 3 || dims[1] < 5)
        {
            throw new InvalidOperationException("Unexpected model output shape; expected (1, 5, N).");
        }
        int numPreds = dims[2];

        // 7. Keep predictions above the confidence threshold.
        float confThreshold = 0.25f;
        var boxes = new List<Rect2d>();
        var scores = new List<float>();
        for (int i = 0; i < numPreds; i++)
        {
            float score = output[0, 4, i];
            if (score <= confThreshold)
            {
                continue;
            }

            float cx = output[0, 0, i];
            float cy = output[0, 1, i];
            float w = output[0, 2, i];
            float h = output[0, 3, i];

            // Centre/size -> corners, then rescale from network input to original image size.
            float x1 = (cx - w / 2) * ((float)w0 / InputSize);
            float x2 = (cx + w / 2) * ((float)w0 / InputSize);
            float y1 = (cy - h / 2) * ((float)h0 / InputSize);
            float y2 = (cy + h / 2) * ((float)h0 / InputSize);

            boxes.Add(new Rect2d(x1, y1, x2 - x1, y2 - y1));
            scores.Add(score);
        }

        // 8. Draw the boxes with their scores.
        for (int i = 0; i < boxes.Count; i++)
        {
            var rect = boxes[i];
            Cv2.Rectangle(originalImg, new Point(rect.X, rect.Y), new Point(rect.X + rect.Width, rect.Y + rect.Height), Scalar.Green, 2);
            Cv2.PutText(originalImg, $"{scores[i]:0.00}", new Point((int)rect.X, (int)rect.Y - 10), HersheyFonts.HersheySimplex, 0.5, Scalar.Green, 1);
        }

        // 9. Show the result until a key is pressed.
        Cv2.ImShow("Result", originalImg);
        Cv2.WaitKey();
        Cv2.DestroyAllWindows();
    }
}

优化后的处理逻辑：拆分成函数，并使用 NMS（非极大值抑制）去除重叠框

using OpenCvSharp; using Microsoft.ML.OnnxRuntime; using Microsoft.ML.OnnxRuntime.Tensors; using System; using System.Collections.Generic; using System.Linq;

/// <summary>
/// YOLOv8 ONNX inference demo with NMS: preprocesses one image, runs the model,
/// decodes the boxes, suppresses overlapping detections and shows the result.
/// </summary>
class Program
{
    // Size the image is resized to before being fed to the model.
    static readonly int inputWidth = 640;
    static readonly int inputHeight = 640;

    // Predictions scoring below this value are discarded before NMS.
    static readonly float confThreshold = 0.25f;

    // Boxes overlapping a higher-scoring box by more than this IoU are suppressed.
    static readonly float iouThreshold = 0.45f;

    static readonly InferenceSession session = new InferenceSession(@"D:\zou\wjzz\workDoc\screwsDetech\runs\detect\train\weights\best.onnx");

    /// <summary>
    /// Entry point. Runs the pipeline once on a hard-coded image path and shows
    /// the annotated image in a window until a key is pressed.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string imgPath = @"D:\zou\wjzz\workDoc\screwsDetech\test_images\screw_keshang_bad_130.png";
        using Mat img = Cv2.ImRead(imgPath);
        if (img.Empty())
        {
            // ImRead reports a missing/unreadable file as an empty Mat rather than throwing.
            Console.Error.WriteLine($"Failed to read image: {imgPath}");
            return;
        }

        using Mat originalImg = img.Clone();

        int h0 = img.Rows;
        int w0 = img.Cols;

        var inputTensor = Preprocess(img);
        var output = Inference(inputTensor);
        var (boxes, scores) = Postprocess(output, w0, h0);
        var keepIdx = NMS(boxes, scores, iouThreshold);

        foreach (int i in keepIdx)
        {
            var box = boxes[i];
            Cv2.Rectangle(originalImg, new Point(box.X, box.Y), new Point(box.X + box.Width, box.Y + box.Height), Scalar.Green, 2);
            Cv2.PutText(originalImg, $"{scores[i]:0.00}", new Point((int)box.X, (int)box.Y - 10), HersheyFonts.HersheySimplex, 0.5, Scalar.Green, 1);
        }

        Cv2.ImShow("Result", originalImg);
        Cv2.WaitKey();
        Cv2.DestroyAllWindows();
    }

    /// <summary>
    /// Converts a BGR image into a 1 x 3 x H x W float tensor: resized to the
    /// network input size, converted to RGB and scaled to [0, 1].
    /// </summary>
    /// <param name="img">Source image as read by <c>Cv2.ImRead</c>; not modified.</param>
    /// <returns>The tensor to feed to the model.</returns>
    static DenseTensor<float> Preprocess(Mat img)
    {
        using Mat resized = new Mat();
        Cv2.Resize(img, resized, new Size(inputWidth, inputHeight));
        Cv2.CvtColor(resized, resized, ColorConversionCodes.BGR2RGB);
        resized.ConvertTo(resized, MatType.CV_32FC3, 1.0 / 255);

        // Split into three single-channel planes so each can be copied contiguously (HWC -> CHW).
        Mat[] channels = Cv2.Split(resized);
        try
        {
            int planeSize = inputHeight * inputWidth;
            float[] inputData = new float[3 * planeSize];
            for (int c = 0; c < 3; c++)
            {
                var indexer = channels[c].GetGenericIndexer<float>();
                for (int y = 0; y < inputHeight; y++)
                {
                    for (int x = 0; x < inputWidth; x++)
                    {
                        inputData[c * planeSize + y * inputWidth + x] = indexer[y, x];
                    }
                }
            }

            return new DenseTensor<float>(inputData, new[] { 1, 3, inputHeight, inputWidth });
        }
        finally
        {
            // Split allocates new Mats; release them once the data has been copied out.
            foreach (Mat channel in channels)
            {
                channel.Dispose();
            }
        }
    }

    /// <summary>
    /// Runs the model on a preprocessed tensor and returns its first output.
    /// </summary>
    /// <param name="inputTensor">Tensor produced by <see cref="Preprocess"/>.</param>
    /// <returns>A managed copy of the first output tensor.</returns>
    static Tensor<float> Inference(DenseTensor<float> inputTensor)
    {
        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor(session.InputMetadata.Keys.First(), inputTensor)
        };
        using var results = session.Run(inputs);

        // The tensor exposed by `results` must not outlive it: `results` is disposed
        // when this method returns. Copy the data into a managed DenseTensor first.
        return results.First().AsTensor<float>().ToDenseTensor();
    }

    /// <summary>
    /// Decodes the raw model output into boxes (in original-image pixels) and scores,
    /// dropping predictions that score below <c>confThreshold</c>.
    /// </summary>
    /// <param name="output">
    /// Model output; assumed to follow the YOLOv8 detect layout (1, 4 + numClasses, N)
    /// with rows 0..3 = cx, cy, w, h in network-input pixels.
    /// </param>
    /// <param name="w0">Width of the original image.</param>
    /// <param name="h0">Height of the original image.</param>
    /// <returns>Parallel lists of boxes and scores.</returns>
    /// <exception cref="InvalidOperationException">The output does not have the expected layout.</exception>
    static (List<Rect2d>, List<float>) Postprocess(Tensor<float> output, int w0, int h0)
    {
        var dims = output.Dimensions;
        if (dims.Length != 3 || dims[1] < 5)
        {
            throw new InvalidOperationException("Unexpected model output shape; expected (1, 4 + numClasses, N).");
        }

        int numChannels = dims[1];
        int numPreds = dims[2];
        var boxes = new List<Rect2d>();
        var scores = new List<float>();

        for (int i = 0; i < numPreds; i++)
        {
            // Best class score; for a single-class model this is just row 4.
            float score = output[0, 4, i];
            for (int c = 5; c < numChannels; c++)
            {
                score = Math.Max(score, output[0, c, i]);
            }

            if (score < confThreshold)
            {
                continue;
            }

            float cx = output[0, 0, i];
            float cy = output[0, 1, i];
            float w = output[0, 2, i];
            float h = output[0, 3, i];

            // Centre/size -> corners, rescaled from network input to original image size.
            float x1 = (cx - w / 2) * w0 / inputWidth;
            float y1 = (cy - h / 2) * h0 / inputHeight;
            float x2 = (cx + w / 2) * w0 / inputWidth;
            float y2 = (cy + h / 2) * h0 / inputHeight;

            boxes.Add(new Rect2d(x1, y1, x2 - x1, y2 - y1));
            scores.Add(score);
        }

        return (boxes, scores);
    }

    /// <summary>
    /// Greedy non-maximum suppression: visits boxes from highest to lowest score,
    /// keeps each box not yet suppressed and suppresses every lower-scoring box
    /// whose IoU with it exceeds <paramref name="iouThreshold"/>.
    /// </summary>
    /// <param name="boxes">Candidate boxes.</param>
    /// <param name="scores">Score of each box, parallel to <paramref name="boxes"/>.</param>
    /// <param name="iouThreshold">IoU above which a lower-scoring box is suppressed.</param>
    /// <returns>Indices into <paramref name="boxes"/> of the kept boxes, best score first.</returns>
    static List<int> NMS(List<Rect2d> boxes, List<float> scores, float iouThreshold)
    {
        var indices = scores
            .Select((score, idx) => new { score, idx })
            .OrderByDescending(s => s.score)
            .Select(s => s.idx)
            .ToList();

        var keep = new List<int>();
        var removed = new bool[boxes.Count];

        for (int i = 0; i < indices.Count; i++)
        {
            int idx = indices[i];
            if (removed[idx])
            {
                continue;
            }

            keep.Add(idx);

            for (int j = i + 1; j < indices.Count; j++)
            {
                int idx2 = indices[j];
                if (removed[idx2])
                {
                    continue;
                }

                if (ComputeIOU(boxes[idx], boxes[idx2]) > iouThreshold)
                {
                    removed[idx2] = true;
                }
            }
        }

        return keep;
    }

    /// <summary>
    /// Computes the intersection-over-union of two axis-aligned boxes.
    /// </summary>
    /// <param name="box1">First box.</param>
    /// <param name="box2">Second box.</param>
    /// <returns>IoU in [0, 1]; 0 when the union has no area.</returns>
    static double ComputeIOU(Rect2d box1, Rect2d box2)
    {
        double xx1 = Math.Max(box1.X, box2.X);
        double yy1 = Math.Max(box1.Y, box2.Y);
        double xx2 = Math.Min(box1.X + box1.Width, box2.X + box2.Width);
        double yy2 = Math.Min(box1.Y + box1.Height, box2.Y + box2.Height);

        double interArea = Math.Max(0, xx2 - xx1) * Math.Max(0, yy2 - yy1);
        double unionArea = box1.Width * box1.Height + box2.Width * box2.Height - interArea;

        // Two degenerate (zero-area) boxes would otherwise yield 0/0 = NaN.
        return unionArea > 0 ? interArea / unionArea : 0;
    }
}