部署：YOLO V11 TensorRT 推理&前后处理

开源地址：github.com/robofans/yolov11_trt_ros2

功能实现：yolov11混合精度量化、ros2部署

1. 整体流程

YOLO V11 推理分为三个阶段：

复制代码

输入图像 → 前处理 → TensorRT 推理 → 后处理 → 检测结果

2. 前处理（Preprocess）

2.1 仿射变换原理

YOLO 前处理只需要缩放 + 平移 （不需要旋转），因此令 b = d = 0：

复制代码

x' = a·x + c
y' = e·y + f

其中 a = e = min(dst_h / src_h, dst_w / src_w)，取长宽缩放比例中较小的那个，保证图像不变形。

中心对齐：源图中心 (H_s/2, W_s/2) 映射到目标图中心 (H_d/2, W_d/2)：

复制代码

c = H_d/2 - s·H_s/2
f = W_d/2 - s·W_s/2

关键思路：从目标图坐标出发，根据仿射变换反推源图坐标，然后用双线性插值获取像素值。

2.2 双线性插值

得到浮点坐标 (src_x, src_y) 后，从四个最近整数像素按面积比例插值：

复制代码

x_low = floor(src_x),  x_high = x_low + 1
y_low = floor(src_y),  y_high = y_low + 1
lx = src_x - x_low,   hx = 1 - lx
ly = src_y - y_low,   hy = 1 - ly

w1 = hy·hx  (左上),  w2 = hy·lx  (右上)
w3 = ly·hx  (左下),  w4 = ly·lx  (右下)

2.3 CUDA 实现

cpp 复制代码

// preprocess.cu - 仿射变换 kernel
__global__ void warpaffine_kernel(
    uint8_t* src, int src_line_size, int src_width, int src_height,
    float* dst, int dst_width, int dst_height,
    uint8_t const_value_st, AffineMatrix d2s, int edge)
{
    int position = blockDim.x * blockIdx.x + threadIdx.x;
    if (position >= edge) return;

    int dx = position % dst_width;
    int dy = position / dst_width;

    // 从目标坐标反推源坐标
    float src_x = d2s.value[0] * dx + d2s.value[1] * dy + d2s.value[2] + 0.5f;
    float src_y = d2s.value[3] * dx + d2s.value[4] * dy + d2s.value[5] + 0.5f;

    // 越界填充灰色
    if (src_x <= -1 || src_x >= src_width || src_y <= -1 || src_y >= src_height) {
        c0 = c1 = c2 = const_value_st;
    } else {
        // 双线性插值
        int y_low = floorf(src_y), x_low = floorf(src_x);
        int y_high = y_low + 1, x_high = x_low + 1;
        float ly = src_y - y_low, lx = src_x - x_low;
        float hy = 1 - ly, hx = 1 - lx;

        float w1 = hy*hx, w2 = hy*lx, w3 = ly*hx, w4 = ly*lx;
        // ... 取四个角点像素 ...
        c0 = w1*v1[0] + w2*v2[0] + w3*v3[0] + w4*v4[0]; // B
        c1 = w1*v1[1] + w2*v2[1] + w3*v3[1] + w4*v4[1]; // G
        c2 = w1*v1[2] + w2*v2[2] + w3*v3[2] + w4*v4[2]; // R
    }

    // BGR→RGB 通道交换
    float t = c2; c2 = c0; c0 = t;

    // 归一化 + CHW 重排
    int area = dst_width * dst_height;
    dst[dy * dst_width + dx] = c0 / 255.0f;                    // R channel
    dst[area + dy * dst_width + dx] = c1 / 255.0f;           // G channel
    dst[2*area + dy * dst_width + dx] = c2 / 255.0f;         // B channel
}

3. TensorRT 推理

3.1 模型输出格式

YOLO V11 输出 (DetectionAttributeSize, NumDetections)：

索引	内容
0	cx（中心 x）
1	cy（中心 y）
2	ow（宽）
3	oh（高）
4 ~ 4+num_classes	各类别置信度

NumDetections = 8400（三个输出层下采样之和），DetectionAttributeSize = 84（4 + 80 类）。

4. 后处理（Postprocess）

4.1 解码矩形框

cpp 复制代码

// yolov11.cpp - postprocess()
for (int i = 0; i < det_output.cols; ++i) {
    // 提取类别分数
    const Mat classes_scores = det_output.col(i).rowRange(4, 4 + num_classes);
    Point class_id_point;
    double score;
    minMaxLoc(classes_scores, nullptr, &score, nullptr, &class_id_point);

    if (score > conf_threshold) {
        const float cx = det_output.at<float>(0, i);
        const float cy = det_output.at<float>(1, i);
        const float ow = det_output.at<float>(2, i);
        const float oh = det_output.at<float>(3, i);

        // cx,cy 是中心点，转为左上角坐标
        Rect box;
        box.x = static_cast<int>((cx - 0.5 * ow));
        box.y = static_cast<int>((cy - 0.5 * oh));
        box.width = static_cast<int>(ow);
        box.height = static_cast<int>(oh);

        boxes.push_back(box);
        class_ids.push_back(class_id_point.y);
        confidences.push_back(score);
    }
}

4.2 NMS（非极大值抑制）

cpp 复制代码

// 按置信度降序排序
vector<int> sorted(boxes.size());
iota(sorted.begin(), sorted.end(), 0);
sort(sorted.begin(), sorted.end(),
     [&](int a, int b) { return confidences[a] > confidences[b]; });

vector<bool> suppressed(boxes.size(), false);
for (size_t i = 0; i < sorted.size(); i++) {
    if (suppressed[sorted[i]]) continue;
    nms_result.push_back(sorted[i]);
    for (size_t j = i + 1; j < sorted.size(); j++) {
        if (suppressed[sorted[j]]) continue;
        Rect inter = boxes[sorted[i]] & boxes[sorted[j]];
        float overlap = static_cast<float>(inter.area()) /
            (areas[sorted[i]] + areas[sorted[j]] - inter.area());
        if (overlap > nms_threshold)
            suppressed[sorted[j]] = true;
    }
}

NMS 原理：

按置信度排序
保留最高置信度的框
剔除与保留框 IoU > 阈值的框
重复直到处理完所有框

4.3 坐标映射回原图

cpp 复制代码

// yolov11.cpp - draw()
const float ratio_h = input_h / (float)image.rows;
const float ratio_w = input_w / (float)image.cols;

for (int i = 0; i < output.size(); i++) {
    auto box = output[i].bbox;
    auto class_id = output[i].class_id;

    if (ratio_h > ratio_w) {
        // 宽边对齐，x 方向缩放用 ratio_w
        box.x = box.x / ratio_w;
        box.y = (box.y - (input_h - ratio_w * image.rows) / 2) / ratio_w;
        box.width = box.width / ratio_w;
        box.height = box.height / ratio_w;
    } else {
        // 高边对齐
        box.x = (box.x - (input_w - ratio_h * image.cols) / 2) / ratio_h;
        box.y = box.y / ratio_h;
        box.width = box.width / ratio_h;
        box.height = box.height / ratio_h;
    }

    rectangle(image, Point(box.x, box.y),
              Point(box.x + box.width, box.y + box.height), color, 3);
    putText(image, class_string, Point(box.x, box.y - 10), ...);
}