Python version:
python
import onnxruntime as ort
import math
import cv2
import numpy as np
class YOLO26OBB:
    """YOLO26 oriented-bounding-box (OBB) detector using ONNX Runtime.

    Loads an end2end YOLO26 OBB model, runs it on a single image, filters
    detections by confidence, and draws the rotated boxes on the image.
    """

    def __init__(
        self,
        onnx_model: str,
        input_image: str,
        confidence_thres: float = 0.25,
        classes=None,
    ):
        """Initialize YOLO26OBB detector.

        Args:
            onnx_model (str): Path to the ONNX model.
            input_image (str): Path to the input image.
            confidence_thres (float): Confidence threshold for filtering detections.
            classes (dict, optional): Dictionary mapping class IDs to class names.
                Defaults to ``{0: "carton", 1: "strip"}``.
        """
        self.onnx_model = onnx_model
        self.input_image = input_image
        self.confidence_thres = confidence_thres
        self.classes = {0: "carton", 1: "strip"} if classes is None else classes
        # Generate a deterministic color palette for the classes
        np.random.seed(42)
        self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))

    def letterbox(
        self, img: np.ndarray, new_shape: tuple[int, int] = (640, 640)
    ) -> tuple[np.ndarray, float, tuple[int, int]]:
        """Resize and pad image while maintaining aspect ratio.

        Args:
            img (np.ndarray): Input image to be resized.
            new_shape (tuple[int, int]): Target shape (height, width).

        Returns:
            img (np.ndarray): Resized and padded image.
            ratio (float): Scaling ratio applied.
            pad (tuple[int, int]): Padding values (top, left).
        """
        shape = img.shape[:2]  # current shape [height, width]
        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        # Size after resizing, before padding: (width, height)
        new_unpad = round(shape[1] * r), round(shape[0] * r)
        dw, dh = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2
        if shape[::-1] != new_unpad:
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        # The +/-0.1 keeps the top/bottom (left/right) split symmetric when
        # the total padding is odd
        top, bottom = round(dh - 0.1), round(dh + 0.1)
        left, right = round(dw - 0.1), round(dw + 0.1)
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)
        )
        return img, r, (top, left)

    def xywhr2xyxyxyxy(self, rboxes: np.ndarray) -> np.ndarray:
        """Convert rotated bounding boxes from xywhr format to 4 corner points.

        Args:
            rboxes (np.ndarray): Rotated boxes with shape (N, 5) in [cx, cy, w, h, angle].

        Returns:
            (np.ndarray): Corner points with shape (N, 4, 2).
        """
        cos = np.cos(rboxes[:, 4])
        sin = np.sin(rboxes[:, 4])
        cx, cy = rboxes[:, 0], rboxes[:, 1]
        w, h = rboxes[:, 2], rboxes[:, 3]
        # Half dimensions
        w2, h2 = w / 2, h / 2
        # Rotation vectors: vec1 points along the box width, vec2 along the height
        vec1_x = w2 * cos
        vec1_y = w2 * sin
        vec2_x = -h2 * sin
        vec2_y = h2 * cos
        # Four corner points
        pt1 = np.stack([cx + vec1_x + vec2_x, cy + vec1_y + vec2_y], axis=-1)
        pt2 = np.stack([cx + vec1_x - vec2_x, cy + vec1_y - vec2_y], axis=-1)
        pt3 = np.stack([cx - vec1_x - vec2_x, cy - vec1_y - vec2_y], axis=-1)
        pt4 = np.stack([cx - vec1_x + vec2_x, cy - vec1_y + vec2_y], axis=-1)
        return np.stack([pt1, pt2, pt3, pt4], axis=1)

    def regularize_rboxes(self, rboxes: np.ndarray) -> np.ndarray:
        """Regularize rotated bounding boxes to angle range [0, pi/2).

        Args:
            rboxes (np.ndarray): Rotated boxes with shape (N, 5) in xywhr format.

        Returns:
            (np.ndarray): Regularized rotated boxes.
        """
        x, y, w, h, t = rboxes[:, 0], rboxes[:, 1], rboxes[:, 2], rboxes[:, 3], rboxes[:, 4]
        # Swap width/height when the (wrapped) angle crosses pi/2
        swap = (t % math.pi) >= (math.pi / 2)
        w_ = np.where(swap, h, w)
        h_ = np.where(swap, w, h)
        t = t % (math.pi / 2)
        return np.stack([x, y, w_, h_, t], axis=-1)

    def draw_rotated_box(
        self, img: np.ndarray, corners: np.ndarray, score: float, class_id: int
    ) -> None:
        """Draw an oriented bounding box on the image.

        Args:
            img (np.ndarray): Image to draw on (modified in place).
            corners (np.ndarray): Four corner points with shape (4, 2).
            score (float): Detection confidence score.
            class_id (int): Class ID for the detection.
        """
        color = tuple(map(int, self.color_palette[int(class_id) % len(self.color_palette)]))
        corners = corners.astype(np.int32)
        # Draw the rotated rectangle
        cv2.polylines(img, [corners], isClosed=True, color=color, thickness=2)
        # Draw label (falls back to the numeric ID for unknown classes)
        label = f"{self.classes.get(int(class_id), int(class_id))}: {score:.2f}"
        label_pos = (int(corners[0, 0]), int(corners[0, 1]) - 10)
        (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        # Background rectangle for label
        cv2.rectangle(
            img,
            (label_pos[0], label_pos[1] - label_height - 5),
            (label_pos[0] + label_width, label_pos[1] + 5),
            color,
            cv2.FILLED,
        )
        cv2.putText(
            img, label, label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA
        )

    def preprocess(self) -> tuple[np.ndarray, float, tuple[int, int]]:
        """Preprocess the input image for inference.

        Requires ``self.input_height`` / ``self.input_width`` to be set
        (done in :meth:`main` from the model's input shape).

        Returns:
            image_data (np.ndarray): Preprocessed image with shape (1, 3, H, W).
            ratio (float): Scaling ratio applied during letterbox.
            pad (tuple[int, int]): Padding (top, left) applied during letterbox.

        Raises:
            FileNotFoundError: If the input image cannot be read.
        """
        # Read image
        self.img = cv2.imread(self.input_image)
        if self.img is None:
            raise FileNotFoundError(f"Image not found: {self.input_image}")
        self.img_height, self.img_width = self.img.shape[:2]
        # Convert BGR to RGB
        img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)
        # Letterbox
        img, ratio, pad = self.letterbox(img, (self.input_height, self.input_width))
        # Normalize to [0, 1] and transpose: HWC -> CHW, add batch dimension
        image_data = np.ascontiguousarray(img.transpose(2, 0, 1)[None].astype(np.float32) / 255.0)
        return image_data, ratio, pad

    def postprocess_end2end(
        self, output: np.ndarray, ratio: float, pad: tuple[int, int]
    ) -> list[tuple[np.ndarray, float, int]]:
        """Post-process YOLO26 end2end model output.

        YOLO26 end2end output format: (batch, max_det, 7).
        Each detection: [x, y, w, h, score, class_id, angle].

        Args:
            output (np.ndarray): Model output with shape (1, max_det, 7).
            ratio (float): Scaling ratio from letterbox.
            pad (tuple[int, int]): Padding (top, left) from letterbox.

        Returns:
            (list): List of (corners, score, class_id) tuples.
        """
        # Remove batch dimension: (1, max_det, 7) -> (max_det, 7)
        preds = np.squeeze(output, axis=0)
        # Extract components: [x, y, w, h, score, class_id, angle]
        boxes_xywh = preds[:, :4]  # x, y, w, h
        scores = preds[:, 4]       # confidence score
        class_ids = preds[:, 5]    # class index
        angles = preds[:, 6]       # rotation angle
        # Filter by confidence threshold
        mask = scores >= self.confidence_thres
        boxes_xywh = boxes_xywh[mask]
        scores = scores[mask]
        class_ids = class_ids[mask]
        angles = angles[mask]
        if len(boxes_xywh) == 0:
            return []
        # Scale boxes back to original image size (undo letterbox)
        boxes_xywh[:, 0] = (boxes_xywh[:, 0] - pad[1]) / ratio  # x
        boxes_xywh[:, 1] = (boxes_xywh[:, 1] - pad[0]) / ratio  # y
        boxes_xywh[:, 2] = boxes_xywh[:, 2] / ratio  # w
        boxes_xywh[:, 3] = boxes_xywh[:, 3] / ratio  # h
        # Combine boxes with angles: (N, 5) in xywhr format
        rboxes = np.concatenate([boxes_xywh, angles[:, None]], axis=1)
        # Regularize angles to [0, pi/2)
        rboxes = self.regularize_rboxes(rboxes)
        # Convert to corner points
        corners = self.xywhr2xyxyxyxy(rboxes)
        return list(zip(corners, scores, class_ids))

    def main(self) -> np.ndarray:
        """Perform inference and return the output image with drawn detections.

        Returns:
            (np.ndarray): Output image with drawn oriented bounding boxes.
        """
        # Create ONNX Runtime session, preferring CUDA when available
        available = ort.get_available_providers()
        providers = [p for p in ("CUDAExecutionProvider", "CPUExecutionProvider") if p in available]
        session = ort.InferenceSession(self.onnx_model, providers=providers or available)
        # Get model input shape (assumed [batch, 3, H, W])
        model_inputs = session.get_inputs()
        input_shape = model_inputs[0].shape
        self.input_height = input_shape[2]
        self.input_width = input_shape[3]
        # Preprocess, run inference, post-process
        img_data, ratio, pad = self.preprocess()
        outputs = session.run(None, {model_inputs[0].name: img_data})
        detections = self.postprocess_end2end(outputs[0], ratio, pad)
        # Draw detections on a copy of the original image
        output_img = self.img.copy()
        for corners, score, class_id in detections:
            self.draw_rotated_box(output_img, corners, score, class_id)
        print(f"Found {len(detections)} detections")
        return output_img
def main():
    """Run OBB detection on the demo image and save the visualization."""
    detector = YOLO26OBB("strip-yolo26.onnx", "obb.jpg", 0.5)
    result = detector.main()
    cv2.imwrite("output.jpg", result)


if __name__ == "__main__":
    main()
C++
cpp
// Standard library
#include <algorithm>
#include <cmath>
#include <cstring>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>
// OpenCV
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
// ONNX Runtime
#include <onnxruntime_cxx_api.h>
// Constants
const float PI = 3.14159265358979323846f;
// Holds one oriented-bounding-box detection result
struct OBBResult {
    cv::Point2f corners[4]; // the 4 corner points of the rotated box
    float score;            // detection confidence
    int class_id;           // class index
    std::string class_name; // human-readable class name
};
class YOLO26OBB {
public:
    /**
     * Build the detector: create the ONNX Runtime session and cache the
     * model's input/output node names and input shape.
     *
     * @param model_path Path to the ONNX model file.
     * @param image_path Path to the input image.
     * @param conf_thres Confidence threshold for filtering detections.
     */
    YOLO26OBB(const std::string& model_path, const std::string& image_path, float conf_thres = 0.25)
        : input_image_path_(image_path), conf_thres_(conf_thres), env_(ORT_LOGGING_LEVEL_WARNING, "YOLO26OBB") {
        // 1. Configure session options
        Ort::SessionOptions session_options;
        session_options.SetIntraOpNumThreads(1);
        session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
        // 2. Create the session (ORT on Windows expects a wide-char path)
#ifdef _WIN32
        // Windows: convert char* to wchar_t*
        size_t newsize = model_path.length() + 1;
        std::wstring w_model_path(newsize, L'\0');
        size_t convertedChars = 0;
        mbstowcs_s(&convertedChars, &w_model_path[0], newsize, model_path.c_str(), _TRUNCATE);
        session_ = std::make_unique<Ort::Session>(env_, w_model_path.c_str(), session_options);
#else
        // Linux/Mac: the narrow char* path works directly
        session_ = std::make_unique<Ort::Session>(env_, model_path.c_str(), session_options);
#endif
        // 3. Collect input node information
        Ort::AllocatorWithDefaultOptions allocator;
        size_t num_input_nodes = session_->GetInputCount();
        input_node_names_storage_.reserve(num_input_nodes);
        input_node_names_.reserve(num_input_nodes);
        for (size_t i = 0; i < num_input_nodes; i++) {
            // Deep-copy the name into a std::string so the const char* we
            // hand to Run() cannot dangle
            auto input_name_ptr = session_->GetInputNameAllocated(i, allocator);
            input_node_names_storage_.emplace_back(input_name_ptr.get());
            input_node_names_.push_back(input_node_names_storage_.back().c_str());
            // Input dimensions (if there are several inputs, the last wins;
            // YOLO models have a single image input)
            auto type_info = session_->GetInputTypeInfo(i);
            auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
            input_dims_ = tensor_info.GetShape();
        }
        // Assume an input shape of [1, 3, H, W]. Note that input_dims_[0]
        // may be -1 (dynamic batch); only H and W are used here.
        if (input_dims_.size() >= 4) {
            input_height_ = input_dims_[2];
            input_width_ = input_dims_[3];
        }
        else {
            // Fallback defaults in case the shape could not be read
            input_height_ = 640;
            input_width_ = 640;
        }
        // 4. Collect output node information
        size_t num_output_nodes = session_->GetOutputCount();
        output_node_names_storage_.reserve(num_output_nodes);
        output_node_names_.reserve(num_output_nodes);
        for (size_t i = 0; i < num_output_nodes; i++) {
            auto output_name_ptr = session_->GetOutputNameAllocated(i, allocator);
            output_node_names_storage_.emplace_back(output_name_ptr.get());
            output_node_names_.push_back(output_node_names_storage_.back().c_str());
        }
        // 5. Initialize classes and colors
        classes_ = { {0, "carton"}, {1, "strip"} };
        generate_colors();
    }

    /** Run the full pipeline: load image, infer, post-process, draw, save. */
    void run() {
        // Load the input image
        cv::Mat original_img = cv::imread(input_image_path_);
        if (original_img.empty()) {
            std::cerr << "Error: Image not found at " << input_image_path_ << std::endl;
            return;
        }
        // 1. Preprocess
        float ratio;
        std::pair<int, int> pad;
        std::vector<float> input_tensor_values;
        preprocess(original_img, input_tensor_values, ratio, pad);
        // 2. Create the input tensor (borrows input_tensor_values' buffer)
        size_t input_tensor_size = input_tensor_values.size();
        std::vector<int64_t> input_shape = { 1, 3, input_height_, input_width_ };
        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
            memory_info, input_tensor_values.data(), input_tensor_size, input_shape.data(), input_shape.size()
        );
        // 3. Inference
        try {
            auto output_tensors = session_->Run(
                Ort::RunOptions{ nullptr },
                input_node_names_.data(), &input_tensor, 1,
                output_node_names_.data(), output_node_names_.size()
            );
            // 4. Post-process
            float* floatarr = output_tensors[0].GetTensorMutableData<float>();
            auto output_shape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
            // Expected output shape: (1, max_det, 7) -> [batch, detections, features]
            // features: [x, y, w, h, score, class_id, angle]
            int num_detections = 0;
            int num_features = 0;
            if (output_shape.size() == 3) {
                num_detections = (int)output_shape[1];
                num_features = (int)output_shape[2];
            }
            else if (output_shape.size() == 2) {
                // Some exports produce (max_det, 7) without a batch dimension
                num_detections = (int)output_shape[0];
                num_features = (int)output_shape[1];
            }
            std::vector<OBBResult> results = postprocess(floatarr, num_detections, num_features, ratio, pad);
            // 5. Draw and save
            cv::Mat output_img = original_img.clone();
            std::cout << "Found " << results.size() << " detections." << std::endl;
            for (const auto& res : results) {
                draw_rotated_box(output_img, res);
            }
            cv::imwrite("output.jpg", output_img);
            std::cout << "Result saved to output.jpg" << std::endl;
        }
        catch (const Ort::Exception& e) {
            std::cerr << "ONNX Runtime Exception: " << e.what() << std::endl;
        }
    }

private:
    std::string input_image_path_;
    float conf_thres_;
    int64_t input_width_;
    int64_t input_height_;
    Ort::Env env_;
    std::unique_ptr<Ort::Session> session_;
    // Node names: the strings own the storage; the const char* vectors are
    // the views the ORT C API requires (prevents dangling pointers)
    std::vector<std::string> input_node_names_storage_;
    std::vector<const char*> input_node_names_;
    std::vector<std::string> output_node_names_storage_;
    std::vector<const char*> output_node_names_;
    std::vector<int64_t> input_dims_;
    std::map<int, std::string> classes_;
    std::vector<cv::Scalar> colors_;

    // Positive modulo with result in [0, m), matching Python's % semantics.
    // std::fmod keeps the sign of the dividend, which would break angle
    // regularization for negative angles.
    static float pos_fmod(float a, float m) {
        float r = std::fmod(a, m);
        return (r < 0.0f) ? (r + m) : r;
    }

    // Generate one pseudo-random color per class (fixed seed for stable colors)
    void generate_colors() {
        cv::RNG rng(42);
        for (size_t i = 0; i < classes_.size(); ++i) {
            colors_.push_back(cv::Scalar(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255)));
        }
    }

    // Letterbox resizing + normalization + HWC->CHW conversion.
    // Outputs: input_data (CHW float tensor), ratio (scale), pad (top, left).
    void preprocess(const cv::Mat& img, std::vector<float>& input_data, float& ratio, std::pair<int, int>& pad) {
        int w = img.cols;
        int h = img.rows;
        // Scale ratio (new / old)
        ratio = std::min((float)input_width_ / w, (float)input_height_ / h);
        // Padding needed on each side
        int new_unpad_w = (int)std::round(w * ratio);
        int new_unpad_h = (int)std::round(h * ratio);
        float dw = (input_width_ - new_unpad_w) / 2.0f;
        float dh = (input_height_ - new_unpad_h) / 2.0f;
        pad.first = (int)std::round(dh - 0.1f);  // top
        pad.second = (int)std::round(dw - 0.1f); // left
        int bottom = (int)std::round(dh + 0.1f);
        int right = (int)std::round(dw + 0.1f);
        // Resize
        cv::Mat resized;
        if (w != new_unpad_w || h != new_unpad_h) {
            cv::resize(img, resized, cv::Size(new_unpad_w, new_unpad_h));
        }
        else {
            resized = img.clone();
        }
        // Pad with the conventional YOLO gray (114, 114, 114)
        cv::Mat padded;
        cv::copyMakeBorder(resized, padded, pad.first, bottom, pad.second, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
        // Normalize to [0, 1] and convert HWC to CHW
        input_data.resize(1 * 3 * input_height_ * input_width_);
        cv::Mat rgb_img;
        cv::cvtColor(padded, rgb_img, cv::COLOR_BGR2RGB);
        rgb_img.convertTo(rgb_img, CV_32F, 1.0 / 255.0);
        // Split into per-channel planes
        std::vector<cv::Mat> channels(3);
        cv::split(rgb_img, channels);
        // Copy planes contiguously into the tensor buffer (CHW order)
        // Channel 0 (R)
        std::memcpy(input_data.data(), channels[0].data, input_height_ * input_width_ * sizeof(float));
        // Channel 1 (G)
        std::memcpy(input_data.data() + input_height_ * input_width_, channels[1].data, input_height_ * input_width_ * sizeof(float));
        // Channel 2 (B)
        std::memcpy(input_data.data() + 2 * input_height_ * input_width_, channels[2].data, input_height_ * input_width_ * sizeof(float));
    }

    // Decode end2end output rows [x, y, w, h, score, class_id, angle] into
    // confidence-filtered OBBResult entries in original-image coordinates.
    std::vector<OBBResult> postprocess(float* data, int num_detections, int num_features, float ratio, std::pair<int, int> pad) {
        std::vector<OBBResult> detections;
        for (int i = 0; i < num_detections; ++i) {
            // Start of the i-th detection row
            float* det = data + (i * num_features);
            // Format: [x, y, w, h, score, class_id, angle]
            float score = det[4];
            if (score < conf_thres_) continue;
            float x = det[0];
            float y = det[1];
            float w = det[2];
            float h = det[3];
            int class_id = (int)det[5];
            float angle = det[6];
            // Map back to original image coordinates (undo letterbox)
            x = (x - pad.second) / ratio;
            y = (y - pad.first) / ratio;
            w = w / ratio;
            h = h / ratio;
            // Angle regularization to [0, pi/2), mirroring the Python
            // reference: if (t % pi) >= (pi/2) swap(w, h).
            // pos_fmod is required because Python's % is always
            // non-negative while std::fmod keeps the dividend's sign.
            if (pos_fmod(angle, PI) >= (PI / 2.0f)) {
                std::swap(w, h);
            }
            angle = pos_fmod(angle, PI / 2.0f);
            // Corner points (xywhr -> 4 corners)
            float cos_a = std::cos(angle);
            float sin_a = std::sin(angle);
            float w2 = w / 2.0f;
            float h2 = h / 2.0f;
            // Rotation vectors: vec1 along the box width, vec2 along the height
            float vec1_x = w2 * cos_a;
            float vec1_y = w2 * sin_a;
            float vec2_x = -h2 * sin_a;
            float vec2_y = h2 * cos_a;
            OBBResult res;
            res.score = score;
            res.class_id = class_id;
            if (classes_.count(class_id)) res.class_name = classes_[class_id];
            else res.class_name = std::to_string(class_id);
            // Corner coordinates
            res.corners[0] = cv::Point2f(x + vec1_x + vec2_x, y + vec1_y + vec2_y);
            res.corners[1] = cv::Point2f(x + vec1_x - vec2_x, y + vec1_y - vec2_y);
            res.corners[2] = cv::Point2f(x - vec1_x - vec2_x, y - vec1_y - vec2_y);
            res.corners[3] = cv::Point2f(x - vec1_x + vec2_x, y - vec1_y + vec2_y);
            detections.push_back(res);
        }
        return detections;
    }

    // Draw one rotated box with a filled label background on img.
    void draw_rotated_box(cv::Mat& img, const OBBResult& res) {
        cv::Scalar color = colors_[res.class_id % colors_.size()];
        // Draw the polygon
        std::vector<cv::Point> pts;
        for (int i = 0; i < 4; i++) pts.push_back(res.corners[i]);
        std::vector<std::vector<cv::Point>> contours = { pts };
        cv::polylines(img, contours, true, color, 2, cv::LINE_AA);
        // Label text
        std::string label = res.class_name + ": " + cv::format("%.2f", res.score);
        int baseLine;
        cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
        // Label position: above the first corner
        int x = (int)res.corners[0].x;
        int y = (int)res.corners[0].y - 10;
        // Label background
        cv::rectangle(img, cv::Point(x, y - labelSize.height - 5),
            cv::Point(x + labelSize.width, y + 5), color, cv::FILLED);
        // Label text
        cv::putText(img, label, cv::Point(x, y), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 255, 255), 1, cv::LINE_AA);
    }
};
int main() {
    // Paths are resolved against the process working directory; keep the
    // model and image next to the executable (or adjust them here).
    const std::string model_path = "strip-yolo26.onnx";
    const std::string image_path = "obb.jpg";
    try {
        YOLO26OBB detector(model_path, image_path, 0.5f);
        detector.run();
    }
    catch (const std::exception& e) {
        std::cerr << "Main Exception: " << e.what() << std::endl;
        return -1;
    }
    return 0;
}