Python version:
python
import onnxruntime as ort
import math
import cv2
import numpy as np
class YOLO26OBB:
    """YOLO26 oriented-bounding-box (OBB) detector using ONNX Runtime.

    Loads an end2end YOLO26 OBB model, runs it on a single image, filters
    detections by confidence, and draws the rotated boxes on the image.
    """

    def __init__(
        self,
        onnx_model: str,
        input_image: str,
        confidence_thres: float = 0.25,
        classes=None,
    ):
        """Initialize YOLO26OBB detector.

        Args:
            onnx_model (str): Path to the ONNX model.
            input_image (str): Path to the input image.
            confidence_thres (float): Confidence threshold for filtering detections.
            classes (dict, optional): Dictionary mapping class IDs to class names.
                Defaults to ``{0: "carton", 1: "strip"}``.
        """
        self.onnx_model = onnx_model
        self.input_image = input_image
        self.confidence_thres = confidence_thres
        self.classes = {0: "carton", 1: "strip"} if classes is None else classes
        # Generate a deterministic color palette for the classes
        np.random.seed(42)
        self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))

    def letterbox(
        self, img: np.ndarray, new_shape: tuple[int, int] = (640, 640)
    ) -> tuple[np.ndarray, float, tuple[int, int]]:
        """Resize and pad image while maintaining aspect ratio.

        Args:
            img (np.ndarray): Input image to be resized.
            new_shape (tuple[int, int]): Target shape (height, width).

        Returns:
            img (np.ndarray): Resized and padded image.
            ratio (float): Scaling ratio applied.
            pad (tuple[int, int]): Padding values (top, left).
        """
        shape = img.shape[:2]  # current shape [height, width]
        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        # Size after resizing, before padding: (width, height)
        new_unpad = round(shape[1] * r), round(shape[0] * r)
        dw, dh = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2
        if shape[::-1] != new_unpad:
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        # The +/-0.1 keeps the top/bottom (left/right) split symmetric when
        # the total padding is odd
        top, bottom = round(dh - 0.1), round(dh + 0.1)
        left, right = round(dw - 0.1), round(dw + 0.1)
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)
        )
        return img, r, (top, left)

    def xywhr2xyxyxyxy(self, rboxes: np.ndarray) -> np.ndarray:
        """Convert rotated bounding boxes from xywhr format to 4 corner points.

        Args:
            rboxes (np.ndarray): Rotated boxes with shape (N, 5) in [cx, cy, w, h, angle].

        Returns:
            (np.ndarray): Corner points with shape (N, 4, 2).
        """
        cos = np.cos(rboxes[:, 4])
        sin = np.sin(rboxes[:, 4])
        cx, cy = rboxes[:, 0], rboxes[:, 1]
        w, h = rboxes[:, 2], rboxes[:, 3]
        # Half dimensions
        w2, h2 = w / 2, h / 2
        # Rotation vectors: vec1 points along the box width, vec2 along the height
        vec1_x = w2 * cos
        vec1_y = w2 * sin
        vec2_x = -h2 * sin
        vec2_y = h2 * cos
        # Four corner points
        pt1 = np.stack([cx + vec1_x + vec2_x, cy + vec1_y + vec2_y], axis=-1)
        pt2 = np.stack([cx + vec1_x - vec2_x, cy + vec1_y - vec2_y], axis=-1)
        pt3 = np.stack([cx - vec1_x - vec2_x, cy - vec1_y - vec2_y], axis=-1)
        pt4 = np.stack([cx - vec1_x + vec2_x, cy - vec1_y + vec2_y], axis=-1)
        return np.stack([pt1, pt2, pt3, pt4], axis=1)

    def regularize_rboxes(self, rboxes: np.ndarray) -> np.ndarray:
        """Regularize rotated bounding boxes to angle range [0, pi/2).

        Args:
            rboxes (np.ndarray): Rotated boxes with shape (N, 5) in xywhr format.

        Returns:
            (np.ndarray): Regularized rotated boxes.
        """
        x, y, w, h, t = rboxes[:, 0], rboxes[:, 1], rboxes[:, 2], rboxes[:, 3], rboxes[:, 4]
        # Swap width/height when the (wrapped) angle crosses pi/2
        swap = (t % math.pi) >= (math.pi / 2)
        w_ = np.where(swap, h, w)
        h_ = np.where(swap, w, h)
        t = t % (math.pi / 2)
        return np.stack([x, y, w_, h_, t], axis=-1)

    def draw_rotated_box(
        self, img: np.ndarray, corners: np.ndarray, score: float, class_id: int
    ) -> None:
        """Draw an oriented bounding box on the image.

        Args:
            img (np.ndarray): Image to draw on (modified in place).
            corners (np.ndarray): Four corner points with shape (4, 2).
            score (float): Detection confidence score.
            class_id (int): Class ID for the detection.
        """
        color = tuple(map(int, self.color_palette[int(class_id) % len(self.color_palette)]))
        corners = corners.astype(np.int32)
        # Draw the rotated rectangle
        cv2.polylines(img, [corners], isClosed=True, color=color, thickness=2)
        # Draw label (falls back to the numeric ID for unknown classes)
        label = f"{self.classes.get(int(class_id), int(class_id))}: {score:.2f}"
        label_pos = (int(corners[0, 0]), int(corners[0, 1]) - 10)
        (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        # Background rectangle for label
        cv2.rectangle(
            img,
            (label_pos[0], label_pos[1] - label_height - 5),
            (label_pos[0] + label_width, label_pos[1] + 5),
            color,
            cv2.FILLED,
        )
        cv2.putText(
            img, label, label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA
        )

    def preprocess(self) -> tuple[np.ndarray, float, tuple[int, int]]:
        """Preprocess the input image for inference.

        Requires ``self.input_height`` / ``self.input_width`` to be set
        (done in :meth:`main` from the model's input shape).

        Returns:
            image_data (np.ndarray): Preprocessed image with shape (1, 3, H, W).
            ratio (float): Scaling ratio applied during letterbox.
            pad (tuple[int, int]): Padding (top, left) applied during letterbox.

        Raises:
            FileNotFoundError: If the input image cannot be read.
        """
        # Read image
        self.img = cv2.imread(self.input_image)
        if self.img is None:
            raise FileNotFoundError(f"Image not found: {self.input_image}")
        self.img_height, self.img_width = self.img.shape[:2]
        # Convert BGR to RGB
        img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)
        # Letterbox
        img, ratio, pad = self.letterbox(img, (self.input_height, self.input_width))
        # Normalize to [0, 1] and transpose: HWC -> CHW, add batch dimension
        image_data = np.ascontiguousarray(img.transpose(2, 0, 1)[None].astype(np.float32) / 255.0)
        return image_data, ratio, pad

    def postprocess_end2end(
        self, output: np.ndarray, ratio: float, pad: tuple[int, int]
    ) -> list[tuple[np.ndarray, float, int]]:
        """Post-process YOLO26 end2end model output.

        YOLO26 end2end output format: (batch, max_det, 7).
        Each detection: [x, y, w, h, score, class_id, angle].

        Args:
            output (np.ndarray): Model output with shape (1, max_det, 7).
            ratio (float): Scaling ratio from letterbox.
            pad (tuple[int, int]): Padding (top, left) from letterbox.

        Returns:
            (list): List of (corners, score, class_id) tuples.
        """
        # Remove batch dimension: (1, max_det, 7) -> (max_det, 7)
        preds = np.squeeze(output, axis=0)
        # Extract components: [x, y, w, h, score, class_id, angle]
        boxes_xywh = preds[:, :4]  # x, y, w, h
        scores = preds[:, 4]       # confidence score
        class_ids = preds[:, 5]    # class index
        angles = preds[:, 6]       # rotation angle
        # Filter by confidence threshold
        mask = scores >= self.confidence_thres
        boxes_xywh = boxes_xywh[mask]
        scores = scores[mask]
        class_ids = class_ids[mask]
        angles = angles[mask]
        if len(boxes_xywh) == 0:
            return []
        # Scale boxes back to original image size (undo letterbox)
        boxes_xywh[:, 0] = (boxes_xywh[:, 0] - pad[1]) / ratio  # x
        boxes_xywh[:, 1] = (boxes_xywh[:, 1] - pad[0]) / ratio  # y
        boxes_xywh[:, 2] = boxes_xywh[:, 2] / ratio  # w
        boxes_xywh[:, 3] = boxes_xywh[:, 3] / ratio  # h
        # Combine boxes with angles: (N, 5) in xywhr format
        rboxes = np.concatenate([boxes_xywh, angles[:, None]], axis=1)
        # Regularize angles to [0, pi/2)
        rboxes = self.regularize_rboxes(rboxes)
        # Convert to corner points
        corners = self.xywhr2xyxyxyxy(rboxes)
        return list(zip(corners, scores, class_ids))

    def main(self) -> np.ndarray:
        """Perform inference and return the output image with drawn detections.

        Returns:
            (np.ndarray): Output image with drawn oriented bounding boxes.
        """
        # Create ONNX Runtime session, preferring CUDA when available
        available = ort.get_available_providers()
        providers = [p for p in ("CUDAExecutionProvider", "CPUExecutionProvider") if p in available]
        session = ort.InferenceSession(self.onnx_model, providers=providers or available)
        # Get model input shape (assumed [batch, 3, H, W])
        model_inputs = session.get_inputs()
        input_shape = model_inputs[0].shape
        self.input_height = input_shape[2]
        self.input_width = input_shape[3]
        # Preprocess, run inference, post-process
        img_data, ratio, pad = self.preprocess()
        outputs = session.run(None, {model_inputs[0].name: img_data})
        detections = self.postprocess_end2end(outputs[0], ratio, pad)
        # Draw detections on a copy of the original image
        output_img = self.img.copy()
        for corners, score, class_id in detections:
            self.draw_rotated_box(output_img, corners, score, class_id)
        print(f"Found {len(detections)} detections")
        return output_img
def main():
    """Run OBB detection on the demo image and save the visualization."""
    detector = YOLO26OBB("strip-yolo26.onnx", "obb.jpg", 0.5)
    result = detector.main()
    cv2.imwrite("output.jpg", result)


if __name__ == "__main__":
    main()
C++
cpp
// Standard library
#include <algorithm>
#include <cmath>
#include <cstring>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>
// OpenCV
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
// ONNX Runtime
#include <onnxruntime_cxx_api.h>
// Constants
const float PI = 3.14159265358979323846f;
// Holds one oriented-bounding-box detection result
struct OBBResult {
    cv::Point2f corners[4]; // the 4 corner points of the rotated box
    float score;            // detection confidence
    int class_id;           // class index
    std::string class_name; // human-readable class name
};
class YOLO26OBB {
public:
    /**
     * Build the detector: create the ONNX Runtime session and cache the
     * model's input/output node names and input shape.
     *
     * @param model_path Path to the ONNX model file.
     * @param image_path Path to the input image.
     * @param conf_thres Confidence threshold for filtering detections.
     */
    YOLO26OBB(const std::string& model_path, const std::string& image_path, float conf_thres = 0.25)
        : input_image_path_(image_path), conf_thres_(conf_thres), env_(ORT_LOGGING_LEVEL_WARNING, "YOLO26OBB") {
        // 1. Configure session options
        Ort::SessionOptions session_options;
        session_options.SetIntraOpNumThreads(1);
        session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
        // 2. Create the session (ORT on Windows expects a wide-char path)
#ifdef _WIN32
        // Windows: convert char* to wchar_t*
        size_t newsize = model_path.length() + 1;
        std::wstring w_model_path(newsize, L'\0');
        size_t convertedChars = 0;
        mbstowcs_s(&convertedChars, &w_model_path[0], newsize, model_path.c_str(), _TRUNCATE);
        session_ = std::make_unique<Ort::Session>(env_, w_model_path.c_str(), session_options);
#else
        // Linux/Mac: the narrow char* path works directly
        session_ = std::make_unique<Ort::Session>(env_, model_path.c_str(), session_options);
#endif
        // 3. Collect input node information
        Ort::AllocatorWithDefaultOptions allocator;
        size_t num_input_nodes = session_->GetInputCount();
        input_node_names_storage_.reserve(num_input_nodes);
        input_node_names_.reserve(num_input_nodes);
        for (size_t i = 0; i < num_input_nodes; i++) {
            // Deep-copy the name into a std::string so the const char* we
            // hand to Run() cannot dangle
            auto input_name_ptr = session_->GetInputNameAllocated(i, allocator);
            input_node_names_storage_.emplace_back(input_name_ptr.get());
            input_node_names_.push_back(input_node_names_storage_.back().c_str());
            // Input dimensions (if there are several inputs, the last wins;
            // YOLO models have a single image input)
            auto type_info = session_->GetInputTypeInfo(i);
            auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
            input_dims_ = tensor_info.GetShape();
        }
        // Assume an input shape of [1, 3, H, W]. Note that input_dims_[0]
        // may be -1 (dynamic batch); only H and W are used here.
        if (input_dims_.size() >= 4) {
            input_height_ = input_dims_[2];
            input_width_ = input_dims_[3];
        }
        else {
            // Fallback defaults in case the shape could not be read
            input_height_ = 640;
            input_width_ = 640;
        }
        // 4. Collect output node information
        size_t num_output_nodes = session_->GetOutputCount();
        output_node_names_storage_.reserve(num_output_nodes);
        output_node_names_.reserve(num_output_nodes);
        for (size_t i = 0; i < num_output_nodes; i++) {
            auto output_name_ptr = session_->GetOutputNameAllocated(i, allocator);
            output_node_names_storage_.emplace_back(output_name_ptr.get());
            output_node_names_.push_back(output_node_names_storage_.back().c_str());
        }
        // 5. Initialize classes and colors
        classes_ = { {0, "carton"}, {1, "strip"} };
        generate_colors();
    }

    /** Run the full pipeline: load image, infer, post-process, draw, save. */
    void run() {
        // Load the input image
        cv::Mat original_img = cv::imread(input_image_path_);
        if (original_img.empty()) {
            std::cerr << "Error: Image not found at " << input_image_path_ << std::endl;
            return;
        }
        // 1. Preprocess
        float ratio;
        std::pair<int, int> pad;
        std::vector<float> input_tensor_values;
        preprocess(original_img, input_tensor_values, ratio, pad);
        // 2. Create the input tensor (borrows input_tensor_values' buffer)
        size_t input_tensor_size = input_tensor_values.size();
        std::vector<int64_t> input_shape = { 1, 3, input_height_, input_width_ };
        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
            memory_info, input_tensor_values.data(), input_tensor_size, input_shape.data(), input_shape.size()
        );
        // 3. Inference
        try {
            auto output_tensors = session_->Run(
                Ort::RunOptions{ nullptr },
                input_node_names_.data(), &input_tensor, 1,
                output_node_names_.data(), output_node_names_.size()
            );
            // 4. Post-process
            float* floatarr = output_tensors[0].GetTensorMutableData<float>();
            auto output_shape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
            // Expected output shape: (1, max_det, 7) -> [batch, detections, features]
            // features: [x, y, w, h, score, class_id, angle]
            int num_detections = 0;
            int num_features = 0;
            if (output_shape.size() == 3) {
                num_detections = (int)output_shape[1];
                num_features = (int)output_shape[2];
            }
            else if (output_shape.size() == 2) {
                // Some exports produce (max_det, 7) without a batch dimension
                num_detections = (int)output_shape[0];
                num_features = (int)output_shape[1];
            }
            std::vector<OBBResult> results = postprocess(floatarr, num_detections, num_features, ratio, pad);
            // 5. Draw and save
            cv::Mat output_img = original_img.clone();
            std::cout << "Found " << results.size() << " detections." << std::endl;
            for (const auto& res : results) {
                draw_rotated_box(output_img, res);
            }
            cv::imwrite("output.jpg", output_img);
            std::cout << "Result saved to output.jpg" << std::endl;
        }
        catch (const Ort::Exception& e) {
            std::cerr << "ONNX Runtime Exception: " << e.what() << std::endl;
        }
    }

private:
    std::string input_image_path_;
    float conf_thres_;
    int64_t input_width_;
    int64_t input_height_;
    Ort::Env env_;
    std::unique_ptr<Ort::Session> session_;
    // Node names: the strings own the storage; the const char* vectors are
    // the views the ORT C API requires (prevents dangling pointers)
    std::vector<std::string> input_node_names_storage_;
    std::vector<const char*> input_node_names_;
    std::vector<std::string> output_node_names_storage_;
    std::vector<const char*> output_node_names_;
    std::vector<int64_t> input_dims_;
    std::map<int, std::string> classes_;
    std::vector<cv::Scalar> colors_;

    // Positive modulo with result in [0, m), matching Python's % semantics.
    // std::fmod keeps the sign of the dividend, which would break angle
    // regularization for negative angles.
    static float pos_fmod(float a, float m) {
        float r = std::fmod(a, m);
        return (r < 0.0f) ? (r + m) : r;
    }

    // Generate one pseudo-random color per class (fixed seed for stable colors)
    void generate_colors() {
        cv::RNG rng(42);
        for (size_t i = 0; i < classes_.size(); ++i) {
            colors_.push_back(cv::Scalar(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255)));
        }
    }

    // Letterbox resizing + normalization + HWC->CHW conversion.
    // Outputs: input_data (CHW float tensor), ratio (scale), pad (top, left).
    void preprocess(const cv::Mat& img, std::vector<float>& input_data, float& ratio, std::pair<int, int>& pad) {
        int w = img.cols;
        int h = img.rows;
        // Scale ratio (new / old)
        ratio = std::min((float)input_width_ / w, (float)input_height_ / h);
        // Padding needed on each side
        int new_unpad_w = (int)std::round(w * ratio);
        int new_unpad_h = (int)std::round(h * ratio);
        float dw = (input_width_ - new_unpad_w) / 2.0f;
        float dh = (input_height_ - new_unpad_h) / 2.0f;
        pad.first = (int)std::round(dh - 0.1f);  // top
        pad.second = (int)std::round(dw - 0.1f); // left
        int bottom = (int)std::round(dh + 0.1f);
        int right = (int)std::round(dw + 0.1f);
        // Resize
        cv::Mat resized;
        if (w != new_unpad_w || h != new_unpad_h) {
            cv::resize(img, resized, cv::Size(new_unpad_w, new_unpad_h));
        }
        else {
            resized = img.clone();
        }
        // Pad with the conventional YOLO gray (114, 114, 114)
        cv::Mat padded;
        cv::copyMakeBorder(resized, padded, pad.first, bottom, pad.second, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
        // Normalize to [0, 1] and convert HWC to CHW
        input_data.resize(1 * 3 * input_height_ * input_width_);
        cv::Mat rgb_img;
        cv::cvtColor(padded, rgb_img, cv::COLOR_BGR2RGB);
        rgb_img.convertTo(rgb_img, CV_32F, 1.0 / 255.0);
        // Split into per-channel planes
        std::vector<cv::Mat> channels(3);
        cv::split(rgb_img, channels);
        // Copy planes contiguously into the tensor buffer (CHW order)
        // Channel 0 (R)
        std::memcpy(input_data.data(), channels[0].data, input_height_ * input_width_ * sizeof(float));
        // Channel 1 (G)
        std::memcpy(input_data.data() + input_height_ * input_width_, channels[1].data, input_height_ * input_width_ * sizeof(float));
        // Channel 2 (B)
        std::memcpy(input_data.data() + 2 * input_height_ * input_width_, channels[2].data, input_height_ * input_width_ * sizeof(float));
    }

    // Decode end2end output rows [x, y, w, h, score, class_id, angle] into
    // confidence-filtered OBBResult entries in original-image coordinates.
    std::vector<OBBResult> postprocess(float* data, int num_detections, int num_features, float ratio, std::pair<int, int> pad) {
        std::vector<OBBResult> detections;
        for (int i = 0; i < num_detections; ++i) {
            // Start of the i-th detection row
            float* det = data + (i * num_features);
            // Format: [x, y, w, h, score, class_id, angle]
            float score = det[4];
            if (score < conf_thres_) continue;
            float x = det[0];
            float y = det[1];
            float w = det[2];
            float h = det[3];
            int class_id = (int)det[5];
            float angle = det[6];
            // Map back to original image coordinates (undo letterbox)
            x = (x - pad.second) / ratio;
            y = (y - pad.first) / ratio;
            w = w / ratio;
            h = h / ratio;
            // Angle regularization to [0, pi/2), mirroring the Python
            // reference: if (t % pi) >= (pi/2) swap(w, h).
            // pos_fmod is required because Python's % is always
            // non-negative while std::fmod keeps the dividend's sign.
            if (pos_fmod(angle, PI) >= (PI / 2.0f)) {
                std::swap(w, h);
            }
            angle = pos_fmod(angle, PI / 2.0f);
            // Corner points (xywhr -> 4 corners)
            float cos_a = std::cos(angle);
            float sin_a = std::sin(angle);
            float w2 = w / 2.0f;
            float h2 = h / 2.0f;
            // Rotation vectors: vec1 along the box width, vec2 along the height
            float vec1_x = w2 * cos_a;
            float vec1_y = w2 * sin_a;
            float vec2_x = -h2 * sin_a;
            float vec2_y = h2 * cos_a;
            OBBResult res;
            res.score = score;
            res.class_id = class_id;
            if (classes_.count(class_id)) res.class_name = classes_[class_id];
            else res.class_name = std::to_string(class_id);
            // Corner coordinates
            res.corners[0] = cv::Point2f(x + vec1_x + vec2_x, y + vec1_y + vec2_y);
            res.corners[1] = cv::Point2f(x + vec1_x - vec2_x, y + vec1_y - vec2_y);
            res.corners[2] = cv::Point2f(x - vec1_x - vec2_x, y - vec1_y - vec2_y);
            res.corners[3] = cv::Point2f(x - vec1_x + vec2_x, y - vec1_y + vec2_y);
            detections.push_back(res);
        }
        return detections;
    }

    // Draw one rotated box with a filled label background on img.
    void draw_rotated_box(cv::Mat& img, const OBBResult& res) {
        cv::Scalar color = colors_[res.class_id % colors_.size()];
        // Draw the polygon
        std::vector<cv::Point> pts;
        for (int i = 0; i < 4; i++) pts.push_back(res.corners[i]);
        std::vector<std::vector<cv::Point>> contours = { pts };
        cv::polylines(img, contours, true, color, 2, cv::LINE_AA);
        // Label text
        std::string label = res.class_name + ": " + cv::format("%.2f", res.score);
        int baseLine;
        cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
        // Label position: above the first corner
        int x = (int)res.corners[0].x;
        int y = (int)res.corners[0].y - 10;
        // Label background
        cv::rectangle(img, cv::Point(x, y - labelSize.height - 5),
            cv::Point(x + labelSize.width, y + 5), color, cv::FILLED);
        // Label text
        cv::putText(img, label, cv::Point(x, y), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 255, 255), 1, cv::LINE_AA);
    }
};
int main() {
    // Paths are resolved against the process working directory; keep the
    // model and image next to the executable (or adjust them here).
    const std::string model_path = "strip-yolo26.onnx";
    const std::string image_path = "obb.jpg";
    try {
        YOLO26OBB detector(model_path, image_path, 0.5f);
        detector.run();
    }
    catch (const std::exception& e) {
        std::cerr << "Main Exception: " << e.what() << std::endl;
        return -1;
    }
    return 0;
}