文章目录
- [1. 前述](#1-前述)
- [2. 部分代码](#2-部分代码)
- [3. 说明](#3-说明)
1. 前述
OK,这一篇博客将完整给出最后的优化教程,包括代码设计。
首先有这样的目录结构:
shell
./rknn_engine
├── include
│ ├── def
│ │ └── rknn_define.h
│ └── rknn_engine.h
├── src
│ ├── common
│ │ ├── rknn_data.h
│ │ └── rknn_functions.hpp
│ ├── inference
│ │ ├── inference.cpp
│ │ └── inference.h
│ ├── postprocess
│ │ ├── postprocess.cpp
│ │ └── postprocess.h
│ ├── preprocess
│ │ ├── preprocess.cpp
│ │ └── preprocess.h
│ ├── rknn_engine.cpp
│ └── task
│ ├── base_task.h
│ ├── pool_task.cpp
│ ├── pool_task.h
│ ├── single_task.cpp
│ ├── single_task.h
│ └── task_define.h
├── xmake.lua
└── xmake_repo
10 directories, 18 files
其实这里只给出了 detection 部分的设计,其他的 segment 和 pose 在此基础上对应扩充一下即可,整体来说还是比较简单的。
2. 部分代码
就给出一些代码示意吧:
Inference::init
cpp
bool Inference::init(const rknn_binary &model_data)
{
int ret = rknn_init(&m_rknnCtx, const_cast<char *>(&model_data[0]), static_cast<uint32_t>(model_data.size()), 0, nullptr);
if (ret < 0)
{
spdlog::error("RKNN初始化失败, ret={}", ret);
return false;
}
m_isInited = true;
// 配置运行在什么核心上
ret = rknn_set_core_mask(m_rknnCtx, rk3588_npu[m_ctxIndex++ % NPU_NUMS]);
if (ret != RKNN_SUCC)
{
spdlog::error("rknn_set_core_mask failed, ret:{}", ret);
return false;
}
// 获取版本信息
rknn_sdk_version version;
ret = rknn_query(m_rknnCtx, RKNN_QUERY_SDK_VERSION, &version, sizeof(rknn_sdk_version));
if (ret < 0)
{
spdlog::error("RKNN查询版本失败, ret={}", ret);
return false;
}
spdlog::info("RKNN API version: {}", version.api_version);
spdlog::info("RKNN Driver version: {}", version.drv_version);
// 获取输入输出的个数
rknn_input_output_num io_num;
ret = rknn_query(m_rknnCtx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
if (ret != RKNN_SUCC)
{
spdlog::error("RKNN查询输入输出失败, ret={}", ret);
return false;
}
spdlog::info("模型的输入数量: {}, 输出数量: {}", io_num.n_input, io_num.n_output);
// 模型的输入属性
m_modelParams.m_nInput = io_num.n_input;
for (uint32_t index = 0; index < io_num.n_input; ++index)
{
rknn_tensor_attr attr{0};
attr.index = index;
ret = rknn_query(m_rknnCtx, RKNN_QUERY_INPUT_ATTR, &attr, sizeof(attr));
if (ret != RKNN_SUCC)
{
spdlog::error("RKNN查询输入属性失败, ret={}", ret);
return false;
}
logTensorAttr(attr);
m_modelParams.m_inputAttrs.push_back(attr);
}
// 模型的输出属性
m_modelParams.m_nOutput = io_num.n_output;
for (uint32_t index = 0; index < io_num.n_output; ++index)
{
rknn_tensor_attr attr{0};
attr.index = index;
ret = rknn_query(m_rknnCtx, RKNN_QUERY_OUTPUT_ATTR, &attr, sizeof(attr));
if (ret != RKNN_SUCC)
{
spdlog::error("RKNN查询输出属性失败, ret={}", ret);
return false;
}
logTensorAttr(attr);
m_modelParams.m_outputAttrs.push_back(attr);
}
// 判断是否是量化的
auto &out1_attr = m_modelParams.m_outputAttrs[0];
if (out1_attr.qnt_type == RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC && out1_attr.type == RKNN_TENSOR_INT8)
{
m_modelParams.m_isFloat = false;
}
else
{
m_modelParams.m_isFloat = true;
}
// 获得宽高和通道
auto &in1_attr = m_modelParams.m_inputAttrs[0];
if (in1_attr.fmt == RKNN_TENSOR_NCHW)
{
spdlog::info("model is NCHW input fmt.");
m_modelParams.m_modelChannel = in1_attr.dims[1];
m_modelParams.m_modelHeight = in1_attr.dims[2];
m_modelParams.m_modelWidth = in1_attr.dims[3];
}
else
{
spdlog::info("model is NHWC input fmt.");
m_modelParams.m_modelChannel = in1_attr.dims[3];
m_modelParams.m_modelHeight = in1_attr.dims[1];
m_modelParams.m_modelWidth = in1_attr.dims[2];
}
spdlog::info("model input height:{} width:{} channel:{}",
m_modelParams.m_modelHeight, m_modelParams.m_modelWidth, m_modelParams.m_modelChannel);
spdlog::info("RKNN初始化成功!");
return true;
}
SingleTask::work
cpp
/// Run one full detect pass (preprocess -> inference -> postprocess) on a frame.
/// @param img  input image in its original resolution
/// @param dets appended with one DetectionResult per detection, boxes mapped
///             back to the original image coordinates
/// @return true (failures of the underlying stages are not propagated here)
bool SingleTask::work(const cv::Mat &img, std::vector<DetectionResult> &dets)
{
    // --- Preprocess: fill the model input buffer from the frame.
    m_preprocess.run(img, m_pImgBuff);
    // --- Inference.
    // std::vector replaces the original C-style VLAs (a compiler extension,
    // not standard C++); value-initialization zeroes the structs, so the
    // memset calls are no longer needed.
    std::vector<rknn_input> inputs(m_modelParams.m_nInput);
    inputs[0].index = 0;
    inputs[0].type = RKNN_TENSOR_UINT8;
    inputs[0].fmt = RKNN_TENSOR_NHWC;
    inputs[0].size = m_imgSize;
    inputs[0].buf = m_pImgBuff;
    std::vector<rknn_output> outputs(m_modelParams.m_nOutput);
    for (uint32_t index = 0; index < m_modelParams.m_nOutput; ++index)
    {
        outputs[index].index = index;
        // NOTE(review): want_float mirrors m_isFloat — confirm this is the
        // intended dequantization policy for quantized models.
        outputs[index].want_float = m_modelParams.m_isFloat;
    }
    m_inference.run(inputs.data(), outputs.data());
    // --- Postprocess: decode raw tensors, then map boxes back to the
    // original image using the preprocess scale factor.
    m_postprocess->run(outputs.data());
    int width = img.cols;
    int height = img.rows;
    auto scale = m_preprocess.getScale();
    auto &labels = m_postprocess->getLabels();
    auto &results = m_postprocess->detectionResult();
    int label_count = static_cast<int>(labels.size());
    dets.reserve(dets.size() + results.size());
    for (auto &result : results)
    {
        DetectionResult rd;
        // Out-of-range class ids become "unknown" instead of indexing past
        // the label table.
        int id = result.m_classId < 0 ? -1 : (result.m_classId < label_count ? result.m_classId : -1);
        rd.m_className = id < 0 ? "unknown" : labels[id];
        rd.m_confidence = result.m_confidence;
        // Scale back to original resolution and clamp inside the frame.
        rd.m_box.x = static_cast<int>(result.m_x * scale);
        rd.m_box.x = clamp_i(rd.m_box.x, 0, width);
        rd.m_box.y = static_cast<int>(result.m_y * scale);
        rd.m_box.y = clamp_i(rd.m_box.y, 0, height);
        int w = static_cast<int>(result.m_w * scale);
        int h = static_cast<int>(result.m_h * scale);
        rd.m_box.w = clamp_i(w, 0, width - rd.m_box.x);
        rd.m_box.h = clamp_i(h, 0, height - rd.m_box.y);
        dets.push_back(rd);
    }
    // Release the buffers rknn allocated for the outputs.
    rknn_outputs_release(m_inference.rknnContext(), m_modelParams.m_nOutput, outputs.data());
    return true;
}
PoolTask::onDetectionResult
cpp
/// Callback invoked when one task finishes: stores the result keyed by task
/// id, then drains every result that is now ready in submission order into
/// the callback list (results may complete out of order across the pool).
void PoolTask::onDetectionResult(TaskData &&task_data, std::vector<DetectionResult> &&dets)
{
    TaskResult res{true, std::move(task_data), std::move(dets)};
    list<TaskResult> tmp_res;
    {
        lock_guard<mutex> lg(m_mutexOutput);
        m_taskResults[res.m_taskData.m_taskId] = std::move(res);
        // Drain the contiguous run of ready results starting at m_outputIndex.
        // Using find() + erase() fixes two leaks in the original:
        //   1. drained entries were never removed, so the map grew forever;
        //   2. operator[] on a missing m_outputIndex default-inserted a
        //      placeholder entry on every call.
        for (auto it = m_taskResults.find(m_outputIndex);
             it != m_taskResults.end() && it->second.m_isGet;
             it = m_taskResults.find(m_outputIndex))
        {
            tmp_res.emplace_back(std::move(it->second));
            m_taskResults.erase(it);
            ++m_outputIndex;
        }
    }
    // Splice under a separate lock so the output lock is not held while the
    // callback list is being extended.
    if (!tmp_res.empty())
    {
        lock_guard<mutex> lg(m_mutexCb);
        m_cbResults.splice(m_cbResults.end(), tmp_res);
    }
}
3. 说明
完整代码将发布在原力平台上,可访问文末的下载地址进行下载。