代码:https://github.com/mxsurui/NNIE-lite
修改
c
/**
 * Fill in the NNIE configuration and initialise the NNIE runtime parameters.
 *
 * Writes the maximum input count (4), the maximum ROI count (0) and the
 * NNIE core id into stNnieCfg, points s_stNnieParam->pstModel at the model
 * held in s_stModel, then calls SAMPLE_COMM_SVP_NNIE_ParamInit.
 *
 * If any argument is NULL a message is printed and nothing else is done.
 * If SAMPLE_COMM_SVP_NNIE_ParamInit does not return HI_SUCCESS,
 * NNIE_Param_Deinit is called and an error is traced. No status is
 * returned to the caller in either case.
 *
 * @param s_stModel      model wrapper; its stModel member is referenced.
 * @param stNnieCfg      configuration structure that is filled in here.
 * @param s_stNnieParam  parameter structure passed to the SDK initialiser.
 */
void nnie_param_init(SAMPLE_SVP_NNIE_MODEL_S *s_stModel, SAMPLE_SVP_NNIE_CFG_S *stNnieCfg, SAMPLE_SVP_NNIE_PARAM_S *s_stNnieParam)
{
    HI_S32 status;

    /* Guard: every pointer is dereferenced below. */
    if (!s_stModel || !stNnieCfg || !s_stNnieParam) {
        printf("NULL == s_stModel || NULL == stNnieCfg || NULL == s_stNnieParam \n");
        return;
    }

    stNnieCfg->u32MaxInputNum = 4; /* changed to the batch size */
    stNnieCfg->u32MaxRoiNum = 0;
    stNnieCfg->aenNnieCoreId[0] = SVP_NNIE_ID_0; /* set NNIE core */
    s_stNnieParam->pstModel = &s_stModel->stModel;

    status = SAMPLE_COMM_SVP_NNIE_ParamInit(stNnieCfg, s_stNnieParam);
    if (HI_SUCCESS == status) {
        printf("**** nnie param init success\n");
        return;
    }

    /* Initialisation failed: release whatever was set up, then report. */
    NNIE_Param_Deinit(s_stNnieParam, s_stModel);
    SAMPLE_SVP_TRACE_INFO("Error,nnie_Param_init failed!\n");
}
c
// Excerpt: run one inference call on a batch of n images and parse the three
// output tensors once per batch item. Lines containing only "..." are
// elisions in the original note, not code.

// Number of batch items processed by the loop below (presumably the batch
// size the network was initialised with -- confirm).
int n = 4;
...
// Input buffer of MODEL_HEIGHT * MODEL_HEIGHT * c * n bytes.
// NOTE(review): MODEL_HEIGHT is used for both image dimensions -- confirm the
// model input is square. The malloc result is not checked and no free() is
// visible in this excerpt.
unsigned char *data = (unsigned char *)malloc(sizeof(unsigned char) * MODEL_HEIGHT * MODEL_HEIGHT * c * n);
// NOTE(review): inference is run through `yolov5`, but the outputs below are
// read from `yolov5_mnas` -- confirm both names refer to the same network.
yolov5->run(data);
Tensor output0 = yolov5_mnas->getOutputTensor(0);
Tensor output1 = yolov5_mnas->getOutputTensor(1);
Tensor output2 = yolov5_mnas->getOutputTensor(2);
for (int i = 0; i < n; i++)
{
// NOTE(review): the next three statements compute an offset into each output
// for batch item i, but the result is discarded. As written, every iteration
// passes the same, un-offset output0/output1/output2 to parseYolov5Feature.
// Presumably the intent is to parse starting from these offset addresses, and
// the per-item sizes 6400*3*9, 1600*3*9 and 400*3*9 are
// cells * boxes-per-cell * values-per-box -- confirm.
output0.data + i*6400*3*9;
output1.data + i*1600*3*9;
output2.data + i*400*3*9;
// Parse each of the three outputs with its own feature index and anchor set;
// results are passed out through ids, boxes and confidences.
parseYolov5Feature(MODEL_HEIGHT, MODEL_HEIGHT, num_classes, kBoxPerCell, feature_index0, conf_threshold, anchors[2], output0, ids, boxes, confidences);
parseYolov5Feature(MODEL_HEIGHT, MODEL_HEIGHT, num_classes, kBoxPerCell, feature_index1, conf_threshold, anchors[1], output1, ids, boxes, confidences);
parseYolov5Feature(MODEL_HEIGHT, MODEL_HEIGHT, num_classes, kBoxPerCell, feature_index2, conf_threshold, anchors[0], output2, ids, boxes, confidences);
...
}
实测在 yolov5 推理上,多 batch 相比单 batch 节省的时间并不多(以下为每次推理的总耗时,折合单张图片约 311 / 290 / 286 ms):
bs=1: 311ms
bs=4: 1158ms
bs=8: 2287ms