The Full Model Deployment Workflow
(1) Training the Model
Train a simple CNN image-classification model with the PyTorch framework.
python
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
# Check whether a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define the data transforms
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the datasets
train_data = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_data = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=False)  # no need to shuffle the test set
# Define the CNN model
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 4 * 4, 500)  # 32x32 halved by three 2x2 poolings -> 4x4
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        # Convolutional layers
        x = self.conv1(x)
        x = nn.functional.relu(x)
        x = nn.functional.max_pool2d(x, 2)
        x = self.conv2(x)
        x = nn.functional.relu(x)
        x = nn.functional.max_pool2d(x, 2)
        x = self.conv3(x)
        x = nn.functional.relu(x)
        x = nn.functional.max_pool2d(x, 2)
        # Fully connected layers
        x = x.view(-1, 64 * 4 * 4)
        x = self.fc1(x)
        x = nn.functional.relu(x)
        x = self.fc2(x)
        return x
# Create the model instance and move it to the GPU
model = Net().to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Train the model
for epoch in range(10):  # train for 10 epochs
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # Get the inputs and move them to the GPU
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # Zero the gradients
        optimizer.zero_grad()
        # Forward pass, backward pass, optimizer step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Print running statistics
        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
print('Finished Training')

# Save the model weights
torch.save(model.state_dict(), 'cifar10_model_weights.pth')
print('Model weights saved to cifar10_model_weights.pth')
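Note that the test_loader defined above is never actually used during training. A minimal evaluation sketch (assuming it runs in the same script, right after training) to check test-set accuracy before moving on to deployment:
python
# Evaluate accuracy on the test set (model, device and test_loader from above)
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = outputs.argmax(dim=1)  # index of the highest logit
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f"Test accuracy: {100.0 * correct / total:.2f}%")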
(2) Converting the PyTorch Model to ONNX
① Note that the export settings here must stay consistent with the settings used later for the RKNN conversion.
python
# Export to an ONNX model
dummy_input = torch.randn(1, 3, 32, 32, device=device)  # CIFAR-10 input size
onnx_file = "cifar10_model.onnx"
torch.onnx.export(
    model,                     # the model to convert
    dummy_input,               # example model input
    onnx_file,                 # output file name
    export_params=True,        # export the trained weights
    opset_version=11,          # ONNX opset version
    do_constant_folding=True,  # constant-folding optimization
    input_names=["input"],     # input node name
    output_names=["output"],   # output node name
)
print(f"ONNX model saved as {onnx_file}")
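It is worth sanity-checking the export before moving on. A minimal sketch (assuming the onnx and onnxruntime packages are installed, and reusing model and dummy_input from above) that validates the graph and compares the ONNX output against PyTorch on the same input:
python
import onnx
import onnxruntime as ort
import numpy as np

# Structural check of the exported graph
onnx.checker.check_model(onnx.load(onnx_file))

# Compare ONNX Runtime output against the original PyTorch model
session = ort.InferenceSession(onnx_file, providers=['CPUExecutionProvider'])
x = dummy_input.cpu().numpy()
onnx_out = session.run(["output"], {"input": x})[0]
model.eval()
with torch.no_grad():
    torch_out = model(dummy_input).cpu().numpy()
print("max abs diff:", np.abs(onnx_out - torch_out).max())  # expect ~1e-5 or smaller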
② Testing the ONNX model on the PC
Because the trained PyTorch model has no softmax layer, the exported ONNX model has none either; to obtain prediction probabilities you have to apply a softmax yourself.
The script below simply reads a single image and classifies it.
python
import onnxruntime as ort
from PIL import Image
import numpy as np
import torch
import torchvision.transforms as transforms

# CIFAR-10 class names (official order)
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']

# Image preprocessing (identical to training)
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # CIFAR-10 input size
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),
                         (0.5, 0.5, 0.5))
])

def load_image(img_path):
    img = Image.open(img_path).convert("RGB")
    img = transform(img)
    img = img.unsqueeze(0)  # batch_size = 1
    return img.numpy()

# ONNX inference
def infer_onnx(img_path, onnx_path="cifar10_model.onnx"):
    # Create an ONNX Runtime session
    session = ort.InferenceSession(onnx_path, providers=['CPUExecutionProvider'])
    # Look up the input and output names
    input_name = session.get_inputs()[0].name
    output_name = session.get_outputs()[0].name
    # Load the image
    img = load_image(img_path)
    # Run inference
    outputs = session.run([output_name], {input_name: img})
    logits = outputs[0]
    # Softmax to turn logits into probabilities
    probs = torch.softmax(torch.tensor(logits), dim=1)
    pred_idx = torch.argmax(probs, dim=1).item()
    return classes[pred_idx], probs[0][pred_idx].item()

# Main program
if __name__ == "__main__":
    img_path = "test.jpg"  # path to the image
    pred_label, confidence = infer_onnx(img_path)
    print("Predicted class:", pred_label)
    print("Confidence:", confidence)
(3) Converting the ONNX Model to RKNN
This step must be performed in a virtual machine (or other host environment) with rknn-toolkit2 installed. Pay attention to version matching: the rknn-toolkit2 version must be compatible with the RKNN runtime library on the board.
The conversion parameters must match the original model.
Because the original model normalizes its input before inference, the normalization step is folded into the model here (via mean_values/std_values); after conversion, the RKNN model's input must not be normalized again.
Also, quantization requires a calibration dataset (a sketch for generating the calibration list follows the conversion script below). This test does not enable quantization, so pay close attention to the input settings.
python
from rknn.api import RKNN
import os

ONNX = "cifar10_model.onnx"
RKNN_OUT = "cifar10_model.rknn"
TARGET = "rk3588"
# Calibration list txt (optional)
DATASET_TXT = "./dataset.txt"  # can be None / absent if not quantizing

rknn = RKNN(verbose=True)

print("--> Config (must match the preprocessing used at training time)")
# Note: training used transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
# => mean=127.5, std=127.5 on the 0..255 scale
rknn.config(
    mean_values=[[127.5, 127.5, 127.5]],  # training-time normalization mapped to 0-255
    std_values=[[127.5, 127.5, 127.5]],
    target_platform=TARGET
)

print("--> Loading the ONNX model")
ret = rknn.load_onnx(model=ONNX)
if ret != 0:
    raise SystemExit("load_onnx failed; check the onnx file and output nodes")

# Whether to quantize (recommended when a calibration set exists; fall back to
# False, or adjust the calibration set, if accuracy degrades)
DO_QUANT = os.path.exists(DATASET_TXT)

print(f"--> Building the RKNN model (do_quantization={DO_QUANT})")
if DO_QUANT:
    ret = rknn.build(do_quantization=True, dataset=DATASET_TXT)
else:
    ret = rknn.build(do_quantization=False)
if ret != 0:
    raise SystemExit("build failed; check the console output for unsupported ops or shape problems")

print("--> Exporting the rknn file")
ret = rknn.export_rknn(RKNN_OUT)
if ret != 0:
    raise SystemExit("export_rknn failed")

print("Conversion complete:", RKNN_OUT)
rknn.release()
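If quantization is enabled, dataset.txt is just a plain text file with one calibration image path per line. A minimal sketch for generating it, assuming a hypothetical ./calib_images/ folder holding a few hundred representative images:
python
import glob

# Collect representative calibration images (./calib_images/ is a placeholder path)
paths = sorted(glob.glob("./calib_images/*.jpg") + glob.glob("./calib_images/*.png"))

# Write one image path per line, the format rknn-toolkit2 expects for `dataset`
with open("dataset.txt", "w") as f:
    for p in paths:
        f.write(p + "\n")
print(f"Wrote {len(paths)} calibration image paths to dataset.txt")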
(4) Model Deployment
① Quick Python test
The RK3588 board runs a Linux system whose root filesystem is built with Buildroot, so its Python support is limited; after installing the required packages, it can run some simple inference examples.
The script below simply reads a single image and classifies it.
python
import cv2
import numpy as np
from rknnlite.api import RKNNLite

# CIFAR-10 classes
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']

def load_image(img_path):
    """
    Load an image with OpenCV and preprocess it.
    """
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"Image not found: {img_path}")
    # Convert to RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Resize to the CIFAR-10 input size
    img_resized = cv2.resize(img, (32, 32))
    # Convert to float32 (no normalization: it was folded into the RKNN model)
    img_normalized = img_resized.astype(np.float32)
    # Add the batch dimension
    img_batch = np.expand_dims(img_normalized, axis=0)
    return img_batch

def softmax(x):
    """Softmax over a 1D or 2D numpy array."""
    x = np.array(x, dtype=np.float32)
    if x.ndim == 1:
        x_exp = np.exp(x - np.max(x))
        return x_exp / np.sum(x_exp)
    elif x.ndim == 2:
        x_exp = np.exp(x - np.max(x, axis=1, keepdims=True))
        return x_exp / np.sum(x_exp, axis=1, keepdims=True)
    else:
        raise ValueError("Unsupported array dimensionality")

def rknn_lite_inference(img_path, rknn_model_path="cifar10_model.rknn"):
    """
    Run inference with an RKNNLite model (RK3588) and report the probability.
    """
    rknn_lite = RKNNLite()
    print("Loading the RKNNLite model...")
    ret = rknn_lite.load_rknn(rknn_model_path)
    if ret != 0:
        print("Failed to load the model!")
        return
    print("Initializing the RKNNLite runtime...")
    ret = rknn_lite.init_runtime()
    if ret != 0:
        print("Failed to initialize the runtime!")
        return
    img = load_image(img_path)
    print("Running inference...")
    outputs = rknn_lite.inference(inputs=[img])
    # Drop the batch dimension
    output_data = np.squeeze(outputs[0])
    # Softmax to turn logits into probabilities
    probs = softmax(output_data)
    # Take the highest-probability class as the prediction
    pred_idx = np.argmax(probs)
    pred_label = classes[pred_idx]
    confidence = probs[pred_idx]
    print(f"Predicted class: {pred_label}")
    print(f"Confidence: {confidence:.4f}")
    rknn_lite.release()

if __name__ == "__main__":
    img_path = "test.jpg"  # path to the image
    rknn_lite_inference(img_path)
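The RK3588 NPU has three cores, and rknn_toolkit_lite2 lets init_runtime take a core_mask argument to pin the workload. A minimal sketch, assuming your toolkit version provides the NPU_CORE_* constants:
python
from rknnlite.api import RKNNLite

rknn_lite = RKNNLite()
ret = rknn_lite.load_rknn("cifar10_model.rknn")
# NPU_CORE_0 pins inference to core 0; NPU_CORE_AUTO lets the driver pick a core,
# and NPU_CORE_0_1_2 fans the workload out across all three RK3588 NPU cores.
ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_0)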
② C++ deployment
Note that the input settings absolutely must match the original model's configuration.
After conversion to RKNN the input data layout becomes NHWC, and you must also keep track of whether the model was actually quantized.
The program below simply reads a single image and classifies it.
cpp
#include <iostream>
#include <opencv2/opencv.hpp>
#include <rknn_api.h>
#include <vector>
#include <cstring>
#include <cstdlib>
#include <cmath>
#include <utility>

#define WIDTH 32
#define HEIGHT 32
#define CHANNEL 3
#define CLASS_NUM 10

const char* classes[CLASS_NUM] = {
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck"
};

// Read the model file into memory
unsigned char* load_model(const char* path, int* size) {
    FILE* fp = fopen(path, "rb");
    if (!fp) return nullptr;
    fseek(fp, 0, SEEK_END);
    *size = ftell(fp);
    fseek(fp, 0, SEEK_SET);
    unsigned char* buf = (unsigned char*)malloc(*size);
    if (fread(buf, 1, *size, fp) != (size_t)*size) {
        fclose(fp);
        free(buf);
        return nullptr;
    }
    fclose(fp);
    return buf;
}

// Read the image and convert it to float32 NHWC (no normalization)
bool load_image(const char* img_path, float* output_data) {
    cv::Mat img = cv::imread(img_path);
    if (img.empty()) {
        std::cerr << "Failed to read image!" << std::endl;
        return false;
    }
    cv::Mat img_rgb;
    cv::cvtColor(img, img_rgb, cv::COLOR_BGR2RGB);
    cv::Mat resized;
    cv::resize(img_rgb, resized, cv::Size(WIDTH, HEIGHT));
    int idx = 0;
    for (int h = 0; h < HEIGHT; h++) {
        for (int w = 0; w < WIDTH; w++) {
            for (int c = 0; c < CHANNEL; c++) {
                output_data[idx++] = static_cast<float>(resized.at<cv::Vec3b>(h, w)[c]);
            }
        }
    }
    return true;
}

// argmax
int argmax(float* data, int len) {
    int idx = 0;
    float max_val = data[0];
    for (int i = 1; i < len; i++) {
        if (data[i] > max_val) {
            max_val = data[i];
            idx = i;
        }
    }
    return idx;
}

// softmax
void softmax(float* data, int len, float* probs) {
    float max_val = data[0];
    for (int i = 1; i < len; i++) if (data[i] > max_val) max_val = data[i];
    float sum = 0.0f;
    for (int i = 0; i < len; i++) {
        probs[i] = std::exp(data[i] - max_val);
        sum += probs[i];
    }
    for (int i = 0; i < len; i++) probs[i] /= sum;
}

int main(int argc, char** argv) {
    if (argc < 3) {
        std::cerr << "Usage: " << argv[0] << " model.rknn image.jpg" << std::endl;
        return -1;
    }

    int model_len = 0;
    unsigned char* model = load_model(argv[1], &model_len);
    if (!model) { std::cerr << "Failed to load model!" << std::endl; return -1; }

    rknn_context ctx;
    int ret = rknn_init(&ctx, model, model_len, 0, nullptr);
    if (ret < 0) { std::cerr << "rknn_init fail " << ret << std::endl; free(model); return -1; }

    float* input_data = (float*)malloc(sizeof(float) * WIDTH * HEIGHT * CHANNEL);
    if (!load_image(argv[2], input_data)) {
        free(input_data);
        rknn_destroy(ctx);
        free(model);
        return -1;
    }

    // Input setup: float32, NHWC, matching the converted model
    rknn_input inputs[1];
    memset(inputs, 0, sizeof(inputs));
    inputs[0].index = 0;
    inputs[0].type = RKNN_TENSOR_FLOAT32;
    inputs[0].size = sizeof(float) * WIDTH * HEIGHT * CHANNEL;
    inputs[0].fmt = RKNN_TENSOR_NHWC;
    inputs[0].buf = input_data;
    ret = rknn_inputs_set(ctx, 1, inputs);
    if (ret < 0) {
        std::cerr << "rknn_inputs_set fail " << ret << std::endl;
        free(input_data);
        rknn_destroy(ctx);
        free(model);
        return -1;
    }

    ret = rknn_run(ctx, nullptr);
    if (ret < 0) {
        std::cerr << "rknn_run fail " << ret << std::endl;
        free(input_data);
        rknn_destroy(ctx);
        free(model);
        return -1;
    }

    // Fetch the output as float
    rknn_output outputs[1];
    memset(outputs, 0, sizeof(outputs));
    outputs[0].want_float = 1;
    ret = rknn_outputs_get(ctx, 1, outputs, nullptr);
    if (ret < 0) {
        std::cerr << "rknn_outputs_get fail " << ret << std::endl;
        free(input_data);
        rknn_destroy(ctx);
        free(model);
        return -1;
    }

    float* out = (float*)outputs[0].buf;
    float probs[CLASS_NUM];
    softmax(out, CLASS_NUM, probs);
    int pred_idx = argmax(probs, CLASS_NUM);
    std::cout << "Predicted class: " << classes[pred_idx] << std::endl;
    std::cout << "Confidence: " << probs[pred_idx] << std::endl;

    // Optional: print Top-5 (sort an index array so that class labels stay
    // paired with their probabilities)
    std::cout << "--- Top5 ---" << std::endl;
    int order[CLASS_NUM];
    for (int i = 0; i < CLASS_NUM; i++) order[i] = i;
    for (int i = 0; i < CLASS_NUM; i++) {
        for (int j = i + 1; j < CLASS_NUM; j++) {
            if (probs[order[j]] > probs[order[i]]) std::swap(order[i], order[j]);
        }
    }
    for (int i = 0; i < 5; i++) {
        std::cout << classes[order[i]] << " : " << probs[order[i]] << std::endl;
    }

    rknn_outputs_release(ctx, 1, outputs);
    free(input_data);
    rknn_destroy(ctx);
    free(model);
    return 0;
}
The final results deployed on the RK3588 are almost identical to the PyTorch model's predictions on the PC.