1 Introduction
When deploying deep learning models, converting the model format is a critical step. Paddle2ONNX is an open-source tool that converts PaddlePaddle models to the ONNX format so they can be run and deployed across a wide range of inference frameworks. As the ONNX standard keeps evolving, keeping Paddle2ONNX compatible with the latest release is especially important. This article, "【Paddle2ONNX】为Paddle2ONNX升级ONNX版本", explains in detail how to upgrade the ONNX version that the Paddle2ONNX project depends on.
2 Upgrading the ONNX Dependency
2.1 Update .gitmodules
Update the `branch` field of the onnx submodule to the latest commit id:

```
[submodule "third_party/onnx"]
    path = third_party/onnx
    url = https://github.com/onnx/onnx.git
    branch = ad834eb73ee0cd9b6fa9ea892caeed5fa17d7dc0
```
2.2 Update CMakeLists.txt
Paddle2ONNX caps the maximum supported opset in CMakeLists.txt; we need to raise it to 18. For the correspondence between opset versions and ONNX releases, see the ONNX release-to-opset version table (linked in the comment below).
```cmake
# if you build from other version of onnx this should be modified
# refer to https://github.com/onnx/onnx/blob/main/docs/Versioning.md#released-versions
add_definitions(-DMAX_ONNX_OPSET_VERSION=18)
add_definitions(-DPADDLE2ONNX_LIB)
```
2.3 Add Support for Converting to Opset 18
Add opset 18 to the dispatch chain in paddle2onnx/mapper/mapper.h:
```cpp
class Mapper {
 public:
  void Run() {
    int32_t opset_version = helper_->GetOpsetVersion();
    Assert(opset_version >= 7 && opset_version <= MAX_ONNX_OPSET_VERSION,
           "[Paddle2ONNX] Only support opset_version in range of [7, " +
               std::to_string(MAX_ONNX_OPSET_VERSION) + "].");
    if (IsExportAsCustomOp()) {
      return ExportAsCustomOp();
    }
    if (opset_version == 18) {
      Opset18();
    } else if (opset_version == 17) {
      Opset17();
    } else if (opset_version == 16) {
      Opset16();
    } else if (opset_version == 15) {
      Opset15();
    } else if (opset_version == 14) {
      Opset14();
    } else if (opset_version == 13) {
      Opset13();
    } else if (opset_version == 12) {
      Opset12();
    } else if (opset_version == 11) {
      Opset11();
    } else if (opset_version == 10) {
      Opset10();
    } else if (opset_version == 9) {
      Opset9();
    } else if (opset_version == 8) {
      Opset8();
    } else {
      Opset7();
    }
  }

  // Each OpsetN() defaults to the previous opset's implementation, so only
  // operators whose ONNX definition changed in opset 18 need an override.
  virtual void Opset18() { Opset17(); }
};
```
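Because each OpsetN() defaults to the previous version's implementation, a concrete mapper only has to override the opset levels at which its export logic actually changes. Below is a standalone sketch of this fallback pattern; the classes here are hypothetical illustrations, not code from the PR:

```cpp
#include <iostream>

// Minimal sketch of the fallback chain, assuming the same defaulting idea
// as mapper.h above; class names are hypothetical.
class MapperBase {
 public:
  virtual ~MapperBase() = default;
  virtual void Opset17() { std::cout << "opset 17 export path\n"; }
  virtual void Opset18() { Opset17(); }  // default: reuse the opset 17 logic
};

class ReduceLikeMapper : public MapperBase {
 public:
  // Reduce ops changed in opset 18 (axes moved from attribute to input),
  // so a mapper like this overrides Opset18().
  void Opset18() override { std::cout << "opset 18 export path\n"; }
};

int main() {
  MapperBase unchanged;
  ReduceLikeMapper changed;
  unchanged.Opset18();  // prints: opset 17 export path
  changed.Opset18();    // prints: opset 18 export path
  return 0;
}
```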
3 Upgrading Operators to Support Opset 18
3.1 Upgrade the LayerNorm Operator
Once the opset is raised to 17 (where ONNX introduces a native LayerNormalization operator), the LayerNorm export runs into problems, so a targeted fix is applied here. The key point is that the Scale and Bias inputs must have the same shape as input_shape[begin_norm_axis:], so constant tensors of that shape are synthesized when Paddle does not provide them:
```cpp
void LayerNormMapper::Opset17() {
  auto input_info = GetInput("X");
  auto output_info = GetOutput("Y");
  constexpr std::array<P2ODataType, 3> T = {
      P2ODataType::FP16, P2ODataType::FP32, P2ODataType::FP64};
  auto input_name = input_info[0].name;
  auto input_type = input_info[0].dtype;
  auto input_shape = input_info[0].shape;
  // ONNX LayerNormalization only accepts float inputs; cast anything else to FP32.
  if (std::find(T.begin(), T.end(), input_type) == T.end()) {
    input_name = helper_->AutoCast(input_name, input_info[0].dtype, P2ODataType::FP32);
    input_type = P2ODataType::FP32;
  }
  bool has_input_Bias = HasInput("Bias");
  bool has_input_Scale = HasInput("Scale");
  // Case 1: Paddle provides both Scale and Bias.
  if (has_input_Bias && has_input_Scale) {
    auto scale_info = GetInput("Scale");
    auto scale_name = scale_info[0].name;
    auto scale_type = scale_info[0].dtype;
    if (std::find(T.begin(), T.end(), scale_type) == T.end()) {
      scale_name = helper_->AutoCast(scale_name, scale_type, P2ODataType::FP32);
      scale_type = P2ODataType::FP32;
    }
    auto bias_info = GetInput("Bias");
    auto bias_name = bias_info[0].name;
    auto bias_type = bias_info[0].dtype;
    if (std::find(T.begin(), T.end(), bias_type) == T.end()) {
      bias_name = helper_->AutoCast(bias_name, bias_type, P2ODataType::FP32);
      bias_type = P2ODataType::FP32;
    }
    auto layer_norm_node = helper_->MakeNode(
        "LayerNormalization", {input_name, scale_name, bias_name},
        {output_info[0].name});
    AddAttribute(layer_norm_node, "axis", begin_norm_axis_);
    AddAttribute(layer_norm_node, "epsilon", epsilon_);
    return;
  }
  // Case 2: only Scale is provided; Bias is an optional ONNX input, so omit it.
  if (has_input_Scale) {
    auto scale_info = GetInput("Scale");
    auto scale_name = scale_info[0].name;
    auto scale_type = scale_info[0].dtype;
    if (std::find(T.begin(), T.end(), scale_type) == T.end()) {
      scale_name = helper_->AutoCast(scale_name, scale_type, P2ODataType::FP32);
      scale_type = P2ODataType::FP32;
    }
    auto layer_norm_node = helper_->MakeNode(
        "LayerNormalization", {input_name, scale_name},
        {output_info[0].name});
    AddAttribute(layer_norm_node, "axis", begin_norm_axis_);
    AddAttribute(layer_norm_node, "epsilon", epsilon_);
    return;
  }
  // Scale/Bias must have shape input_shape[begin_norm_axis:].
  std::vector<int64_t> normalized_shape;
  for (int64_t i = begin_norm_axis_; i < static_cast<int64_t>(input_shape.size()); i++) {
    normalized_shape.emplace_back(input_shape[i]);
  }
  // Case 3: only Bias is provided; synthesize an all-ones Scale of the normalized shape.
  if (has_input_Bias) {
    auto bias_info = GetInput("Bias");
    auto bias_name = bias_info[0].name;
    auto bias_type = bias_info[0].dtype;
    if (std::find(T.begin(), T.end(), bias_type) == T.end()) {
      bias_name = helper_->AutoCast(bias_name, bias_type, P2ODataType::FP32);
      bias_type = P2ODataType::FP32;
    }
    std::string scale_name = helper_->Constant(
        normalized_shape, GetOnnxDtype(P2ODataType::FP32), static_cast<float>(1.0));
    auto layer_norm_node = helper_->MakeNode(
        "LayerNormalization", {input_name, scale_name, bias_name},
        {output_info[0].name});
    AddAttribute(layer_norm_node, "axis", begin_norm_axis_);
    AddAttribute(layer_norm_node, "epsilon", epsilon_);
    return;
  }
  // Case 4: neither Scale nor Bias; only the all-ones Scale is needed.
  if (!has_input_Bias && !has_input_Scale) {
    std::string scale_name = helper_->Constant(
        normalized_shape, GetOnnxDtype(P2ODataType::FP32), static_cast<float>(1.0));
    auto layer_norm_node = helper_->MakeNode(
        "LayerNormalization", {input_name, scale_name},
        {output_info[0].name});
    AddAttribute(layer_norm_node, "axis", begin_norm_axis_);
    AddAttribute(layer_norm_node, "epsilon", epsilon_);
  }
}
```
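To make the shape requirement concrete, here is a tiny standalone sketch (illustrative only, not Paddle2ONNX code) of the normalized_shape computation above: for an input of shape [2, 3, 4, 5] with begin_norm_axis = 1, the synthesized Scale (and any Bias) must have shape [3, 4, 5].

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Mirrors the normalized_shape loop above: keep the dims from begin_norm_axis on.
  std::vector<int64_t> input_shape = {2, 3, 4, 5};
  int64_t begin_norm_axis = 1;
  std::vector<int64_t> normalized_shape(input_shape.begin() + begin_norm_axis,
                                        input_shape.end());
  for (auto d : normalized_shape) std::cout << d << ' ';  // prints: 3 4 5
  std::cout << '\n';
  return 0;
}
```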
3.2 Upgrade and Split the Reduce Operators
After the opset is raised to 18, the inputs of the Reduce operators change: the reduction axes are now passed as an input tensor instead of an attribute, so the whole Reduce family needs to be upgraded. To avoid coupling all of the code together, I split the operators into separate files; a before/after sketch of the axes change follows the listings below. The core code is as follows:
```cpp
#include "paddle2onnx/mapper/tensor/reduce_logsumexp.h"

namespace paddle2onnx {
REGISTER_MAPPER(logsumexp, ReduceLogSumExpMapper)

int32_t ReduceLogSumExpMapper::GetMinOpset(bool verbose) {
  constexpr int op_version = 11;
  Logger(verbose, op_version) << RequireOpset(op_version) << std::endl;
  return op_version;
}

void ReduceLogSumExpMapper::Opset18() {
  GetAttr("keepdim", &keep_dim_);
  GetAttr("reduce_all", &reduce_all_);
  GetAttr("axis", &dim_);
  auto x_info = GetInput("X");
  // From opset 18 on, the reduction axes are an input tensor, not an attribute.
  std::string dims;
  if (!reduce_all_) {
    dims = helper_->Constant(ONNX_NAMESPACE::TensorProto::INT64, dim_);
  } else {
    dims = helper_->Constant(ONNX_NAMESPACE::TensorProto::INT64,
                             Arange(0, x_info[0].Rank()));
  }
  std::string input_name = x_info[0].name;
  auto input_type = x_info[0].dtype;
  if (x_info[0].dtype == P2ODataType::BOOL) {
    input_name = helper_->AutoCast(input_name, input_type, P2ODataType::INT32);
    input_type = P2ODataType::INT32;
  }
  auto reduce_node = helper_->MakeNode("ReduceLogSumExp", {input_name, dims});
  AddAttribute(reduce_node, "keepdims", static_cast<int64_t>(keep_dim_));
  auto out_node_name = reduce_node->output(0);
  bool reduce_all_axes = dim_.size() == x_info[0].Rank();
  if (reduce_all_) {
    reduce_all_axes = true;
  }
  if (!keep_dim_ && reduce_all_axes) {
    out_node_name = helper_->Reshape(out_node_name, {-1});
  }
  auto out_info = GetOutput("Out");
  helper_->AutoCast(out_node_name, out_info[0].name, input_type, out_info[0].dtype);
}
}  // namespace paddle2onnx
```
```cpp
#include "paddle2onnx/mapper/tensor/reduce_max.h"

namespace paddle2onnx {
REGISTER_MAPPER(reduce_max, ReduceMaxMapper)
// reduce_any is also exported via ReduceMax: the bool input is cast to int32
// below, and the max over the axes is 1 exactly when any element is true.
REGISTER_MAPPER(reduce_any, ReduceMaxMapper)

int32_t ReduceMaxMapper::GetMinOpset(bool verbose) {
  int op_version = 11;
  auto x_info = GetInput("X");
  if (x_info[0].dtype == P2ODataType::FP64) {
    op_version = 12;
  }
  Logger(verbose, op_version) << RequireOpset(op_version) << std::endl;
  return op_version;
}

void ReduceMaxMapper::Opset18() {
  GetAttr("keep_dim", &keep_dim_);
  GetAttr("reduce_all", &reduce_all_);
  GetAttr("in_dtype", &in_dtype_);
  GetAttr("out_dtype", &out_dtype_);
  GetAttr("dim", &dim_);
  auto x_info = GetInput("X");
  std::string dims;
  if (!reduce_all_) {
    dims = helper_->Constant(ONNX_NAMESPACE::TensorProto::INT64, dim_);
  } else {
    dims = helper_->Constant(ONNX_NAMESPACE::TensorProto::INT64,
                             Arange(0, x_info[0].Rank()));
  }
  std::string input_name = x_info[0].name;
  auto input_type = x_info[0].dtype;
  if (x_info[0].dtype == P2ODataType::BOOL) {
    input_name = helper_->AutoCast(input_name, input_type, P2ODataType::INT32);
    input_type = P2ODataType::INT32;
  }
  auto reduce_node = helper_->MakeNode("ReduceMax", {input_name, dims});
  AddAttribute(reduce_node, "keepdims", static_cast<int64_t>(keep_dim_));
  auto out_node_name = reduce_node->output(0);
  bool reduce_all_axes = dim_.size() == x_info[0].Rank();
  if (reduce_all_) {
    reduce_all_axes = true;
  }
  if (!keep_dim_ && reduce_all_axes) {
    out_node_name = helper_->Reshape(out_node_name, {-1});
  }
  auto out_info = GetOutput("Out");
  helper_->AutoCast(out_node_name, out_info[0].name, input_type, out_info[0].dtype);
}

void ReduceMaxMapper::Opset12() {
  // Same logic as Opset11; opset 12 merely adds support for double inputs.
  Opset11();
}
}  // namespace paddle2onnx
```
```cpp
#include "paddle2onnx/mapper/tensor/reduce_min.h"

namespace paddle2onnx {
REGISTER_MAPPER(reduce_min, ReduceMinMapper)
// reduce_all is also exported via ReduceMin: the bool input is cast to int32
// below, and the min over the axes is 1 exactly when all elements are true.
REGISTER_MAPPER(reduce_all, ReduceMinMapper)

int32_t ReduceMinMapper::GetMinOpset(bool verbose) {
  int op_version = 11;
  auto x_info = GetInput("X");
  if (x_info[0].dtype == P2ODataType::FP64) {
    op_version = 12;
  }
  Logger(verbose, op_version) << RequireOpset(op_version) << std::endl;
  return op_version;
}

void ReduceMinMapper::Opset18() {
  GetAttr("keep_dim", &keep_dim_);
  GetAttr("reduce_all", &reduce_all_);
  GetAttr("in_dtype", &in_dtype_);
  GetAttr("out_dtype", &out_dtype_);
  GetAttr("dim", &dim_);
  auto x_info = GetInput("X");
  std::string dims;
  if (!reduce_all_) {
    dims = helper_->Constant(ONNX_NAMESPACE::TensorProto::INT64, dim_);
  } else {
    dims = helper_->Constant(ONNX_NAMESPACE::TensorProto::INT64,
                             Arange(0, x_info[0].Rank()));
  }
  auto input_node_name = x_info[0].name;
  auto input_type = x_info[0].dtype;
  if (x_info[0].dtype == P2ODataType::BOOL) {
    input_node_name = helper_->AutoCast(x_info[0].name, x_info[0].dtype, P2ODataType::INT32);
    input_type = P2ODataType::INT32;
  }
  auto reduce_node = helper_->MakeNode("ReduceMin", {input_node_name, dims});
  AddAttribute(reduce_node, "keepdims", static_cast<int64_t>(keep_dim_));
  auto out_node_name = reduce_node->output(0);
  bool reduce_all_axes = dim_.size() == x_info[0].Rank();
  if (reduce_all_) {
    reduce_all_axes = true;
  }
  if (!keep_dim_ && reduce_all_axes) {
    out_node_name = helper_->Reshape(out_node_name, {-1});
  }
  auto out_info = GetOutput("Out");
  helper_->AutoCast(out_node_name, out_info[0].name, input_type, out_info[0].dtype);
}

void ReduceMinMapper::Opset12() {
  // Same logic as Opset11; opset 12 merely adds support for double inputs.
  Opset11();
}
}  // namespace paddle2onnx
```
```cpp
#include "paddle2onnx/mapper/tensor/reduce_mean.h"

namespace paddle2onnx {
REGISTER_MAPPER(reduce_mean, ReduceMeanMapper)

int32_t ReduceMeanMapper::GetMinOpset(bool verbose) {
  constexpr int op_version = 11;
  Logger(verbose, op_version) << RequireOpset(op_version) << std::endl;
  return op_version;
}

void ReduceMeanMapper::Opset18() {
  auto axis_name_ = "dim";
  GetAttr("keep_dim", &keep_dim_);
  GetAttr("reduce_all", &reduce_all_);
  GetAttr("in_dtype", &in_dtype_);
  GetAttr("out_dtype", &out_dtype_);
  if (IsAttrVar(axis_name_)) {
    auto info = GetAttrVar(axis_name_);
    TryGetValue(info[0], &dim_);
  } else {
    GetAttr(axis_name_, &dim_);
  }
  auto x_info = GetInput("X");
  std::string dims;
  if (IsAttrVar(axis_name_)) {
    // The axes may come from a tensor in the graph rather than a constant.
    auto info = GetAttrVar(axis_name_);
    dims = helper_->AutoCast(info[0].name, info[0].dtype, P2ODataType::INT64);
  } else {
    if (!reduce_all_) {
      dims = helper_->Constant(ONNX_NAMESPACE::TensorProto::INT64, dim_);
    } else {
      dims = helper_->Constant(ONNX_NAMESPACE::TensorProto::INT64,
                               Arange(0, x_info[0].Rank()));
    }
  }
  auto reduce_node = helper_->MakeNode("ReduceMean", {x_info[0].name, dims});
  AddAttribute(reduce_node, "keepdims", static_cast<int64_t>(keep_dim_));
  auto out_node_name = reduce_node->output(0);
  bool reduce_all_axes = dim_.size() == x_info[0].Rank();
  if (reduce_all_) {
    reduce_all_axes = true;
  }
  if (!keep_dim_ && reduce_all_axes) {
    out_node_name = helper_->Reshape(out_node_name, {-1});
  }
  auto out_info = GetOutput("Out");
  helper_->AutoCast(out_node_name, out_info[0].name, x_info[0].dtype, out_info[0].dtype);
}
}  // namespace paddle2onnx
```
```cpp
#include "paddle2onnx/mapper/tensor/reduce_sum.h"

namespace paddle2onnx {
REGISTER_MAPPER(reduce_sum, ReduceMapperSum)

int32_t ReduceMapperSum::GetMinOpset(bool verbose) {
  constexpr int op_version = 13;
  Logger(verbose, op_version) << RequireOpset(op_version) << std::endl;
  return op_version;
}

// ReduceSum already switched to axes-as-input back in opset 13, so the
// Opset13 implementation keeps working for opset 18 and needs no override.
void ReduceMapperSum::Opset13() {
  auto axis_name_ = "dim";
  GetAttr("keep_dim", &keep_dim_);
  GetAttr("reduce_all", &reduce_all_);
  GetAttr("in_dtype", &in_dtype_);
  GetAttr("out_dtype", &out_dtype_);
  if (IsAttrVar(axis_name_)) {
    auto info = GetAttrVar(axis_name_);
    TryGetValue(info[0], &dim_);
  } else {
    GetAttr(axis_name_, &dim_);
  }
  auto x_info = GetInput("X");
  std::string dims;
  if (IsAttrVar(axis_name_)) {
    auto info = GetAttrVar(axis_name_);
    dims = helper_->AutoCast(info[0].name, info[0].dtype, P2ODataType::INT64);
  } else {
    if (!reduce_all_) {
      dims = helper_->Constant(ONNX_NAMESPACE::TensorProto::INT64, dim_);
    } else {
      dims = helper_->Constant(ONNX_NAMESPACE::TensorProto::INT64,
                               Arange(0, x_info[0].Rank()));
    }
  }
  auto reduce_node = helper_->MakeNode("ReduceSum", {x_info[0].name, dims});
  AddAttribute(reduce_node, "keepdims", static_cast<int64_t>(keep_dim_));
  auto out_node_name = reduce_node->output(0);
  bool reduce_all_axes = dim_.size() == x_info[0].Rank();
  if (reduce_all_) {
    reduce_all_axes = true;
  }
  if (!keep_dim_ && reduce_all_axes) {
    out_node_name = helper_->Reshape(out_node_name, {-1});
  }
  auto out_info = GetOutput("Out");
  helper_->AutoCast(out_node_name, out_info[0].name, x_info[0].dtype, out_info[0].dtype);
}
}  // namespace paddle2onnx
```
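Here is the before/after sketch promised above. It is a fragment rather than a complete mapper: it reuses the helper_->MakeNode, helper_->Constant, and AddAttribute calls and the variable names from the listings, and it assumes AddAttribute accepts a vector of ints for the axes, as the pre-opset-18 mappers did:

```cpp
// opset < 18: the reduction axes travel as a node attribute.
auto node17 = helper_->MakeNode("ReduceMean", {input_name});
AddAttribute(node17, "axes", dim_);
AddAttribute(node17, "keepdims", static_cast<int64_t>(keep_dim_));

// opset >= 18: the reduction axes travel as a second input tensor.
std::string axes = helper_->Constant(ONNX_NAMESPACE::TensorProto::INT64, dim_);
auto node18 = helper_->MakeNode("ReduceMean", {input_name, axes});
AddAttribute(node18, "keepdims", static_cast<int64_t>(keep_dim_));
```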
Due to space constraints, only part of the core code is shown here; see Paddle2ONNX PR 1250 for the rest.
4 Upgrading the CI Mechanism
The original CI verified every opset, which is not appropriate at the higher opset levels, so the CI mechanism was upgraded to verify only the specified opsets.
4.1 Upgrade onnxbase
```python
class APIOnnx(object):
    def run(self):
        """
        1. use dygraph layer to make exp
        2. dygraph layer to onnx
        3. use onnx to make res
        4. compare diff
        """
        self._mkdir()
        self.set_input_spec()
        for place in self.places:
            paddle.set_device(place)
            exp = self._mk_dygraph_exp(self._func)
            res_fict = {}

            assert len(self.ops) <= 1, "Need to make sure the number of ops in config is 1."

            # Save Paddle Inference model
            if os.path.exists(self.name):
                shutil.rmtree(self.name)
            paddle.jit.save(self._func, os.path.join(self.name, "model"), self.input_spec)

            # Get PaddleInference model path
            pdmodel_path = os.path.join(self.name, "model.pdmodel")
            pdiparams_path = os.path.join(self.name, "model.pdiparams")

            if len(self.ops) > 0:
                self.dev_check_ops(self.ops[0], pdmodel_path)

            original_model_file = pdmodel_path
            params_file = pdiparams_path
            if not os.path.exists(params_file):
                params_file = ""

            # clip extra
            model_file = os.path.join(self.name, "cliped_model.pdmodel")
            self.clip_extra_program_only(original_model_file, model_file)

            # Export and verify only the opset versions configured in self._version
            for v in self._version:
                onnx_model_str = c_p2o.export(
                    model_file, params_file, v, False, True, True, True,
                    True, {}, "onnxruntime", "", "", False)
                with open(os.path.join(self.name, self.name + '_' + str(v) + ".onnx"), "wb") as f:
                    f.write(onnx_model_str)
                res_fict[str(v)] = self._mk_onnx_res(ver=v)

            for v in self._version:
                compare(res_fict[str(v)], exp, delta=self.delta, rtol=self.rtol)
```
4.2 Upgrade the Unit Tests
Due to space constraints, please see Paddle2ONNX PR 1250 for the remaining code.