To make the model easier to consume from our C++ engineering code, when exporting the ONNX model I change the forward function in models/yolo.py to the following:
```python
# forward used when converting/exporting the model
def forward(self, x):
    z = []  # inference output
    for i in range(self.nl):
        x[i] = self.m[i](x[i])  # conv
        bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
        x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

        if not self.training:  # inference
            if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

            y = x[i].sigmoid()
            if self.inplace:
                y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                z.append(y.view(bs, -1, self.no))
            else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                anchor, conf, prob = torch.split(y, [4, 1, self.nc], dim=4)
                # insert the class index (label id) before the per-class probabilities
                # original
                idxs = torch.argmax(prob, dim=-1).unsqueeze(axis=-1).type(x[i].dtype)
                # alternative
                # idxs = torch.max(prob, dim=-1)[1].data.unsqueeze(axis=-1).type(x[i].dtype)
                y = torch.cat((xy, wh, conf, idxs, prob), -1)
                z.append(y.view(bs, -1, self.no + 1))

    return x if self.training else torch.cat(z, 1)
```
In other words, the index of the highest class score is computed and stored in idxs. The original YOLOv5 output per prediction is x y w h box_score label1_confidence label2_confidence ... labeln_confidence; after my change it becomes x y w h box_score idxs label1_confidence label2_confidence ... labeln_confidence.
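For reference, here is a minimal sketch of how a consumer would read the new layout, written in Python with onnxruntime rather than C++; the model filename, input size, and threshold are placeholders, not values from the project:

```python
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("yolov5s_custom.onnx")  # hypothetical exported model
img = np.zeros((1, 3, 640, 640), dtype=np.float32)  # dummy NCHW input

pred = sess.run(None, {sess.get_inputs()[0].name: img})[0]  # (bs, num_anchors, no + 1)
for det in pred[0]:
    x, y, w, h, box_score, cls_id = det[:6]  # idxs sits at column 5
    cls_scores = det[6:]                     # per-class confidences follow
    if box_score > 0.25:                     # example threshold
        print(int(cls_id), box_score, x, y, w, h)
```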
Previously I would edit the code by hand before converting to ONNX, then revert it afterwards, because train and detect also go through this forward function in yolo.py. A later project, however, required a fully automated train/detect/export pipeline, so manual editing was no longer an option. My first idea was to copy yolo.py to yolo_onnx.py and use from models.yolo_onnx import Detect in export.py. That does not work, because many other places also do from models.yolo import Detect. The approach I finally settled on is as follows.
First, add an export member to the Detect class in yolo.py:
```python
class Detect(nn.Module):
    stride = None  # strides computed during build
    onnx_dynamic = False  # ONNX export parameter
    export = False  # new member: switches Detect.forward to the export branch
    ...
```
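Note that export here is a class attribute; assigning to it on a module instance (as done below in export.py) shadows the class default for that instance only, so training and detection code that never touches the flag keeps seeing False. A toy sketch of this Python behavior, with a stand-in class instead of Detect:

```python
class Head:
    export = False  # class attribute, shared default

a, b = Head(), Head()
a.export = True            # instance attribute shadows the class default
print(a.export, b.export)  # True False: other instances are unaffected
```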
Then, in the run function of export.py, set this flag to True:
```python
@torch.no_grad()
def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
        weights=ROOT / 'yolov5s.pt',  # weights path
        imgsz=(640, 640),  # image (height, width)
        batch_size=1,  # batch size
        device='cpu',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        include=('torchscript', 'onnx'),  # include formats
        half=False,  # FP16 half-precision export
        inplace=False,  # set YOLOv5 Detect() inplace=True
        train=False,  # model.train() mode
        optimize=False,  # TorchScript: optimize for mobile
        int8=False,  # CoreML/TF INT8 quantization
        dynamic=False,  # ONNX/TF: dynamic axes
        simplify=False,  # ONNX: simplify model
        opset=12,  # ONNX: opset version
        verbose=False,  # TensorRT: verbose log
        workspace=4,  # TensorRT: workspace size (GB)
        nms=False,  # TF: add NMS to model
        agnostic_nms=False,  # TF: add agnostic NMS to model
        topk_per_class=100,  # TF.js NMS: topk per class to keep
        topk_all=100,  # TF.js NMS: topk for all classes to keep
        iou_thres=0.45,  # TF.js NMS: IoU threshold
        conf_thres=0.25  # TF.js NMS: confidence threshold
        ):
    t = time.time()
    include = [x.lower() for x in include]  # to lowercase
    formats = tuple(export_formats()['Argument'][1:])  # --include arguments
    flags = [x in include for x in formats]
    assert sum(flags) == len(include), f'ERROR: Invalid --include {include}, valid --include arguments are {formats}'
    jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = flags  # export booleans
    file = Path(url2file(weights) if str(weights).startswith(('http:/', 'https:/')) else weights)  # PyTorch weights

    # Load PyTorch model
    device = select_device(device)
    assert not (device.type == 'cpu' and half), '--half only compatible with GPU export, i.e. use --device 0'
    model = attempt_load(weights, map_location=device, inplace=True, fuse=True)  # load FP32 model
    nc, names = model.nc, model.names  # number of classes, class names
    model.model[-1].export = True  # added line: turn on the custom export branch in Detect.forward

    # Checks
    imgsz *= 2 if len(imgsz) == 1 else 1  # expand
    opset = 12 if ('openvino' in include) else opset  # OpenVINO requires opset <= 12
    assert nc == len(names), f'Model class count {nc} != len(names) {len(names)}'

    # Input
    gs = int(max(model.stride))  # grid size (max stride)
    imgsz = [check_img_size(x, gs) for x in imgsz]  # verify img_size are gs-multiples
    im = torch.zeros(batch_size, 3, *imgsz).to(device)  # image size(1,3,320,192) BCHW iDetection

    # Update model
    if half:
        im, model = im.half(), model.half()  # to FP16
    model.train() if train else model.eval()  # training mode = no Detect() layer grid construction
    for k, m in model.named_modules():
        if isinstance(m, Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        elif isinstance(m, Detect):
            m.inplace = inplace
            m.onnx_dynamic = dynamic
            if hasattr(m, 'forward_export'):
                m.forward = m.forward_export  # assign custom forward (optional)

    for _ in range(2):
        y = model(im)  # dry runs
    shape = tuple(y[0].shape)  # model output shape
    LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} with output shape {shape} ({file_size(file):.1f} MB)")

    # Exports
    f = [''] * 10  # exported filenames
    warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning)  # suppress TracerWarning
    if jit:
        f[0] = export_torchscript(model, im, file, optimize)
    if engine:  # TensorRT required before ONNX
        f[1] = export_engine(model, im, file, train, half, simplify, workspace, verbose)
    if onnx or xml:  # OpenVINO requires ONNX
        f[2] = export_onnx(model, im, file, opset, train, dynamic, simplify)
    if xml:  # OpenVINO
        f[3] = export_openvino(model, im, file)
    if coreml:
        _, f[4] = export_coreml(model, im, file)

    # TensorFlow Exports
    if any((saved_model, pb, tflite, edgetpu, tfjs)):
        if int8 or edgetpu:  # TFLite --int8 bug https://github.com/ultralytics/yolov5/issues/5707
            check_requirements(('flatbuffers==1.12',))  # required before `import tensorflow`
        assert not (tflite and tfjs), 'TFLite and TF.js models must be exported separately, please pass only one type.'
        model, f[5] = export_saved_model(model, im, file, dynamic, tf_nms=nms or agnostic_nms or tfjs,
                                         agnostic_nms=agnostic_nms or tfjs, topk_per_class=topk_per_class,
                                         topk_all=topk_all, conf_thres=conf_thres, iou_thres=iou_thres)  # keras model
        if pb or tfjs:  # pb prerequisite to tfjs
            f[6] = export_pb(model, im, file)
        if tflite or edgetpu:
            f[7] = export_tflite(model, im, file, int8=int8 or edgetpu, data=data, ncalib=100)
        if edgetpu:
            f[8] = export_edgetpu(model, im, file)
        if tfjs:
            f[9] = export_tfjs(model, im, file)

    # Finish
    f = [str(x) for x in f if x]  # filter out '' and None
    if any(f):
        LOGGER.info(f'\nExport complete ({time.time() - t:.2f}s)'
                    f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
                    f"\nDetect: python detect.py --weights {f[-1]}"
                    f"\nPyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', '{f[-1]}')"
                    f"\nValidate: python val.py --weights {f[-1]}"
                    f"\nVisualize: https://netron.app")
    return f  # return list of exported files/dirs
```
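With the flag wired up inside run, an automated pipeline can trigger the export programmatically instead of editing code; a minimal sketch, assuming export.py sits on the import path and the weights path is a placeholder (python export.py --weights ... --include onnx works equally well from the CLI):

```python
from export import run

# hypothetical weights produced by the automated training step
run(weights='runs/train/exp/weights/best.pt', include=('onnx',), opset=12)
```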
Finally, modify the forward function in yolo.py to branch on the flag:
```python
def forward(self, x):
    if self.export:  # export branch: append the argmax class index to each prediction
        print("self.export===============", self.export)
        z = []  # inference output
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                y = x[i].sigmoid()
                if self.inplace:
                    y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                    z.append(y.view(bs, -1, self.no))
                else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                    xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                    anchor, conf, prob = torch.split(y, [4, 1, self.nc], dim=4)
                    # insert the class index (label id) before the per-class probabilities
                    # original
                    idxs = torch.argmax(prob, dim=-1).unsqueeze(axis=-1).type(x[i].dtype)
                    # alternative
                    # idxs = torch.max(prob, dim=-1)[1].data.unsqueeze(axis=-1).type(x[i].dtype)
                    y = torch.cat((xy, wh, conf, idxs, prob), -1)
                    z.append(y.view(bs, -1, self.no + 1))

        return x if self.training else torch.cat(z, 1)
    else:  # standard YOLOv5 branch, used by train/detect/val
        print("self.export===============", self.export)
        z = []  # inference output
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                y = x[i].sigmoid()
                if self.inplace:
                    y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                    xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, y[..., 4:]), -1)
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)
```
With this in place, training and export run through different code paths without any manual edits.
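To sanity-check that the flag really switches branches, one can compare the width of the fused output with the flag off and on. A rough sketch, assuming a YOLOv5 checkout with attempt_load available and a local weights file (the path is a placeholder); note that inplace must be False so the export branch takes the path that appends idxs:

```python
import torch
from models.experimental import attempt_load

model = attempt_load('yolov5s.pt', map_location='cpu')  # hypothetical weights
model.eval()
model.model[-1].inplace = False  # take the non-inplace branch that appends idxs
im = torch.zeros(1, 3, 640, 640)

model.model[-1].export = False
width_plain = model(im)[0].shape[-1]  # standard branch returns (pred, x)
model.model[-1].export = True
width_export = model(im).shape[-1]    # export branch returns pred only
print(width_plain, width_export)      # expect nc + 5 vs nc + 6
```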