目录
一、GhostNetV2核心代码
在models文件夹下新建modules文件夹,在modules文件夹下新建一个py文件。这里为GhostV2.py。复制以下代码到文件里面。
python
# TODO: ghostnetv2
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
def autopad(k, p=None, d=1):
    """
    Return the padding that yields a 'same'-shaped convolution output.

    `k` is the kernel size (an int or a sequence of ints), `p` is an explicit padding that is returned untouched when
    it is not None, and `d` is the dilation used to work out the effective kernel size.
    """
    scalar_kernel = isinstance(k, int)
    if d > 1:
        # A dilated kernel spans d * (k - 1) + 1 input positions.
        k = d * (k - 1) + 1 if scalar_kernel else [d * (size - 1) + 1 for size in k]
    if p is not None:
        return p
    if scalar_kernel:
        return k // 2
    return [size // 2 for size in k]
class Conv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation building block."""

    default_act = nn.SiLU()  # activation used when `act is True`

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """
        Build the convolution, batch-norm and activation layers.

        Args: input channels (c1), output channels (c2), kernel size (k), stride (s), padding (p, derived by
        `autopad` when None), groups (g), dilation (d), and activation (act): True selects `default_act`, an
        `nn.Module` is used as given, anything else disables the activation.
        """
        super().__init__()
        padding = autopad(k, p, d)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Run `x` through convolution, batch normalization and the activation, in that order."""
        normalized = self.bn(self.conv(x))
        return self.act(normalized)

    def forward_fuse(self, x):
        """Run `x` through the convolution and the activation only, skipping batch normalization."""
        convolved = self.conv(x)
        return self.act(convolved)
class Bottleneck(nn.Module):
    """Two-convolution bottleneck (1x1 then 3x3) with an optional residual connection."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
        """
        Build the bottleneck.

        Args: input channels (c1), output channels (c2), residual flag (shortcut), groups of the 3x3 convolution (g),
        and the ratio of hidden to output channels (e).
        """
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
        # The residual add is only possible when input and output channel counts agree.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Apply both convolutions to `x` and add `x` back when the residual connection is enabled."""
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out
class C3(nn.Module):
    """CSP bottleneck with three convolutions: two parallel 1x1 branches merged by a final 1x1 convolution."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Build the C3 module.

        Args: input channels (c1), output channels (c2), number of stacked bottlenecks (n), residual flag passed to
        each bottleneck (shortcut), groups (g), and the ratio of hidden to output channels (e).
        """
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)  # optional act=FReLU(c2)
        stack = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*stack)

    def forward(self, x):
        """Concatenate the bottleneck branch and the plain branch along channels, then fuse them with `cv3`."""
        deep_branch = self.m(self.cv1(x))
        skip_branch = self.cv2(x)
        merged = torch.cat((deep_branch, skip_branch), 1)
        return self.cv3(merged)
class DWConv(Conv):
    """Depth-wise variant of `Conv` whose group count is the greatest common divisor of c1 and c2."""

    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):
        """
        Build the depth-wise convolution.

        Args: input channels (c1), output channels (c2), kernel size (k), stride (s), dilation (d), and activation
        flag or module (act).
        """
        groups = math.gcd(c1, c2)
        super().__init__(c1, c2, k, s, g=groups, d=d, act=act)
class GhostConvV2(nn.Module):
    """
    GhostNetV2 convolution: a primary convolution plus a cheap 5x5 depth-wise convolution, optionally gated by a
    down-sampled attention branch when `mode='attn'`.

    Reference: https://github.com/huawei-noah/Efficient-AI-Backbones/tree/master/ghostnetv2_pytorch
    """

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True, mode=None):  # ch_in, ch_out, kernel, stride, groups
        """
        Build the ghost convolution.

        Args: input channels (c1), output channels (c2), kernel size (k) and stride (s) of the primary convolution,
        its groups (g), activation flag or module (act), and `mode`, where 'attn' adds the attention gate.
        """
        super().__init__()
        # Round the hidden width up so that the concatenation has at least c2 channels; for even c2 this equals
        # c2 // 2. With odd c2, flooring would yield only c2 - 1 output channels.
        c_ = (c2 + 1) // 2
        self.c2 = c2
        self.trim = 2 * c_ != c2  # True only for odd c2, where one surplus channel must be dropped
        self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
        self.mode = mode
        self.gate_fn = nn.Sigmoid()
        if mode == 'attn':
            self.short_conv = nn.Sequential(
                # autopad(k) equals k // 2 for an int kernel and also handles tuple/list kernels.
                nn.Conv2d(c1, c2, k, s, autopad(k), bias=False),
                nn.BatchNorm2d(c2),
                nn.Conv2d(c2, c2, kernel_size=(1, 5), stride=1, padding=(0, 2), groups=c2, bias=False),
                nn.BatchNorm2d(c2),
                nn.Conv2d(c2, c2, kernel_size=(5, 1), stride=1, padding=(2, 0), groups=c2, bias=False),
                nn.BatchNorm2d(c2),
            )

    def forward(self, x):
        """Return the concatenated ghost features of `x`, multiplied by the sigmoid gate when `mode='attn'`."""
        y = self.cv1(x)
        out = torch.cat((y, self.cv2(y)), 1)
        if self.trim:
            out = out[:, :self.c2]
        if self.mode != 'attn':
            return out
        # 2x2 average pooling needs at least 2 pixels per spatial dimension; smaller inputs (for example a
        # stride-sized probe image once it reaches the deepest layers) skip the down-sampling instead of raising.
        pooled = F.avg_pool2d(x, kernel_size=2, stride=2) if min(x.shape[-2:]) >= 2 else x
        gate = self.gate_fn(self.short_conv(pooled))
        # The gate is computed at reduced resolution and stretched back to the feature-map size.
        return out * F.interpolate(gate, size=(out.shape[-2], out.shape[-1]), mode='nearest')
class GhostBottleneckV2(nn.Module):
    """
    GhostNetV2 bottleneck: an attention-gated ghost convolution, an optional stride-2 depth-wise convolution and a
    linear ghost convolution, summed with a shortcut branch.

    Reference: https://github.com/huawei-noah/Efficient-AI-Backbones/tree/master/ghostnetv2_pytorch
    """

    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        """
        Build the bottleneck.

        Args: input channels (c1), output channels (c2), depth-wise kernel size (k), and stride (s); the main branch
        only down-samples when `s == 2`.
        """
        super().__init__()
        c_ = c2 // 2
        downsample = s == 2
        self.conv = nn.Sequential(
            GhostConvV2(c1, c_, 1, 1, mode='attn'),  # pw
            DWConv(c_, c_, k, s, act=False) if downsample else nn.Identity(),  # dw
            GhostConvV2(c_, c2, 1, 1, act=False))  # pw-linear
        # An identity shortcut can only be added to the main branch when neither the resolution nor the channel count
        # changes. Otherwise project the input; the projection uses the same effective stride as the main branch.
        if downsample or c1 != c2:
            self.shortcut = nn.Sequential(
                DWConv(c1, c1, k, s if downsample else 1, act=False),
                Conv(c1, c2, 1, 1, act=False))
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Return the sum of the main branch and the shortcut branch applied to `x`."""
        return self.conv(x) + self.shortcut(x)
class C3GhostV2(C3):
    """C3 module whose bottleneck stack is built from `GhostBottleneckV2` blocks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Build the module through `C3.__init__`, then replace `self.m` with `n` GhostBottleneckV2 blocks.

        Args: input channels (c1), output channels (c2), number of blocks (n), the `shortcut` and `g` values forwarded
        to `C3`, and the ratio of hidden to output channels (e).
        """
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        ghost_blocks = [GhostBottleneckV2(hidden, hidden) for _ in range(n)]
        self.m = nn.Sequential(*ghost_blocks)
注意:很多改进教程都是将代码直接复制到common.py文件,如果改进机制多了容易造成混乱。建议创建一个modules文件夹,将改进机制放里面方便管理。
二、修改common.py
在common.py文件中,在前面的部分添加以下代码,导入GhostV2.py的内容:
python
# Module name must match the file name GhostV2.py exactly; Python imports are case-sensitive.
from models.modules.GhostV2 import *
三、修改yolo.py
在yolo.py文件中,在导入common模块的上面一行添加以下代码,导入GhostV2.py的内容:
python
# Module name must match the file name GhostV2.py exactly; Python imports are case-sensitive.
from models.modules.GhostV2 import *
注意:这里位置不要搞错,不然可能会找不到导入的模块。
如下图所示:
找到parse_model函数,将GhostConvV2卷积和C3GhostV2模块加入,如下图所示:
三、建立yaml文件
在models文件夹下,新建yaml文件,这里命名为yolov5-GhostNetv2.yaml。
将以下代码复制进文件。
yaml
# YOLOv5 by Ultralytics, AGPL-3.0 license
# Model definition that uses GhostConvV2 / C3GhostV2 modules in the backbone and head.
# Trailing numbers in the comments are layer indices, counted from 0 across backbone and head;
# the Concat and Detect rows refer to layers by these indices.
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, GhostConvV2, [128, 3, 2]], # 1-P2/4
[-1, 3, C3GhostV2, [128]], # 2
[-1, 1, GhostConvV2, [256, 3, 2]], # 3-P3/8
[-1, 6, C3GhostV2, [256]], # 4
[-1, 1, GhostConvV2, [512, 3, 2]], # 5-P4/16
[-1, 9, C3GhostV2, [512]], # 6
[-1, 1, GhostConvV2, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3GhostV2, [1024]], # 8
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, GhostConvV2, [512, 1, 1]], # 10
[-1, 1, nn.Upsample, [None, 2, "nearest"]], # 11
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3GhostV2, [512, False]], # 13
[-1, 1, GhostConvV2, [256, 1, 1]], # 14
[-1, 1, nn.Upsample, [None, 2, "nearest"]], # 15
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3GhostV2, [256, False]], # 17 (P3/8-small)
[-1, 1, GhostConvV2, [256, 3, 2]], # 18
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3GhostV2, [512, False]], # 20 (P4/16-medium)
[-1, 1, GhostConvV2, [512, 3, 2]], # 21
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3GhostV2, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
至此更改完成。
四、训练
由于更改了主干网络,weights权重选不选都是从头开始训练。
在data下打开coco.yaml文件,对路径进行修改。尽量用绝对路径,相对路径容易报错。
在yolov5下找到train.py文件,对里面的参数进行修改。
目前主要对--weights、--cfg、--data进行参数设置。其他默认即可。
--weights:先选用官方的yolov5s.pt权重(本配置的depth_multiple和width_multiple与yolov5s一致)。自己训练完后可更换为自己的权重。
注意:这里由于更改了主干网络,weights权重选不选都是从头开始训练。
--cfg:选用刚刚在models下建立的yolov5-GhostNetv2.yaml。
--data:选用上面修改过路径的coco.yaml。
--batch-size:默认是16。如果出现以下问题,提示内存不足的情况,建议设置为8。(batch-size的设置一般为8的整数倍)
打开train.py。ctrl+shift+p 在弹出框窗口搜索Python:选择解释器,选择自己创建的Python虚拟环境,这里是yolo。
点击右上角运行程序。等待训练结束即可。
训练结果会保存在runs文件夹下(默认为runs/train/exp*)。
五、GFLOPs不显示
修改完后运行yolo.py时,发现GFLOPs指标无法显示。(yolo的运行和运行train一样,需修改cfg网络配置等参数)
解决方法如下:在utils文件夹下,打开torch_utils.py文件,找到model_info函数进行修改。YOLOv8则可以在ultralytics/utils/torch_utils.py中的get_flops函数中修改。
python
try:  # FLOPs
    p = next(model.parameters())
    stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32  # max stride
    im = torch.empty((1, p.shape[1], stride, stride), device=p.device)  # input image in BCHW format
    flops = thop.profile(deepcopy(model), inputs=(im, ), verbose=False)[0] / 1E9 * 2  # stride GFLOPs
    imgsz = imgsz if isinstance(imgsz, list) else [imgsz, imgsz]  # expand if int/float
    fs = f', {flops * imgsz[0] / stride * imgsz[1] / stride:.1f} GFLOPs'  # 640x640 GFLOPs
except Exception:
    # Fallback for models that cannot be profiled with the stride-sized probe above: profile a fixed 1x3x640x640
    # input instead. The reported figure is therefore for 640x640 regardless of `imgsz`.
    try:
        device = next(model.parameters()).device  # looked up again: `p` is unbound if the lookup above raised
        im = torch.rand(1, 3, 640, 640, device=device)
        # Profile a copy, as the primary path does, so profiling cannot leave hooks or buffers on the live model.
        flops = thop.profile(deepcopy(model), inputs=(im,), verbose=False)[0]
        fs = f', {flops * 2 / 1E9:.1f} GFLOPs'
    except Exception:
        # FLOPs reporting is best-effort (thop missing, model without parameters, ...): omit the figure rather
        # than abort the model summary.
        fs = ''
修改完后,再次运行。