YOLOv8/YOLOv11改进 添加CBAM、GAM、SimAM、EMA、CAA、ECA、CA等多种注意力机制

目录

前言

CBAM

GAM

SimAM

EMA

CAA

ECA

CA

添加方法

YAML文件添加

使用改进训练


前言

本篇文章将为大家介绍Ultralytics/YOLOv8/YOLOv11中常用注意力机制的添加,可以满足一些简单的涨点需求。本文仅写方法,原理不多讲解,需要可跳转论文查看,文章中出现的所有结构示意图都来自论文中。

改进模块的教程制作不易,如果这篇文章对你有帮助的话,请点赞、收藏和打赏!

CBAM

论文:点击查看论文

结构示意图

代码

python 复制代码
import torch
import torch.nn as nn

class ChannelAttention(nn.Module):
    def __init__(self, channels: int) -> None:
        """Initializes the class and sets the basic configurations and instance variables required."""
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
        self.act = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Applies forward pass using activation on convolutions of the input, optionally using batch normalization."""
        return x * self.act(self.fc(self.pool(x)))

class SpatialAttention(nn.Module):
    """Spatial-attention module."""

    def __init__(self, kernel_size=7):
        """Initialize Spatial-attention module with kernel size argument."""
        super().__init__()
        assert kernel_size in (3, 7), "kernel size must be 3 or 7"
        padding = 3 if kernel_size == 7 else 1
        self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.act = nn.Sigmoid()

    def forward(self, x):
        """Apply channel and spatial attention on input for feature recalibration."""
        return x * self.act(self.cv1(torch.cat([torch.mean(x, 1, keepdim=True), torch.max(x, 1, keepdim=True)[0]], 1)))

class CBAM(nn.Module):
    """Convolutional Block Attention Module."""

    def __init__(self, c1, kernel_size=7):
        """Initialize CBAM with given input channel (c1) and kernel size."""
        super().__init__()
        self.channel_attention = ChannelAttention(c1)
        self.spatial_attention = SpatialAttention(kernel_size)

    def forward(self, x):
        """Applies the forward pass through C1 module."""
        return self.spatial_attention(self.channel_attention(x))

GAM

论文:点击查看论文

结构示意图

代码

python 复制代码
import torch
import torch.nn as nn

class GAM(nn.Module):
    def __init__(self, in_channels, rate=4):
        super().__init__()
        out_channels = in_channels
        in_channels = int(in_channels)
        out_channels = int(out_channels)
        inchannel_rate = int(in_channels / rate)

        self.linear1 = nn.Linear(in_channels, inchannel_rate)
        self.relu = nn.ReLU(inplace=True)
        self.linear2 = nn.Linear(inchannel_rate, in_channels)

        self.conv1 = nn.Conv2d(in_channels, inchannel_rate, kernel_size=7, padding=3, padding_mode='replicate')

        self.conv2 = nn.Conv2d(inchannel_rate, out_channels, kernel_size=7, padding=3, padding_mode='replicate')

        self.norm1 = nn.BatchNorm2d(inchannel_rate)
        self.norm2 = nn.BatchNorm2d(out_channels)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        b, c, h, w = x.shape
        # B,C,H,W ==> B,H*W,C
        x_permute = x.permute(0, 2, 3, 1).view(b, -1, c)

        # B,H*W,C ==> B,H,W,C
        x_att_permute = self.linear2(self.relu(self.linear1(x_permute))).view(b, h, w, c)

        # B,H,W,C ==> B,C,H,W
        x_channel_att = x_att_permute.permute(0, 3, 1, 2)

        x = x * x_channel_att

        x_spatial_att = self.relu(self.norm1(self.conv1(x)))
        x_spatial_att = self.sigmoid(self.norm2(self.conv2(x_spatial_att)))

        out = x * x_spatial_att

        return out

if __name__ == '__main__':
    img = torch.rand(1, 64, 32, 48)
    b, c, h, w = img.shape
    net = GAM(in_channels=c, out_channels=c)
    output = net(img)
    print(output.shape)

SimAM

论文:点击查看论文

结构示意图

代码

python 复制代码
import torch
import torch.nn as nn


class SimAM(torch.nn.Module):
    def __init__(self, e_lambda=1e-4):
        super(SimAM, self).__init__()

        self.activaton = nn.Sigmoid()
        self.e_lambda = e_lambda

    def __repr__(self):
        s = self.__class__.__name__ + '('
        s += ('lambda=%f)' % self.e_lambda)
        return s

    @staticmethod
    def get_module_name():
        return "simam"

    def forward(self, x):
        b, c, h, w = x.size()

        n = w * h - 1

        x_minus_mu_square = (x - x.mean(dim=[2, 3], keepdim=True)).pow(2)
        y = x_minus_mu_square / (4 * (x_minus_mu_square.sum(dim=[2, 3], keepdim=True) / n + self.e_lambda)) + 0.5

        return x * self.activaton(y)


if __name__ == '__main__':
    input = torch.randn(3, 64, 7, 7)
    model = SimAM()
    outputs = model(input)
    print(outputs.shape)

EMA

论文:点击查看论文

结构图

代码

python 复制代码
import torch
from torch import nn

class EMA(nn.Module):
    def __init__(self, channels, factor=8):
        super(EMA, self).__init__()
        self.groups = factor
        assert channels // self.groups > 0
        self.softmax = nn.Softmax(-1)
        self.agp = nn.AdaptiveAvgPool2d((1, 1))
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))
        self.gn = nn.GroupNorm(channels // self.groups, channels // self.groups)
        self.conv1x1 = nn.Conv2d(channels // self.groups, channels // self.groups, kernel_size=1, stride=1, padding=0)
        self.conv3x3 = nn.Conv2d(channels // self.groups, channels // self.groups, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        b, c, h, w = x.size()
        group_x = x.reshape(b * self.groups, -1, h, w)  # b*g,c//g,h,w
        x_h = self.pool_h(group_x)
        x_w = self.pool_w(group_x).permute(0, 1, 3, 2)
        hw = self.conv1x1(torch.cat([x_h, x_w], dim=2))
        x_h, x_w = torch.split(hw, [h, w], dim=2)
        x1 = self.gn(group_x * x_h.sigmoid() * x_w.permute(0, 1, 3, 2).sigmoid())
        x2 = self.conv3x3(group_x)
        x11 = self.softmax(self.agp(x1).reshape(b * self.groups, -1, 1).permute(0, 2, 1))
        x12 = x2.reshape(b * self.groups, c // self.groups, -1)  # b*g, c//g, hw
        x21 = self.softmax(self.agp(x2).reshape(b * self.groups, -1, 1).permute(0, 2, 1))
        x22 = x1.reshape(b * self.groups, c // self.groups, -1)  # b*g, c//g, hw
        weights = (torch.matmul(x11, x12) + torch.matmul(x21, x22)).reshape(b * self.groups, 1, h, w)
        return (group_x * weights.sigmoid()).reshape(b, c, h, w)

CAA

论文:点击查看论文

结构示意图

代码

python 复制代码
import torch.nn as nn

def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Pad to 'same' shape outputs."""
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p


class Conv(nn.Module):
    """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Initialize Conv layer with given arguments including activation."""
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Apply convolution, batch normalization and activation to input tensor."""
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Perform transposed convolution of 2D data."""
        return self.act(self.conv(x))

class CAA(nn.Module):
    def __init__(self, ch, h_kernel_size = 11, v_kernel_size = 11) -> None:
        super().__init__()
        
        self.avg_pool = nn.AvgPool2d(7, 1, 3)
        self.conv1 = Conv(ch, ch)
        self.h_conv = nn.Conv2d(ch, ch, (1, h_kernel_size), 1, (0, h_kernel_size // 2), 1, ch)
        self.v_conv = nn.Conv2d(ch, ch, (v_kernel_size, 1), 1, (v_kernel_size // 2, 0), 1, ch)
        self.conv2 = Conv(ch, ch)
        self.act = nn.Sigmoid()
    
    def forward(self, x):
        attn_factor = self.act(self.conv2(self.v_conv(self.h_conv(self.conv1(self.avg_pool(x))))))
        return attn_factor * x

ECA

论文:点击查看论文

结构示意图

代码

python 复制代码
import torch, math
from torch import nn

class EfficientChannelAttention(nn.Module):           # Efficient Channel Attention module
    def __init__(self, c, b=1, gamma=2):
        super(EfficientChannelAttention, self).__init__()
        t = int(abs((math.log(c, 2) + b) / gamma))
        k = t if t % 2 else t + 1

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv1d(1, 1, kernel_size=k, padding=int(k/2), bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.avg_pool(x)
        out = self.conv1(out.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)
        out = self.sigmoid(out)
        return out * x


if __name__ == '__main__':
    input = torch.randn(50, 512, 7, 7)
    eca = EfficientChannelAttention(c=512)
    output = eca(input)
    print(output.shape)

CA

论文:点击查看论文

结构示意图

代码

python 复制代码
import torch
import torch.nn as nn

class h_sigmoid(nn.Module):
    def __init__(self, inplace=True):
        super(h_sigmoid, self).__init__()
        self.relu = nn.ReLU6(inplace=inplace)
        
    def forward(self, x):
        return self.relu(x + 3) / 6

class h_swish(nn.Module):
    def __init__(self, inplace=True):
        super(h_swish, self).__init__()
        self.sigmoid = h_sigmoid(inplace=inplace)

    def forward(self, x):
        return x * self.sigmoid(x)

class CA(nn.Module):
    def __init__(self, inp, reduction=32):
        super(CA, self).__init__()
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))

        mip = max(8, inp // reduction)

        self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(mip)
        self.act = h_swish()

        self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
        self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        identity = x
        n, c, h, w = x.size()
        x_h = self.pool_h(x)
        x_w = self.pool_w(x).permute(0, 1, 3, 2)
        y = torch.cat([x_h, x_w], dim=2)
        y = self.conv1(y)
        y = self.bn1(y)
        y = self.act(y)
        x_h, x_w = torch.split(y, [h, w], dim=2)
        x_w = x_w.permute(0, 1, 3, 2)
        a_h = self.conv_h(x_h).sigmoid()
        a_w = self.conv_w(x_w).sigmoid()
        out = identity * a_w * a_h
        return out

添加方法

1.创建文件

先在ultralytics-main/ultralytics/nn 目录下创建AddAttention文件夹,然后复制其中一个想使用的注意力机制到AddAttention文件夹下创建py文件,比如CBAM.py,然后同样在该目录下创建一个__init__.py 文件,里面用哪个注意力机制就导入哪个文件,比如

python 复制代码
from .CBAM import *
from .GAM import *
from .SimAM import *
from .EMA import *
from .CAA import *
from .ECA import *
from .CA import *

用哪个导入哪个就可以,没有创建py文件不要导入,会报错

2.导入文件

找到ultralytics-main/ultralytics/nn/tasks.py,打开后先导入AddAttention文件夹中所有的库,输入

python 复制代码
from .AddAttention import *

效果如下图,可以在前几行任意一行位置添加。

然后往下翻找到 parse_model 函数,在elif m is AIFI 前面添加。

python 复制代码
        #注意力机制
        elif m in {CBAM, GAM, EMA, ECA, CA, CAA}:
            c2 = ch[f]
            args = [c2, *args]

大概是1000行左右的位置,添加到elif m is AIFI 前面,注意缩进。用到哪个添加哪个,没有用到的添加会报错,注意SimAM没有参数传入,所以这一步不需要添加SimAM。

YAML文件添加

模块需要加入到YAML文件中并使用训练才算被应用的改进,YOLOv11的YAML如下,

在ultralytics-main/ultralytics/cfg/models/11 目录下新建一个 yolo11n-CBAM.yaml ,其它注意力机制请自行修改文件名,本文列出了很多注意力机制可添加的位置,使用时留一个两个就可以。

bash 复制代码
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLO11 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolo11n.yaml' will call yolo11.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.50, 0.25, 1024] # summary: 319 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs
  s: [0.50, 0.50, 1024] # summary: 319 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs
  m: [0.50, 1.00, 512] # summary: 409 layers, 20114688 parameters, 20114672 gradients, 68.5 GFLOPs
  l: [1.00, 1.00, 512] # summary: 631 layers, 25372160 parameters, 25372144 gradients, 87.6 GFLOPs
  x: [1.00, 1.50, 512] # summary: 631 layers, 56966176 parameters, 56966160 gradients, 196.0 GFLOPs

# YOLO11n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 2, C3k2, [256, False, 0.25]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 2, C3k2, [512, False, 0.25]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 2, C3k2, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 2, C3k2, [1024, True]] #8
  - [-1, 1, CBAM, []] #9
  - [-1, 1, SPPF, [1024, 5]] # 10
  - [-1, 1, CBAM, []]    # 11
  - [-1, 2, C2PSA, [1024]] # 12
  - [-1, 1, CBAM, []]  #13
  # - [-1, 1, GAM, []]  # 13
  # - [-1, 1, SimAM, []]  # 13
  # - [-1, 1, EMA, []]  # 13
  # - [-1, 1, CAA, []]  # 13
  # - [-1, 1, ECA, []]  # 13
  # - [-1, 1, CA, []]  # 13

# YOLO11n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 2, C3k2, [512, False]] # 16
  - [-1, 1, CBAM, []]  #17

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 2, C3k2, [256, False]] # 20 (P3/8-small)
  - [-1, 1, CBAM, []]  #21

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 17], 1, Concat, [1]] # cat head P4
  - [-1, 2, C3k2, [512, False]] # 24 (P4/16-medium)
  - [-1, 1, CBAM, []]  #25

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P5
  - [-1, 2, C3k2, [1024, True]] # 28 (P5/32-large)
  - [-1, 1, CBAM, []]  #29

  - [[21, 25, 29], 1, Detect, [nc]] # Detect(P3, P4, P5)

多个位置可添加,多余的需要删除,CBAM可以替换为其它的,自行替换且在不同位置添加测试效果。

YOLOv8的YAML如下,在ultralytics-main/ultralytics/cfg/models/v8 目录下新建一个 yolov8n-CBAM.yaml,其它注意力机制请自行修改文件名。

bash 复制代码
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers,  3157200 parameters,  3157184 gradients,   8.9 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients,  28.8 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients,  79.3 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]  #8
  - [-1, 1, CBAM, []]  #9
  - [-1, 1, SPPF, [1024, 5]] # 10
  - [-1, 1, CBAM, []]  #11
  # - [-1, 1, GAM, []]  # 11
  # - [-1, 1, SimAM, []]  # 11
  # - [-1, 1, EMA, []]  # 11
  # - [-1, 1, CAA, []]  # 11
  # - [-1, 1, ECA, []]  # 11
  # - [-1, 1, CA, []]  # 11

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 14
  - [-1, 1, CBAM, []]  #15

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 18 (P3/8-small)
  - [-1, 1, CBAM, []]  #19

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 15], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 22 (P4/16-medium)
  - [-1, 1, CBAM, []]  #23

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 11], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 26 (P5/32-large)
  - [-1, 1, CBAM, []]  #27

  - [[19, 23, 27], 1, Detect, [nc]] # Detect(P3, P4, P5)

需要绘制网络结构图的可看下面这篇文章。

YOLOv11/YOLOv8网络结构图绘制,本文适用于论文添加修改模块,绘制属于自己的网络结构图_在python用graphviz画yolov8算法结构图-CSDN博客文章浏览阅读1.4k次,点赞10次,收藏27次。本文将将会您绘制自己的YOLOv11/v8模型网络结构图,无论是针对初学者还是有经验的深度学习研究者,本文都将为您提供清晰的指导和实用的技巧,使您能够快速上手绘制高质量的网络结构图,为您的研究工作增添亮点。_在python用graphviz画yolov8算法结构图https://blog.csdn.net/qq_67105081/article/details/144703912?spm=1001.2014.3001.5501

使用改进训练

python 复制代码
from ultralytics import YOLO
if __name__ == '__main__': 
    model = YOLO('ultralytics/cfg/models/11/yolo11n-CBAM.yaml')  # 从YAML建立一个新模型
    #model.load('yolo11n.pt')
    # # 训练模型
    results = model.train(data='data.yaml',
                      epochs=100, imgsz=640, device=0, optimizer='SGD', workers=8, batch=64, amp=False)

使用YOLOv11训练代码做演示,YOLOv8则替换文件路径及文件名。

改进模块的教程制作不易,如果这篇文章对你有帮助的话,请点赞、收藏和打赏!

相关推荐
小兔崽子去哪了8 分钟前
RFM 模型 项目实战
python
昨天那个谁谁19 分钟前
ROS2运行时报无法加载create_key等符号错误
c++·python·ros2
nju_spy1 小时前
python 算法题基础常用总结(比赛 or 机试 or 面试)
python·记忆化搜索·位运算·二分查找 - bisect·排序与lambda·最短路和最小生成树·堆与优先队列
Deng8723473481 小时前
自动化极验3点选验证码的识别与验证方案
运维·python·自动化
川石课堂软件测试1 小时前
自动化测试的基本概念及常用框架
数据库·python·功能测试·测试工具·单元测试·自动化·流程图
灰勒塔德1 小时前
jetson orin nano super开发指南
linux·服务器·python
8278209372 小时前
python scp 备份
开发语言·python
TOYOAUTOMATON2 小时前
自动化工业夹爪
大数据·人工智能·算法·目标检测·机器人
poggioxay2 小时前
JAVA零基础入门知识3(持续更新中)
java·开发语言·python
serve the people2 小时前
TensorFlow 基础训练循环(简化版 + 补全代码)
人工智能·python·tensorflow