什么是 LoRA(Low-Rank Adaptation)
- 不训练完整模型权重,只训练低秩小矩阵
- 冻结主干网络,仅训练 1%~2% 参数
- 速度快、显存占用低、更不容易过拟合
- 推理时可直接融合进原权重,不增加延迟
text
原始权重矩阵 W (D × K)
↓ 冻结不动
LoRA 插入两个小矩阵 A、B
A: D × r
B: r × K
r ≪ min(D,K) # 秩非常小(默认8/16)
前向传播:
h = W x + (α/r) * B A x
推理融合:
W_final = W + (B A) * (α/r)
完全变成普通卷积,无额外计算
1. 配置文件
yaml
# YOLO26 训练配置 yolo26-lora.yaml
lora: True # 开启 LoRA
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0
freeze_backbone: True # 冻结主干,只训练 LoRA
2. lora.py 文件
python
import torch
import torch.nn as nn
import torch.nn.functional as F
# ---------------------------
# LoRA 卷积层(适配 YOLO26 Conv)
# ---------------------------
class LoRAConv2d(nn.Module):
    """Wrap a frozen ``nn.Conv2d`` with a trainable low-rank (LoRA) update.

    The original convolution is frozen; only the two small factor convs
    (``lora_down``: C_in -> r, ``lora_up``: r -> C_out) are trainable.
    Forward computes ``conv(x) + scaling * up(down(x))`` where
    ``scaling = lora_alpha / r``. ``lora_up`` is zero-initialized, so the
    wrapper is an exact no-op at the start of training.

    Args:
        conv: the existing ``nn.Conv2d`` to adapt (will be frozen in place).
        r: LoRA rank.
        lora_alpha: LoRA scaling numerator (effective scale is alpha / r).
        dropout: dropout probability applied between down and up projections.
    """

    def __init__(self, conv, r=8, lora_alpha=16, dropout=0.0):
        super().__init__()
        self.conv = conv  # original convolution (frozen)
        self.r = r
        self.lora_alpha = lora_alpha
        self.scaling = lora_alpha / r
        # Freeze the original weights.
        for p in self.conv.parameters():
            p.requires_grad = False
        in_channels = conv.in_channels
        out_channels = conv.out_channels
        kh, kw = conv.kernel_size
        # LoRA is applied only to square 1x1 / 3x3, non-grouped convs; any
        # other conv passes through unchanged (lora branches stay None).
        # BUG FIX vs. original: init was called unconditionally, crashing on
        # None for unsupported kernel sizes; and lora_down ignored the conv's
        # stride/padding/dilation, so strided convs produced a shape mismatch
        # at the residual add.
        if conv.groups == 1 and kh == kw and kh in (1, 3):
            self.lora_down = nn.Conv2d(
                in_channels, r, kh,
                stride=conv.stride,
                padding=conv.padding,
                dilation=conv.dilation,
                bias=False,
            )
            self.lora_up = nn.Conv2d(r, out_channels, 1, bias=False)
            nn.init.normal_(self.lora_down.weight, std=1e-5)
            nn.init.zeros_(self.lora_up.weight)  # delta starts at exactly zero
        else:
            self.lora_down = None
            self.lora_up = None
        self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity()

    def forward(self, x):
        """Frozen conv output plus the scaled low-rank correction."""
        out = self.conv(x)
        if self.lora_down is not None:
            out = out + self.lora_up(self.dropout(self.lora_down(x))) * self.scaling
        return out
# ---------------------------
# LoRA 线性层(适配 YOLO26 Detect)
# ---------------------------
class LoRALinear(nn.Module):
    """Wrap a frozen ``nn.Linear`` with a trainable low-rank (LoRA) update.

    Mirrors :class:`LoRAConv2d`: the wrapped layer is frozen, and a pair of
    small trainable projections (``in -> r -> out``) adds a scaled delta.
    ``lora_up`` is zero-initialized so the wrapper starts as an exact no-op.

    Args:
        linear: the existing ``nn.Linear`` to adapt (frozen in place).
        r: LoRA rank.
        lora_alpha: LoRA scaling numerator (effective scale is alpha / r).
        dropout: dropout probability applied between down and up projections.
    """

    def __init__(self, linear, r=8, lora_alpha=16, dropout=0.0):
        super().__init__()
        self.linear = linear  # original linear layer (frozen)
        for p in linear.parameters():
            p.requires_grad = False
        self.r = r
        self.lora_alpha = lora_alpha
        self.scaling = lora_alpha / r
        in_features = linear.in_features
        out_features = linear.out_features
        self.lora_down = nn.Linear(in_features, r, bias=False)
        self.lora_up = nn.Linear(r, out_features, bias=False)
        nn.init.normal_(self.lora_down.weight, std=1e-5)
        nn.init.zeros_(self.lora_up.weight)  # delta starts at exactly zero
        # Consistency with LoRAConv2d: skip the Dropout module entirely when
        # the rate is zero (Dropout(0.0) is a no-op anyway, so behavior is
        # unchanged).
        self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity()

    def forward(self, x):
        """Frozen linear output plus the scaled low-rank correction."""
        out = self.linear(x)
        lora_out = self.lora_up(self.dropout(self.lora_down(x))) * self.scaling
        return out + lora_out
# ---------------------------
# 自动注入 LoRA(核心!无模型侵入)
# ---------------------------
def inject_lora(model, r=8, alpha=16, dropout=0.0, target_modules=None):
    """Recursively replace matching children of ``model`` with LoRA wrappers.

    Walks the module tree; any ``nn.Conv2d`` / ``nn.Linear`` child whose
    attribute name contains one of ``target_modules`` is swapped for a
    :class:`LoRAConv2d` / :class:`LoRALinear` wrapper. The model's own code
    is untouched — only the module attributes are rebound.

    Fixes vs. the original:
    - iterate a snapshot (``list(...)``) since ``setattr`` mutates the
      children dict we are walking;
    - skip modules that are already LoRA wrappers, making repeated calls
      idempotent (the default targets include ``'conv'``/``'linear'``, which
      are exactly the attribute names inside the wrappers, so a second call
      used to nest wrappers).

    Args:
        model: root module to modify in place.
        r: LoRA rank.
        alpha: LoRA scaling numerator.
        dropout: LoRA dropout probability.
        target_modules: attribute-name substrings to match; defaults to the
            usual YOLO conv blocks, attention projections, and head linears.
    """
    if target_modules is None:
        target_modules = [
            'conv', 'cv1', 'cv2', 'cv3',  # Conv blocks
            'q', 'k', 'v', 'proj',        # Transformer projections (if any)
            'fc', 'linear',               # detection-head linear layers
        ]
    for name, module in list(model.named_children()):
        # Already wrapped -> leave alone (keeps injection idempotent and
        # protects the wrapper's internal .conv / .linear attributes).
        if isinstance(module, (LoRAConv2d, LoRALinear)):
            continue
        matched = any(key in name for key in target_modules)
        if isinstance(module, nn.Conv2d) and matched:
            if module.weight.requires_grad:  # only replace trainable layers
                setattr(model, name, LoRAConv2d(module, r, alpha, dropout))
        elif isinstance(module, nn.Linear) and matched:
            setattr(model, name, LoRALinear(module, r, alpha, dropout))
        else:
            # Recurse into containers / non-matching modules.
            inject_lora(module, r, alpha, dropout, target_modules)
# ---------------------------
# 合并 LoRA 权重(推理提速)
# ---------------------------
@torch.no_grad()
def merge_lora(model):
    """Fold every LoRA delta into its frozen base weight, in place.

    After merging, inference cost is identical to the original model. Note
    the merge is one-shot: calling it twice adds the delta twice (strip or
    re-zero the LoRA factors after merging if the model keeps running).

    Fixes vs. the original:
    - the conv merge used ``up.weight @ down.weight`` on 4-D tensors, which
      is a *batched* matmul over the trailing two dims with non-broadcastable
      batch dims ``(out, r) x (r, in)`` — it raised at runtime. The correct
      contraction over the rank dimension is an einsum;
    - wrappers whose LoRA branch is disabled (``lora_down is None``) were
      dereferenced and crashed; they are now skipped.
    """
    for module in model.modules():
        if isinstance(module, LoRAConv2d):
            if module.lora_down is None:
                continue  # pass-through wrapper: nothing to merge
            out_c = module.lora_up.out_channels
            rank = module.lora_up.in_channels
            up = module.lora_up.weight.view(out_c, rank)  # (out, r)
            down = module.lora_down.weight                # (r, in, kh, kw)
            # delta[o, i, h, w] = sum_r up[o, r] * down[r, i, h, w]
            # Works for both 1x1 and 3x3 (any kernel) conv factors.
            delta = torch.einsum('or,rihw->oihw', up, down) * module.scaling
            module.conv.weight += delta
        elif isinstance(module, LoRALinear):
            # (out, r) @ (r, in) -> (out, in), matching linear.weight.
            delta = (module.lora_up.weight @ module.lora_down.weight) * module.scaling
            module.linear.weight += delta
3. 集成到 YOLO26
python
from lora import inject_lora, merge_lora
# Load the YOLO26 model.
# NOTE(review): `YOLO26` and `cfg` are assumed to be provided by the
# surrounding training script — they are not defined in this snippet.
model = YOLO26(cfg='yolo26.yaml').cuda()
# ------------------ Enable LoRA ------------------
# When the config sets `lora: True`, inject the LoRA adapters into the
# freshly built model; rank / alpha / dropout come from the same config,
# falling back to the defaults (8 / 16 / 0.0).
if cfg.get('lora', False):
    print("✅ 启用 LoRA 参数高效微调")
    inject_lora(
        model,
        r=cfg.get('lora_rank', 8),
        alpha=cfg.get('lora_alpha', 16),
        dropout=cfg.get('lora_dropout', 0.0)
    )
# Merge the LoRA weights after training (inference-time speedup):
# merge_lora(model)
4. 训练配置文件
yolo26_lora_train.yaml
yaml
epochs: 100
batch: 16
imgsz: 640
freeze_backbone: True # 冻结主干
# LoRA 设置
lora: True
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0
optimizer: AdamW
lr0: 0.001
weight_decay: 0.0001