
LoRA与参数高效微调(PEFT):大模型微调的标配
一、为什么需要参数高效微调?
1.1 全参数微调的困境
python
# ---- Why full-parameter fine-tuning is hard: parameter counts vs. GPU memory ----
import numpy as np
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

print("=" * 60)
print("全参数微调的挑战")
print("=" * 60)

# Model sizes in millions of parameters (approximate public figures;
# the GPT-4 entry is an unconfirmed estimate).
models = {
    'BERT-Base': 110,
    'BERT-Large': 340,
    'GPT-2 Small': 124,
    'GPT-2 XL': 1558,
    'LLaMA-7B': 7000,
    'LLaMA-13B': 13000,
    'LLaMA-33B': 33000,
    'LLaMA-65B': 65000,
    'GPT-3': 175000,
    'GPT-4': 1000000
}

# Rough memory estimate: ~4x the parameter count in MB (fp32 weights alone;
# optimizer state for full fine-tuning would need even more).
memory_needs = {name: size * 4 for name, size in models.items()}

display_models = ['BERT-Base', 'BERT-Large', 'LLaMA-7B', 'LLaMA-13B', 'LLaMA-33B', 'LLaMA-65B']
sizes = [models[m] for m in display_models]
memories = [memory_needs[m] for m in display_models]

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: parameter counts (log scale so BERT and LLaMA both stay visible).
ax1 = axes[0]
bars1 = ax1.barh(display_models, sizes, color='lightcoral')
ax1.set_xlabel('参数量 (百万)')
ax1.set_title('大模型参数量爆炸')
ax1.set_xscale('log')
for bar, size in zip(bars1, sizes):
    ax1.text(bar.get_width() * 1.1, bar.get_y() + bar.get_height()/2,
             f'{size}M', va='center', fontsize=9)

# Right panel: estimated GPU memory needed for full fine-tuning.
ax2 = axes[1]
bars2 = ax2.barh(display_models, memories, color='lightblue')
ax2.set_xlabel('显存需求 (MB)')
ax2.set_title('全参数微调的显存需求')
ax2.set_xscale('log')
for bar, mem in zip(bars2, memories):
    ax2.text(bar.get_width() * 1.1, bar.get_y() + bar.get_height()/2,
             f'{mem}MB', va='center', fontsize=9)

# Mark consumer-GPU memory limits.
# FIX: the 12 GB reference card is an RTX 3080-class GPU; the RTX 3090 has 24 GB.
ax2.axvline(x=24000, color='red', linestyle='--', label='RTX 4090 (24GB)')
ax2.axvline(x=12000, color='orange', linestyle='--', label='RTX 3080 (12GB)')
ax2.legend()

plt.suptitle('大模型微调:消费级GPU无法承受之重', fontsize=14)
plt.tight_layout()
plt.show()

print("\n💡 全参数微调的问题:")
print(" 1. 显存爆炸: 7B模型需要28GB+显存")
print(" 2. 时间成本: 微调需要数小时到数天")
print(" 3. 存储成本: 每个任务需要保存完整模型副本")
print(" 4. 灾难性遗忘: 容易忘记预训练知识")
print("\n🚀 解决方案: 参数高效微调 (PEFT)")
二、LoRA:低秩适配(Low-Rank Adaptation)
2.1 LoRA的核心思想
python
def visualize_lora_principle():
    """Visualize the core principle of LoRA: low-rank factorization of ΔW.

    Draws two panels: the LoRA math as a monospace text panel, and a
    schematic of the factorization ΔW = B × A with a concrete
    parameter-count example. Shows the figure; returns None.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # 1. The LoRA math, rendered as a text panel.
    ax1 = axes[0]
    ax1.axis('off')
    ax1.set_title('LoRA核心公式', fontsize=12)
    formula = """
🔬 LoRA的数学原理:
原始权重更新:
W' = W + ΔW
LoRA的低秩分解:
ΔW = B × A
其中:
- W ∈ R^(d×k) (原始权重)
- A ∈ R^(r×k) (低秩矩阵,r << d)
- B ∈ R^(d×r) (低秩矩阵,r << k)
前向传播:
h = Wx + BAx
参数量对比:
原始微调: d × k
LoRA微调: r × (d + k)
当 r << min(d, k) 时,参数大幅减少!
"""
    ax1.text(0.05, 0.95, formula, transform=ax1.transAxes, fontsize=10,
             verticalalignment='top', fontfamily='monospace')

    # 2. Schematic of the low-rank factorization.
    ax2 = axes[1]

    def draw_matrix(ax, x, y, width, height, title, color='lightblue'):
        # Draw a labelled rectangle representing a matrix, with its
        # (height × width) — in axes coordinates — captioned below.
        rect = plt.Rectangle((x, y), width, height, facecolor=color, ec='black', alpha=0.7)
        ax.add_patch(rect)
        ax.text(x + width/2, y + height/2, title, ha='center', va='center', fontsize=9)
        ax.text(x + width/2, y - 0.05, f'{int(height)}×{int(width)}',
                ha='center', fontsize=8)

    # Original update matrix ΔW ...
    draw_matrix(ax2, 0.05, 0.3, 0.25, 0.4, 'ΔW\n(d×k)', 'lightcoral')
    # ... factored into the two thin matrices B and A.
    draw_matrix(ax2, 0.45, 0.5, 0.12, 0.2, 'B\n(d×r)', 'lightgreen')
    draw_matrix(ax2, 0.65, 0.3, 0.12, 0.2, 'A\n(r×k)', 'lightgreen')
    # Multiplication and equality signs between the shapes.
    ax2.text(0.59, 0.4, '×', fontsize=16, ha='center', va='center')
    ax2.text(0.35, 0.4, '=', fontsize=16, ha='center', va='center')

    # Concrete parameter-count example (a typical 4096×4096 projection, r=8).
    d, k, r = 4096, 4096, 8
    original_params = d * k
    lora_params = r * (d + k)
    ratio = lora_params / original_params
    ax2.text(0.5, 0.15, f'示例: d={d}, k={k}, r={r}\n'
             f'原始参数: {original_params:,}\n'
             f'LoRA参数: {lora_params:,}\n'
             f'减少: {(1-ratio)*100:.1f}%',
             ha='center', fontsize=9,
             bbox=dict(boxstyle='round', facecolor='lightyellow'))
    ax2.set_xlim(0, 1)
    ax2.set_ylim(0, 1)
    ax2.axis('off')

    plt.suptitle('LoRA:用低秩矩阵近似参数更新', fontsize=14)
    plt.tight_layout()
    plt.show()

visualize_lora_principle()

print("\n📊 LoRA的关键特点:")
print(" - 参数量: 仅为原始模型的0.1%-1%")
print(" - 显存: 大幅降低,消费级GPU可微调")
print(" - 速度: 训练速度提升3-5倍")
print(" - 效果: 与全参数微调相当甚至更好")
print(" - 可插拔: 不同任务可切换不同LoRA权重")
2.2 LoRA的优势可视化
python
def visualize_lora_advantages():
    """Visualize LoRA's advantages over full fine-tuning in four panels.

    Panels: (1) GPU memory for a 7B model, (2) relative training speed,
    (3) pluggable adapter architecture, (4) benchmark accuracy comparison.
    Shows the figure; returns None.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # 1. GPU memory comparison for a 7B model.
    ax1 = axes[0, 0]
    methods = ['全参数微调', 'LoRA微调', 'QLoRA']
    memories = [28000, 14000, 7000]  # approximate MB for a 7B model
    colors = ['lightcoral', 'lightgreen', 'lightblue']
    bars = ax1.bar(methods, memories, color=colors)
    ax1.set_ylabel('显存需求 (MB)')
    ax1.set_title('7B模型显存对比')
    # FIX: the 12 GB reference card is an RTX 3080-class GPU; the RTX 3090 has 24 GB.
    ax1.axhline(y=24000, color='red', linestyle='--', label='RTX 4090 (24GB)')
    ax1.axhline(y=12000, color='orange', linestyle='--', label='RTX 3080 (12GB)')
    for bar, mem in zip(bars, memories):
        ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 500,
                 f'{mem}MB', ha='center', va='bottom', fontsize=9)
    ax1.legend()

    # 2. Relative training speed.
    ax2 = axes[0, 1]
    methods = ['全参数微调', 'LoRA微调']
    speeds = [1.0, 3.5]
    bars = ax2.bar(methods, speeds, color=['lightcoral', 'lightgreen'])
    ax2.set_ylabel('相对训练速度')
    ax2.set_title('训练速度对比')
    for bar, speed in zip(bars, speeds):
        ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                 f'{speed}x', ha='center', va='bottom', fontsize=11)

    # 3. Pluggability: one frozen base model, many task-specific adapters.
    ax3 = axes[1, 0]
    ax3.axis('off')
    ax3.set_title('LoRA的可插拔架构', fontsize=12)
    # Frozen base model on top.
    base = plt.Rectangle((0.2, 0.6), 0.6, 0.15,
                         facecolor='lightgray', ec='black')
    ax3.add_patch(base)
    ax3.text(0.5, 0.675, '基座模型 (冻结)', ha='center', va='center', fontsize=10)
    # A stack of per-task LoRA adapter modules below it.
    tasks = ['任务A', '任务B', '任务C', '任务D']
    colors = ['lightcoral', 'lightgreen', 'lightblue', 'lightyellow']
    y_pos = 0.4
    for task, color in zip(tasks, colors):
        lora = plt.Rectangle((0.3, y_pos-0.03), 0.4, 0.08,
                             facecolor=color, ec='black')
        ax3.add_patch(lora)
        ax3.text(0.5, y_pos, f'LoRA-{task}', ha='center', va='center', fontsize=9)
        y_pos -= 0.12
    # Arrow connecting adapters to the base model.
    ax3.annotate('', xy=(0.5, 0.6), xytext=(0.5, 0.43),
                 arrowprops=dict(arrowstyle='->', lw=1))
    ax3.text(0.5, 0.15, '切换任务只需加载不同LoRA权重!',
             ha='center', fontsize=10,
             bbox=dict(boxstyle='round', facecolor='lightgreen'))

    # 4. Accuracy comparison on common benchmarks (illustrative numbers).
    ax4 = axes[1, 1]
    datasets = ['GLUE', 'SuperGLUE', 'SQuAD', 'MNLI']
    full_finetune = [89.5, 72.3, 91.2, 87.4]
    lora = [89.2, 71.8, 90.8, 87.1]
    x = np.arange(len(datasets))
    width = 0.35
    ax4.bar(x - width/2, full_finetune, width, label='全参数微调', color='lightcoral')
    ax4.bar(x + width/2, lora, width, label='LoRA', color='lightgreen')
    ax4.set_xlabel('数据集')
    ax4.set_ylabel('准确率 (%)')
    ax4.set_title('LoRA vs 全参数微调效果对比')
    ax4.set_xticks(x)
    ax4.set_xticklabels(datasets)
    ax4.legend()

    plt.suptitle('LoRA的四大优势', fontsize=14)
    plt.tight_layout()
    plt.show()

visualize_lora_advantages()
三、使用PEFT库实现LoRA
3.1 环境配置与基础使用
python
def peft_usage_demo():
    """Print a walkthrough of applying LoRA with the PEFT library.

    The snippet is printed, not executed, so this function has no runtime
    dependency on `peft`/`transformers` being installed. Returns None.
    """
    print("\n" + "=" * 60)
    print("使用PEFT库实现LoRA")
    print("=" * 60)
    # The demonstration code, shown to the reader as-is.
    demo_code = """
# 安装依赖
# pip install peft transformers torch
from peft import LoraConfig, get_peft_model, TaskType
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

# 1. 加载基座模型
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# 2. 配置LoRA
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,        # 任务类型
    r=8,                               # 秩
    lora_alpha=32,                     # 缩放参数
    target_modules=["query", "value"], # 应用LoRA的模块
    lora_dropout=0.1,                  # Dropout率
    bias="none"                        # 偏置处理
)

# 3. 应用LoRA
lora_model = get_peft_model(model, lora_config)

# 4. 查看可训练参数
lora_model.print_trainable_parameters()
# 输出: trainable params: 884,736 || all params: 109,485,314 || trainable%: 0.81

# 5. 正常训练
# trainer = Trainer(model=lora_model, ...)
# trainer.train()

# 6. 保存和加载
lora_model.save_pretrained("./lora_weights")
# 加载时只需加载LoRA权重,基座模型可复用
"""
    print(demo_code)

peft_usage_demo()
3.2 LoRA配置参数详解
python
def visualize_lora_config():
    """Render a reference table of LoraConfig parameters and tuning advice.

    Shows the table as a monospace text figure; returns None.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.axis('off')
    # FIX: lora_alpha / r is the adapter's *scaling factor*, not the learning
    # rate — the original text mislabelled it (实际学习率 → 实际缩放比).
    config_text = """
╔═══════════════════════════════════════════════════════════════════════════════╗
║ LoRA配置参数详解 ║
╠═══════════════════════════════════════════════════════════════════════════════╣
║ ║
║ 参数名 类型 默认值 说明 ║
║ ─────────────────────────────────────────────────────────────────────────── ║
║ r int 8 低秩矩阵的秩,越大效果越好但参数越多 ║
║ lora_alpha int 32 缩放系数,实际缩放比 = lora_alpha / r ║
║ target_modules List None 应用LoRA的模块名(如["q", "v", "k"]) ║
║ lora_dropout float 0.0 Dropout率,防止过拟合 ║
║ bias str "none" 偏置处理:"none", "all", "lora_only" ║
║ fan_in_fan_out bool False 权重存储格式 ║
║ modules_to_save List None 除LoRA外需要保存的模块 ║
║ ║
║ 参数选择建议: ║
║ ─────────────────────────────────────────────────────────────────────────── ║
║ • r=4-16: 大多数任务的最佳范围 ║
║ • lora_alpha=16-32: 保持 lora_alpha/r ≈ 2-4 ║
║ • target_modules: 根据模型架构选择 ║
║ - BERT: ["query", "value"] ║
║ - LLaMA: ["q_proj", "v_proj", "k_proj", "o_proj"] ║
║ • lora_dropout=0.1: 数据量大时可增大 ║
║ ║
╚═══════════════════════════════════════════════════════════════════════════════╝
"""
    ax.text(0.05, 0.95, config_text, transform=ax.transAxes, fontsize=10,
            verticalalignment='top', fontfamily='monospace')
    ax.set_title('LoRA配置参数详解', fontsize=14, pad=20)
    plt.tight_layout()
    plt.show()

visualize_lora_config()
四、其他PEFT方法
4.1 PEFT方法全景
python
def visualize_peft_methods():
    """Give a visual overview of common PEFT methods and compare them.

    Panels: Prefix Tuning, P-Tuning, Adapter, and a method-comparison
    table. Shows the figure; returns None.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # 1. Prefix Tuning: learnable prefix vectors prepended to the input.
    ax1 = axes[0, 0]
    ax1.axis('off')
    ax1.set_title('Prefix Tuning', fontsize=11)
    tokens = ['[PREFIX]', '[PREFIX]', '输入', 'token', '序列']
    x_pos = np.linspace(0.1, 0.9, len(tokens))
    for token, x in zip(tokens, x_pos):
        # Prefix tokens in green, ordinary input tokens in blue.
        color = 'lightgreen' if token == '[PREFIX]' else 'lightblue'
        circle = plt.Circle((x, 0.6), 0.06, color=color, ec='black')
        ax1.add_patch(circle)
        ax1.text(x, 0.6, token, ha='center', va='center', fontsize=7)
    ax1.text(0.5, 0.3, '在输入前添加可学习的前缀向量', ha='center', fontsize=9,
             bbox=dict(boxstyle='round', facecolor='lightyellow'))

    # 2. P-Tuning: learnable virtual tokens inserted into the sequence.
    ax2 = axes[0, 1]
    ax2.axis('off')
    ax2.set_title('P-Tuning', fontsize=11)
    ax2.text(0.1, 0.7, '原始输入:', fontsize=9, fontweight='bold')
    ax2.text(0.3, 0.7, '这 个 电 影 很 好 看', fontsize=9)
    ax2.text(0.1, 0.5, 'P-Tuning:', fontsize=9, fontweight='bold')
    ax2.text(0.3, 0.5, '[P] [P] 这 个 电 影 很 好 看 [P]', fontsize=9)
    ax2.text(0.5, 0.2, '在序列中插入可学习的虚拟token', ha='center', fontsize=9,
             bbox=dict(boxstyle='round', facecolor='lightyellow'))

    # 3. Adapter: small trainable modules inserted between Transformer layers.
    ax3 = axes[1, 0]
    ax3.axis('off')
    ax3.set_title('Adapter', fontsize=11)
    transformer = plt.Rectangle((0.15, 0.5), 0.3, 0.15,
                                facecolor='lightblue', ec='black')
    ax3.add_patch(transformer)
    ax3.text(0.3, 0.575, 'Transformer层', ha='center', va='center', fontsize=8)
    adapter = plt.Rectangle((0.55, 0.5), 0.3, 0.15,
                            facecolor='lightgreen', ec='black')
    ax3.add_patch(adapter)
    ax3.text(0.7, 0.575, 'Adapter\n(可训练)', ha='center', va='center', fontsize=8)
    # Arrow from the frozen layer into the adapter.
    ax3.annotate('', xy=(0.55, 0.575), xytext=(0.45, 0.575),
                 arrowprops=dict(arrowstyle='->', lw=2))
    ax3.text(0.5, 0.3, '在Transformer层间插入小型适配器模块', ha='center', fontsize=9,
             bbox=dict(boxstyle='round', facecolor='lightyellow'))

    # 4. Method comparison table (monospace text panel).
    ax4 = axes[1, 1]
    ax4.axis('off')
    ax4.set_title('PEFT方法对比', fontsize=11)
    comparison = """
╔══════════════╦══════════════╦══════════════╦══════════════╗
║ 方法 ║ 参数量 ║ 推理开销 ║ 适用场景 ║
╠══════════════╬══════════════╬══════════════╬══════════════╣
║ LoRA ║ 极少 ║ 无 ║ 通用 ║
║ Prefix ║ 少 ║ 小 ║ 生成任务 ║
║ P-Tuning ║ 极少 ║ 小 ║ NLU任务 ║
║ Adapter ║ 中等 ║ 小 ║ 通用 ║
║ IA3 ║ 极少 ║ 无 ║ 通用 ║
╚══════════════╩══════════════╩══════════════╩══════════════╝
"""
    ax4.text(0.05, 0.95, comparison, transform=ax4.transAxes, fontsize=9,
             verticalalignment='top', fontfamily='monospace')

    plt.suptitle('参数高效微调(PEFT)方法全景', fontsize=14)
    plt.tight_layout()
    plt.show()

visualize_peft_methods()
五、QLoRA:进一步量化
5.1 QLoRA的原理
python
def visualize_qlora():
    """Visualize the QLoRA idea: 4-bit frozen base + 16-bit LoRA adapters.

    Draws an architecture sketch and a memory comparison, then prints the
    key QLoRA techniques. Shows the figure; returns None.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # 1. Architecture sketch.
    ax1 = axes[0]
    ax1.axis('off')
    ax1.set_title('QLoRA架构', fontsize=12)
    # Frozen base model, quantized to 4 bits.
    base_4bit = plt.Rectangle((0.1, 0.6), 0.35, 0.2,
                              facecolor='lightcoral', ec='black')
    ax1.add_patch(base_4bit)
    ax1.text(0.275, 0.7, '4-bit量化\n基座模型', ha='center', va='center', fontsize=9)
    ax1.text(0.275, 0.63, '(冻结)', ha='center', va='center', fontsize=8)
    # Trainable LoRA adapters kept in 16-bit precision.
    lora_adapter = plt.Rectangle((0.55, 0.6), 0.35, 0.2,
                                 facecolor='lightgreen', ec='black')
    ax1.add_patch(lora_adapter)
    ax1.text(0.725, 0.7, 'LoRA适配器\n(16-bit)', ha='center', va='center', fontsize=9)
    ax1.text(0.725, 0.63, '(可训练)', ha='center', va='center', fontsize=8)
    # Arrow from base model into the adapter.
    ax1.annotate('', xy=(0.55, 0.7), xytext=(0.45, 0.7),
                 arrowprops=dict(arrowstyle='->', lw=2))
    ax1.text(0.5, 0.4, '4-bit基座 + 16-bit LoRA\n总显存占用大幅降低',
             ha='center', fontsize=9,
             bbox=dict(boxstyle='round', facecolor='lightyellow'))

    # 2. Memory comparison for a 7B model.
    ax2 = axes[1]
    methods = ['FP16\n全参数', 'FP16\nLoRA', 'QLoRA\n(4-bit)']
    memories = [28000, 14000, 3500]  # approximate MB for a 7B model
    colors = ['lightcoral', 'lightblue', 'lightgreen']
    bars = ax2.bar(methods, memories, color=colors)
    ax2.set_ylabel('显存需求 (MB)')
    ax2.set_title('7B模型显存对比')
    for bar, mem in zip(bars, memories):
        ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 500,
                 f'{mem}MB', ha='center', va='bottom', fontsize=9)
    # Reference lines for common GPU memory sizes.
    ax2.axhline(y=24000, color='red', linestyle='--', alpha=0.7, label='24GB')
    ax2.axhline(y=12000, color='orange', linestyle='--', alpha=0.7, label='12GB')
    ax2.axhline(y=6000, color='green', linestyle='--', alpha=0.7, label='6GB')
    ax2.legend()

    plt.suptitle('QLoRA:4-bit量化 + LoRA,消费级GPU可微调大模型', fontsize=12)
    plt.tight_layout()
    plt.show()

    print("\n📊 QLoRA的关键技术:")
    print(" 1. 4-bit NormalFloat (NF4): 最优的4-bit量化")
    print(" 2. 双重量化: 进一步压缩量化常数")
    print(" 3. 分页优化器: 避免显存OOM")
    print(" 4. 效果: 65B模型可在单张24GB显卡微调")

visualize_qlora()
六、实战:使用LoRA微调BERT
python
# Complete LoRA fine-tuning example (conceptual walkthrough, printed only).
def lora_finetuning_demo():
    """Print an end-to-end recipe for fine-tuning BERT with LoRA.

    Each step prints the code a user would run; nothing heavy is executed,
    so this works without `peft`/`transformers` installed. Returns None.
    """
    print("\n" + "=" * 60)
    print("LoRA微调完整流程")
    print("=" * 60)

    # Step 1: install dependencies.
    print("\n📦 步骤1: 安装依赖")
    print(" pip install peft transformers datasets torch")

    # Step 2: prepare the dataset.
    print("\n📁 步骤2: 准备数据")
    print(" from datasets import load_dataset")
    print(" dataset = load_dataset('imdb') # 情感分析数据集")

    # Step 3: load the base model and tokenizer.
    print("\n🤖 步骤3: 加载基座模型")
    print("""
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from peft import LoraConfig, get_peft_model, TaskType

model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
""")

    # Step 4: configure and apply LoRA.
    print("\n⚙️ 步骤4: 配置LoRA")
    print("""
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=32,
    target_modules=["query", "value"],
    lora_dropout=0.1,
    bias="none"
)
lora_model = get_peft_model(model, lora_config)
lora_model.print_trainable_parameters()
# 输出: trainable params: 884,736 || all params: 109,485,314 || trainable%: 0.81
""")

    # Step 5: train with the Hugging Face Trainer.
    print("\n🏋️ 步骤5: 训练")
    print("""
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./lora_results",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    learning_rate=2e-4,
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)
trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
)
trainer.train()
""")

    # Step 6: save / reload only the small LoRA weights.
    print("\n💾 步骤6: 保存和加载")
    print("""
# 保存LoRA权重(很小,几MB)
lora_model.save_pretrained("./my_lora_weights")

# 加载
from peft import PeftModel
base_model = AutoModelForSequenceClassification.from_pretrained(model_name)
lora_model = PeftModel.from_pretrained(base_model, "./my_lora_weights")
""")

    # Step 7: inference with the merged model.
    print("\n🔮 步骤7: 推理")
    print("""
def predict(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    outputs = lora_model(**inputs)
    probs = torch.softmax(outputs.logits, dim=1)
    return probs[0][1].item()  # 正面概率

print(predict("This movie is great!"))  # 输出: 0.98
""")

lora_finetuning_demo()
七、学习检查清单
LoRA核心
- 理解低秩分解的原理
- 掌握LoRA的数学公式
- 知道LoRA的参数配置
- 理解LoRA的优势和局限
其他PEFT方法
- 了解Prefix Tuning
- 了解P-Tuning
- 了解Adapter
- 知道各方法的适用场景
实践能力
- 会用PEFT库实现LoRA
- 能配置LoRA参数
- 知道如何保存和加载LoRA权重
- 了解QLoRA的原理
八、总结
PEFT方法对比总结:
| 方法 | 参数量 | 推理开销 | 适用场景 | 推荐度 |
|---|---|---|---|---|
| LoRA | ⭐⭐⭐⭐⭐ | 无 | 通用 | ⭐⭐⭐⭐⭐ |
| Prefix | ⭐⭐⭐⭐ | 小 | 生成 | ⭐⭐⭐ |
| P-Tuning | ⭐⭐⭐⭐⭐ | 小 | NLU | ⭐⭐⭐ |
| Adapter | ⭐⭐⭐ | 小 | 通用 | ⭐⭐⭐ |
| QLoRA | ⭐⭐⭐⭐⭐ | 无 | 大模型 | ⭐⭐⭐⭐⭐ |
选择指南:
首选 → LoRA(最成熟、最通用)
大模型微调 → QLoRA(显存友好)
生成任务 → Prefix Tuning
追求极致参数效率 → P-Tuning v2
记住:
- LoRA是大模型微调的标配
- 参数高效微调让消费级GPU可用
- QLoRA让单卡微调65B模型成为可能
- 选择合适的PEFT方法事半功倍