Purpose
To avoid the trap of "learn it in a day, lose it the moment you use it", here are some working notes.
Notes
- This post follows directly from the previous one, Model Fine-Tuning 1: Basic Theory; to build up gradually, please read them in order.
- The previous posts covered six different fine-tuning methods; this one introduces advanced operations that apply across all of them:
  - Custom model adaptation (models you build yourself can also be fine-tuned)
  - Loading and switching multiple adapters (switching adapters between tasks)
  - Disabling adapters (switching back to the model's original, general-purpose behavior)
  - Model merging (merging parameters, i.e. folding an adapter into the base model)
Hands-on Code (Jupyter)
1. Custom Model Adaptation
python
import torch
from torch import nn
from peft import LoraConfig, get_peft_model, PeftModel
python
# Create a custom model
net = nn.Sequential(
nn.Linear(10, 10),
nn.ReLU(),
nn.Linear(10, 2)
)
net
Sequential(
(0): Linear(in_features=10, out_features=10, bias=True)
(1): ReLU()
(2): Linear(in_features=10, out_features=2, bias=True)
)
python
# Print the model's parameter names
for name, param in net.named_parameters():
print(name)
0.weight
0.bias
2.weight
2.bias
python
# Fine-tuning config: apply LoRA only to the layer named "0"
config = LoraConfig(target_modules=["0"])
config
LoraConfig(task_type=None, peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, peft_version='0.18.1', base_model_name_or_path=None, revision=None, inference_mode=False, r=8, target_modules={'0'}, exclude_modules=None, lora_alpha=8, lora_dropout=0.0, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', trainable_token_indices=None, loftq_config={}, eva_config=None, corda_config=None, use_dora=False, alora_invocation_tokens=None, use_qalora=False, qalora_group_size=16, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False), lora_bias=False, target_parameters=None, arrow_config=None, ensure_weight_tying=False)
python
# Wrap the model into a PEFT model
peft_model = get_peft_model(net, config)
peft_model
PeftModel(
(base_model): LoraModel(
(model): Sequential(
(0): lora.Linear(
(base_layer): Linear(in_features=10, out_features=10, bias=True)
(lora_dropout): ModuleDict(
(default): Identity()
)
(lora_A): ModuleDict(
(default): Linear(in_features=10, out_features=8, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=8, out_features=10, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
(1): ReLU()
(2): Linear(in_features=10, out_features=2, bias=True)
)
)
)
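As a quick sanity check, here is a minimal sketch (using PEFT's print_trainable_parameters helper) to confirm that only the injected LoRA matrices are trainable while the original Linear layers stay frozen:
python
# Only lora_A / lora_B of layer "0" should require gradients; the base layers are frozen
peft_model.print_trainable_parameters()
for name, param in peft_model.named_parameters():
    print(name, param.requires_grad)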
2. Loading and Switching Multiple Adapters
python
net = nn.Sequential(
nn.Linear(10, 10),
nn.ReLU(),
nn.Linear(10, 2)
)
net
Sequential(
(0): Linear(in_features=10, out_features=10, bias=True)
(1): ReLU()
(2): Linear(in_features=10, out_features=2, bias=True)
)
python
config = LoraConfig(target_modules=["0"])
config
LoraConfig(task_type=None, peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, peft_version='0.18.1', base_model_name_or_path=None, revision=None, inference_mode=False, r=8, target_modules={'0'}, exclude_modules=None, lora_alpha=8, lora_dropout=0.0, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', trainable_token_indices=None, loftq_config={}, eva_config=None, corda_config=None, use_dora=False, alora_invocation_tokens=None, use_qalora=False, qalora_group_size=16, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False), lora_bias=False, target_parameters=None, arrow_config=None, ensure_weight_tying=False)
python
peft_model = get_peft_model(net, config)
python
# Suppose training has finished and we save the adapter
peft_model.save_pretrained("./loraA")
python
net = nn.Sequential(
nn.Linear(10, 10),
nn.ReLU(),
nn.Linear(10, 2)
)
net
Sequential(
(0): Linear(in_features=10, out_features=10, bias=True)
(1): ReLU()
(2): Linear(in_features=10, out_features=2, bias=True)
)
python
config2 = LoraConfig(target_modules=["2"])
config2
LoraConfig(task_type=None, peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, peft_version='0.18.1', base_model_name_or_path=None, revision=None, inference_mode=False, r=8, target_modules={'2'}, exclude_modules=None, lora_alpha=8, lora_dropout=0.0, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', trainable_token_indices=None, loftq_config={}, eva_config=None, corda_config=None, use_dora=False, alora_invocation_tokens=None, use_qalora=False, qalora_group_size=16, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False), lora_bias=False, target_parameters=None, arrow_config=None, ensure_weight_tying=False)
python
peft_model2 = get_peft_model(net, config2)
python
# Suppose training has finished and we save this adapter as well
peft_model2.save_pretrained("./loraB")
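For reference, a small sketch that lists what save_pretrained wrote to disk; each adapter directory holds only the adapter config and the small adapter weight file (typically adapter_config.json and adapter_model.safetensors), not a full copy of the model:
python
import os

# Inspect the two adapter directories saved above
for d in ["./loraA", "./loraB"]:
    print(d, os.listdir(d))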
python
net = nn.Sequential(
nn.Linear(10, 10),
nn.ReLU(),
nn.Linear(10, 2)
)
net
Sequential(
(0): Linear(in_features=10, out_features=10, bias=True)
(1): ReLU()
(2): Linear(in_features=10, out_features=2, bias=True)
)
python
model_loaded = PeftModel.from_pretrained(net, model_id="./loraA", adapter_name="LoraA")
model_loaded
PeftModel(
(base_model): LoraModel(
(model): Sequential(
(0): lora.Linear(
(base_layer): Linear(in_features=10, out_features=10, bias=True)
(lora_dropout): ModuleDict(
(LoraA): Identity()
)
(lora_A): ModuleDict(
(LoraA): Linear(in_features=10, out_features=8, bias=False)
)
(lora_B): ModuleDict(
(LoraA): Linear(in_features=8, out_features=10, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
(1): ReLU()
(2): Linear(in_features=10, out_features=2, bias=True)
)
)
)
python
model_loaded.load_adapter("./loraB", adapter_name="LoraB")
model_loaded
PeftModel(
(base_model): LoraModel(
(model): Sequential(
(0): lora.Linear(
(base_layer): Linear(in_features=10, out_features=10, bias=True)
(lora_dropout): ModuleDict(
(LoraA): Identity()
)
(lora_A): ModuleDict(
(LoraA): Linear(in_features=10, out_features=8, bias=False)
)
(lora_B): ModuleDict(
(LoraA): Linear(in_features=8, out_features=10, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
(1): ReLU()
(2): lora.Linear(
(base_layer): Linear(in_features=10, out_features=2, bias=True)
(lora_dropout): ModuleDict(
(LoraB): Identity()
)
(lora_A): ModuleDict(
(LoraB): Linear(in_features=10, out_features=8, bias=False)
)
(lora_B): ModuleDict(
(LoraB): Linear(in_features=8, out_features=2, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
)
)
)
python
# Only one adapter is active at a time; the two adapters are switched between rather than enabled simultaneously
model_loaded.active_adapter
'LoraA'
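Besides active_adapter, all adapters currently attached to the model can be enumerated through its peft_config dictionary; a minimal check:
python
# Both adapters are loaded, but only the active one affects the forward pass
print(list(model_loaded.peft_config.keys()))  # expected: ['LoraA', 'LoraB']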
python
# Verify that the loaded PEFT model works by running a forward pass on a hand-crafted input
model_loaded(torch.arange(0, 10).view(1, 10).float())
tensor([[ 0.4449, -2.2265]])
python
for name, param in model_loaded.named_parameters():
print(name, param)
base_model.model.0.base_layer.weight Parameter containing:
tensor([[-0.1588, 0.2752, -0.1211, -0.1935, -0.0534, 0.1522, 0.1751, 0.0726,
0.0654, 0.1336],
[-0.1522, -0.2216, 0.1408, -0.2012, -0.2923, 0.0492, 0.0941, 0.2587,
0.0803, 0.1600],
[ 0.1929, 0.3138, -0.1657, -0.2600, 0.2811, -0.1913, -0.2518, 0.1203,
-0.2471, 0.1717],
[-0.1881, -0.1632, 0.0110, -0.0272, -0.2551, 0.2584, -0.0484, 0.0257,
0.2736, -0.1886],
[ 0.2098, 0.1332, 0.1937, -0.2025, -0.0216, 0.2926, 0.1651, 0.1238,
0.1512, 0.0283],
[ 0.1667, 0.2422, -0.1701, -0.0426, 0.0591, 0.2006, -0.0754, 0.0757,
-0.2234, -0.0005],
[ 0.1603, -0.1021, 0.1488, -0.1513, 0.0851, -0.2836, 0.0340, -0.0369,
0.0491, -0.1348],
[-0.1903, -0.2202, -0.0668, -0.2711, 0.3153, -0.2939, 0.1590, -0.0337,
-0.1344, 0.2653],
[-0.2426, -0.0390, -0.0306, 0.1652, 0.1942, 0.0974, -0.1914, 0.2384,
0.1367, 0.2772],
[-0.0545, -0.2624, 0.1184, -0.2571, 0.0918, -0.1298, -0.2747, -0.0226,
0.1631, -0.2922]])
base_model.model.0.base_layer.bias Parameter containing:
tensor([-0.0312, -0.2931, -0.0387, 0.3002, 0.2812, -0.0856, 0.2703, -0.1790,
0.2715, -0.2011])
base_model.model.0.lora_A.LoraA.weight Parameter containing:
tensor([[ 2.1104e-01, 9.2989e-02, -2.0273e-01, -5.0939e-02, -2.7361e-02,
2.4165e-01, 1.1713e-01, 1.6662e-01, 1.6932e-02, -2.7329e-01],
[-6.6695e-02, 1.8758e-01, -3.4211e-02, -7.1618e-02, -1.6356e-01,
1.6244e-01, -1.7447e-01, -2.8571e-01, 2.9213e-01, 5.1914e-02],
[-7.2140e-02, -1.0561e-01, -7.5215e-02, 1.1768e-01, -6.0844e-03,
5.8701e-02, 7.2241e-02, 1.5369e-01, 1.3145e-01, 2.4837e-01],
[ 1.3417e-01, -1.5555e-01, -3.5348e-02, -2.9854e-01, 2.2981e-01,
-1.1707e-01, 2.2707e-01, -3.6874e-02, -3.0112e-01, 2.6405e-01],
[-2.7007e-01, -1.9705e-04, -1.5266e-01, -1.8104e-01, 9.4075e-02,
2.7261e-02, 1.3942e-01, -2.5735e-01, 1.9763e-01, -2.3715e-01],
[ 2.0395e-01, -1.7599e-01, -3.4541e-02, -6.9124e-02, -1.3147e-01,
-2.1249e-01, -2.5208e-01, 3.8740e-02, 4.3608e-02, 3.1246e-01],
[-2.3700e-01, 1.2804e-01, -7.9218e-04, -1.8738e-02, 1.0211e-01,
-4.8385e-03, 1.9376e-01, -1.4640e-01, -1.0300e-01, -1.6072e-02],
[-2.1022e-01, -1.5291e-01, 1.2801e-01, -7.4499e-02, -1.7619e-01,
8.5758e-02, 2.1931e-02, -1.1871e-01, 1.2398e-01, 7.6120e-02]])
base_model.model.0.lora_B.LoraA.weight Parameter containing:
tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0.]])
base_model.model.2.base_layer.weight Parameter containing:
tensor([[-0.2550, 0.2741, 0.0927, 0.1097, 0.1821, -0.2754, -0.0572, 0.1614,
-0.0336, 0.0791],
[-0.1876, -0.3106, -0.0799, 0.1561, 0.0991, -0.2563, 0.3032, -0.1134,
-0.2010, 0.3143]])
base_model.model.2.base_layer.bias Parameter containing:
tensor([-0.3101, -0.1090])
base_model.model.2.lora_A.LoraB.weight Parameter containing:
tensor([[ 0.0294, -0.0403, 0.3084, -0.2337, 0.2660, 0.2608, -0.2412, 0.0918,
0.0465, 0.1814],
[ 0.2355, 0.0298, 0.2422, -0.0098, -0.2524, -0.2019, -0.1214, -0.1448,
0.1570, 0.0962],
[ 0.1007, 0.1727, -0.2776, 0.1225, -0.3049, -0.1696, 0.2583, 0.0150,
0.0819, 0.3162],
[ 0.0915, -0.1027, -0.1306, -0.1173, -0.1742, 0.0534, -0.1520, -0.1085,
0.1247, 0.2614],
[ 0.2219, -0.2623, 0.0299, -0.1905, 0.1224, 0.2686, 0.0037, -0.0494,
-0.0007, 0.2626],
[ 0.0542, -0.2540, 0.3101, 0.2258, -0.1486, -0.1404, 0.2641, 0.2413,
-0.0004, -0.0799],
[ 0.0701, -0.1045, -0.0033, -0.1111, -0.1975, -0.0614, 0.0315, 0.0308,
-0.3117, -0.2942],
[-0.2646, -0.0780, 0.1677, 0.1995, -0.2687, -0.1401, -0.2531, 0.2024,
-0.1899, -0.2917]])
base_model.model.2.lora_B.LoraB.weight Parameter containing:
tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0.]])
python
# Manually overwrite LoraA's lora_A / lora_B weights with ones so its effect on the output becomes visible
# (freshly initialized LoRA weights have lora_B = 0, so they leave the output unchanged)
for name, param in model_loaded.named_parameters():
if name in ["base_model.model.0.lora_A.LoraA.weight", "base_model.model.0.lora_B.LoraA.weight"]:
param.data = torch.ones_like(param)
python
model_loaded(torch.arange(0, 10).view(1, 10).float())
tensor([[ 100.3435, -103.1971]])
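Why does the output jump so far from the base model's [0.4449, -2.2265]? The LoRA update for layer "0" is delta_W = lora_B @ lora_A * (lora_alpha / r), and with both matrices overwritten with ones (and lora_alpha = r = 8) that delta is a 10x10 matrix filled with 8s. A small sketch reconstructing it, relying on the internal lora_A / lora_B / scaling attributes visible in the module dump above:
python
# Effective weight update contributed by the LoraA adapter on layer "0"
layer0 = model_loaded.base_model.model[0]
delta_w = layer0.lora_B["LoraA"].weight @ layer0.lora_A["LoraA"].weight * layer0.scaling["LoraA"]
print(delta_w)  # a 10x10 matrix of 8s, given the all-ones weights set above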
python
model_loaded.set_adapter("LoraB")
python
model_loaded(torch.arange(0, 10).view(1, 10).float())
tensor([[ 0.4449, -2.2265]], grad_fn=<AddBackward0>)
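The output is back to the base model's values because LoraB was saved straight after initialization: its lora_B matrix is all zeros (see the parameter dump above), so its contribution to layer "2" is exactly zero, and LoraA (whose weights we overwrote) is no longer active. A quick confirmation:
python
# LoraB's B matrix is zero-initialized, so its LoRA delta on layer "2" vanishes
layer2 = model_loaded.base_model.model[2]
print(torch.all(layer2.lora_B["LoraB"].weight == 0))  # tensor(True)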
3. Disabling the Adapter to Get the Original Model's Output
python
model_loaded.set_adapter("LoraA")
python
model_loaded.active_adapter
'LoraA'
python
model_loaded(torch.arange(0, 10).view(1, 10).float())
tensor([[ 100.3435, -103.1971]], grad_fn=<AddmmBackward0>)
python
with model_loaded.disable_adapter():
print(model_loaded(torch.arange(0, 10).view(1, 10).float()))
tensor([[ 0.4449, -2.2265]])
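disable_adapter is a context manager, so the effect is temporary: outside the with block the active adapter (still LoraA, with the all-ones weights set earlier) applies again. A quick check:
python
# Back outside the context manager, the LoraA adapter takes effect again
print(model_loaded(torch.arange(0, 10).view(1, 10).float()))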
python
import warnings
warnings.filterwarnings('ignore')
python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
python
# Switch to a real model: load the Langboat/bloom-1b4-zh base model and its tokenizer
model = AutoModelForCausalLM.from_pretrained("Langboat/bloom-1b4-zh", low_cpu_mem_usage=True)
tokenizer = AutoTokenizer.from_pretrained("Langboat/bloom-1b4-zh")
python
# Attach the LoRA adapter saved under ./my-lora/ to the base model
p_model = PeftModel.from_pretrained(model, model_id="./my-lora/")
p_model
PeftModelForCausalLM(
(base_model): LoraModel(
(model): BloomForCausalLM(
(transformer): BloomModel(
(word_embeddings): Embedding(46145, 2048)
(word_embeddings_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
(h): ModuleList(
(0-23): 24 x BloomBlock(
(input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
(self_attention): BloomAttention(
(query_key_value): lora.Linear(
(base_layer): Linear(in_features=2048, out_features=6144, bias=True)
(lora_dropout): ModuleDict(
(default): Identity()
)
(lora_A): ModuleDict(
(default): Linear(in_features=2048, out_features=8, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=8, out_features=6144, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
(dense): Linear(in_features=2048, out_features=2048, bias=True)
(attention_dropout): Dropout(p=0.0, inplace=False)
)
(post_attention_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
(mlp): BloomMLP(
(dense_h_to_4h): Linear(in_features=2048, out_features=8192, bias=True)
(gelu_impl): BloomGelu()
(dense_4h_to_h): Linear(in_features=8192, out_features=2048, bias=True)
)
)
)
(ln_f): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
)
(lm_head): Linear(in_features=2048, out_features=46145, bias=False)
)
)
)
python
# p_model = p_model.cuda()
ipt = tokenizer("Human: {}\n{}".format("如何提高学习效率?", "").strip() + "\n\nAssistant: ", return_tensors="pt").to(p_model.device)
# Decode the generated token ids back into text
print(tokenizer.decode(p_model.generate(**ipt, max_length=256, do_sample=True)[0], skip_special_tokens=True))
Human: 如何提高学习效率?
Assistant: 如何提高学习效率?首先要学会合理分配时间。科学合理的安排学习时间非常重要,不仅可以提高学习效率,还可以避免在学习时感到不适、疲劳。
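The prompt formatting and decoding above are repeated verbatim for the merged model below, so here is a small hypothetical helper (the name chat and its signature are my own, not part of the original notebook); it simply wraps the same Human/Assistant template:
python
def chat(m, question, max_length=256):
    # Illustrative wrapper around the prompt template used in this post
    prompt = "Human: {}\n{}".format(question, "").strip() + "\n\nAssistant: "
    ipt = tokenizer(prompt, return_tensors="pt").to(m.device)
    out = m.generate(**ipt, max_length=max_length, do_sample=True)[0]
    return tokenizer.decode(out, skip_special_tokens=True)

# e.g. print(chat(p_model, "如何提高学习效率?"))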
4. Model Merging
4.1 Merging the Adapter Parameters into the Base Model
python
# Fold the adapter parameters (the LoRA weights) into the base model's parameters and unload the adapter modules
merge_model = p_model.merge_and_unload()
merge_model
BloomForCausalLM(
(transformer): BloomModel(
(word_embeddings): Embedding(46145, 2048)
(word_embeddings_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
(h): ModuleList(
(0-23): 24 x BloomBlock(
(input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
(self_attention): BloomAttention(
(query_key_value): Linear(in_features=2048, out_features=6144, bias=True)
(dense): Linear(in_features=2048, out_features=2048, bias=True)
(attention_dropout): Dropout(p=0.0, inplace=False)
)
(post_attention_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
(mlp): BloomMLP(
(dense_h_to_4h): Linear(in_features=2048, out_features=8192, bias=True)
(gelu_impl): BloomGelu()
(dense_4h_to_h): Linear(in_features=8192, out_features=2048, bias=True)
)
)
)
(ln_f): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
)
(lm_head): Linear(in_features=2048, out_features=46145, bias=False)
)
python
# merge_model = merge_model.cuda()
ipt = tokenizer("Human: {}\n{}".format("如何提高学习效率?", "").strip() + "\n\nAssistant: ", return_tensors="pt").to(merge_model.device)
# Decode the generated token ids back into text
print(tokenizer.decode(merge_model.generate(**ipt, max_length=256, do_sample=True)[0], skip_special_tokens=True))
Human: 如何提高学习效率?
Assistant: 效率的高低取决于许多条件,具体包括:个人因素(如学习动机、注意力)、学习方法和学习环境。不过,以下推荐几种提高学习效率的方法:
① 设定目标:大多数人都不会事事达到100%,但在学习上,我们必须设定目标,如每次学习的时间、学习内容的难易程度、预估学习效果等。在制定目标的同时,我建议使用"小步计划法":
1. 计划完成当前任务的期限
2. 设定完成目标的百分比
3. 评估达到目标的程度
② 养成良好的学习习惯:学习是有规律的,我们应每天安排固定的学习时间。每天保持充足、安静的学习环境,合理分配时间,提高效率。此外,我建议每天给自己制定一个规划,如学习任务清单,记下每天的学习任务、完成时间。这样,我们可以养成良好的学习习惯,保持充足的学习动力。
4.2 Saving the Merged Model
python
# Saved this way, the checkpoint is the same size as the original model; it no longer contains just the LoRA part
merge_model.save_pretrained("./chatbot/merge_model")
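Since the merged checkpoint is a plain BloomForCausalLM, it can later be reloaded without PEFT at all; a minimal sketch (saving the tokenizer alongside it is my own addition, to keep the directory self-contained):
python
# Save the tokenizer next to the merged weights, then reload everything without peft
tokenizer.save_pretrained("./chatbot/merge_model")

from transformers import AutoModelForCausalLM, AutoTokenizer
reloaded_model = AutoModelForCausalLM.from_pretrained("./chatbot/merge_model")
reloaded_tokenizer = AutoTokenizer.from_pretrained("./chatbot/merge_model")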