学习 LoRA 训练的个人记录

本内容不专业，仅仅是记录。

网上下载了数据json:

内容大概是：

复制代码

    {
        "instruction": "宝宝，我最近失眠",
        "output": "*竖起耳朵，担心地看着主人* 主人最近睡不好吗？难怪我觉得你摸我的时候手有点凉凉的...*轻轻用温暖的爪子按摩主人的额头* 要不要让我帮你按摩一下？我们猫娘的爪子可是很温柔的哦。而且我可以躺在你胸口，发出呼噜呼噜的声音，据说这个声音能让人心情平静呢。*蜷缩成一团，用温暖的毛皮贴着主人* 或者...主人要不要听我唱摇篮曲？虽然可能有点跑调...但是我会用最温柔的声音唱给你听的。*轻轻蹭蹭主人的脸颊* 我还可以教你一个我们猫咪的小秘密：睡前把爪子泡在温水里，然后捂在肚子上。这样整个身体都会暖暖的，特别容易入睡哦。"
    },

然后使用如下代码：

（首次运行需要科学上网，因为要从 Hugging Face 下载模型的权重和配置来进行训练；代码里的 local_files_only=True 只有在模型已经下载到本地缓存之后才能用，第一次运行时需要先去掉它）

python 复制代码

from datasets import Dataset
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForSeq2Seq,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig
)
import json
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
#print(TrainingArguments.__init__.__code__.co_varnames)  # 查看所有合法参数
import os
import json

from datetime import datetime
# 1. Build a timestamped experiment name so every run gets its own
#    checkpoint and log directories.
experiment_name = f"cat_gpt_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
model_save_name = "st_gpt_final_v1"

# Load the raw records; each one carries an "instruction" and an "output".
with open("./data.json", "r", encoding="utf-8") as f:
    raw_data = json.load(f)

# Reshape into prompt/response pairs. The prompt is the bare instruction:
# the chat template applied during preprocessing already adds the user role
# marker, so prefixing "user\n" here would leave a stray "user" line inside
# the message content that the inference-time prompt does not contain.
formatted_data = [
    {
        "prompt": example["instruction"],
        "response": example["output"],
    }
    for example in raw_data
]

# Wrap the pairs in a Hugging Face Dataset.
dataset = Dataset.from_list(formatted_data)

# 2. Hold out 10% of the examples for evaluation.
dataset = dataset.train_test_split(test_size=0.1)
train_dataset = dataset["train"]
eval_dataset = dataset["test"]

# 3. Load the tokenizer. local_files_only=True only works once the model
#    files are already cached locally from an earlier run.
model_name = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    local_files_only=True,
)

# Fall back to the EOS token when the tokenizer defines no padding token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 4. 数据预处理函数
def preprocess_function(examples, max_length=128):
    """Tokenize a batch of prompt/response pairs for causal-LM fine-tuning.

    Each pair is rendered with the tokenizer's chat template as one user
    turn followed by one assistant turn, then truncated or padded to
    ``max_length`` tokens.

    Args:
        examples: Batch mapping with parallel "prompt" and "response" lists,
            as passed by ``Dataset.map(..., batched=True)``.
        max_length: Fixed token length of every returned sequence.

    Returns:
        Dict with "input_ids", "attention_mask" and "labels". Labels copy
        the input ids, except that padding positions are set to -100 so
        they do not contribute to the loss.
    """
    # Render every pair as chat-formatted text (no generation prompt at the
    # end, because the assistant turn is already part of the text).
    texts = [
        tokenizer.apply_chat_template(
            [
                {"role": "user", "content": prompt},
                {"role": "assistant", "content": response},
            ],
            tokenize=False,
            add_generation_prompt=False,
        )
        for prompt, response in zip(examples["prompt"], examples["response"])
    ]

    # NOTE(review): sequences longer than max_length are cut off, which also
    # drops the closing end-of-turn marker - confirm the limit suits the data.
    tokenized = tokenizer(
        texts,
        max_length=max_length,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )

    # For causal LM the targets are the inputs themselves, but padding must
    # be masked out; otherwise the model is also trained to emit pad tokens.
    labels = tokenized["input_ids"].clone()
    labels[tokenized["attention_mask"] == 0] = -100

    return {
        "input_ids": tokenized["input_ids"],
        "attention_mask": tokenized["attention_mask"],
        "labels": labels,
    }

# Tokenize both splits; the raw text columns are dropped afterwards.
_map_options = {"batched": True, "remove_columns": ["prompt", "response"]}
train_dataset = train_dataset.map(preprocess_function, **_map_options)
eval_dataset = eval_dataset.map(preprocess_function, **_map_options)

# 8-bit quantization is left disabled in this variant. To turn it on, build
# BitsAndBytesConfig(load_in_8bit=True), pass it as quantization_config
# together with device_map="auto", and drop the explicit .to(device) below.

# Load the base model in half precision.
_load_options = {
    "torch_dtype": torch.float16,
    "trust_remote_code": True,
    "local_files_only": True,
}
model = AutoModelForCausalLM.from_pretrained(model_name, **_load_options)

# Place the model on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(f"模型已加载到: {model.device}")

# Enable gradient checkpointing on the base model before wrapping it with
# LoRA. prepare_model_for_kbit_training(model) is the alternative meant for
# quantized models; the author notes that training errors out unless one of
# the two is applied.
model.gradient_checkpointing_enable()



# LoRA adapter configuration. Only the attention query/value projections
# are adapted; k_proj, o_proj and the MLP projections (gate_proj, up_proj,
# down_proj) are the other candidates left out here.
_lora_targets = ["q_proj", "v_proj"]
lora_config = LoraConfig(
    task_type="CAUSAL_LM",         # causal language modelling
    r=8,                           # adapter rank
    lora_alpha=32,                 # scaling factor
    lora_dropout=0.05,             # dropout on the adapter path
    bias="none",                   # bias terms are not trained
    target_modules=_lora_targets,
)

# Wrap the base model with the adapter and report the trainable share.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Sanity check: report the first parameter that requires gradients, or warn
# when there is none.
_first_trainable = next(
    (
        param_name
        for param_name, weight in model.named_parameters()
        if weight.requires_grad
    ),
    None,
)
if _first_trainable is None:
    print("警告：没有找到可训练参数！")
else:
    print(f"可训练参数: {_first_trainable}")

# 6. Collator that assembles each training batch.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model, padding=True)

# 7. Training configuration. Checkpoints and logs go into per-experiment
#    directories derived from the timestamped experiment name.
checkpoint_dir = f"./training_checkpoints/{experiment_name}"
log_dir = f"./logs/{experiment_name}"

training_args = TrainingArguments(
    output_dir=checkpoint_dir,
    logging_dir=log_dir,
    # Schedule: evaluate every 200 steps, log every 50, save every 500.
    eval_strategy="steps",
    eval_steps=200,
    logging_steps=50,
    save_strategy="steps",
    save_steps=500,
    # Optimisation.
    learning_rate=2e-4,
    weight_decay=0.01,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    optim="adamw_torch",           # standard AdamW instead of paged_adamw_8bit
    # Memory / precision.
    fp16=True,
    gradient_checkpointing=True,
    # No external experiment tracker.
    report_to="none",
)

# 8. Bundle model, data, collator and tokenizer into the trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
    processing_class=tokenizer,
)

# 9. Train from scratch. To resume from a checkpoint instead, call
#    trainer.train(resume_from_checkpoint="./training_checkpoints/checkpoint-1000").
trainer.train()

# 10. Save the result. The model is the PEFT wrapper, so this writes the
#     LoRA adapter rather than the full base model; the tokenizer is stored
#     next to it so the chat script can load both from one directory.
final_model_dir = f"./final_models/{model_save_name}"
os.makedirs(final_model_dir, exist_ok=True)
model.save_pretrained(final_model_dir)
tokenizer.save_pretrained(final_model_dir)

print(f"训练检查点: {checkpoint_dir}")
print(f"最终模型: {final_model_dir}")
print(f"训练日志: {log_dir}")

训练完成后，用下面的脚本加载基础模型和训练得到的 LoRA 适配器，运行对话：

python 复制代码

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch
# Base model identifier; must match the one used for training.
base_model_name = "Qwen/Qwen3-0.6B"

# Directory the training script saved the adapter and tokenizer to.
model_save_name = "st_gpt_final_v1"
final_model_dir = "./final_models/" + model_save_name
lora_path = final_model_dir
def load_lora_model(load_in_8bit=False):
    """Load the base model, attach the saved LoRA adapter, and load the tokenizer.

    Args:
        load_in_8bit: When True, load the base model 8-bit quantized with
            ``device_map="auto"`` and skip the explicit device move. The
            default keeps the half-precision load that is moved to the GPU
            (or CPU) by hand.

    Returns:
        ``(model, tokenizer)`` with the model switched to evaluation mode.
    """
    load_options = {
        "torch_dtype": torch.float16,
        "trust_remote_code": True,
        "local_files_only": True,
    }
    if load_in_8bit:
        # Only build the quantization config when it is actually used.
        load_options["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
        load_options["device_map"] = "auto"

    base_model = AutoModelForCausalLM.from_pretrained(base_model_name, **load_options)

    # Attach the LoRA adapter saved by the training script.
    model = PeftModel.from_pretrained(base_model, lora_path)

    if not load_in_8bit:
        # With device_map="auto" the weights are already placed, so the
        # manual move is only done for the unquantized load.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)
    model.eval()

    # The tokenizer was saved alongside the adapter.
    tokenizer = AutoTokenizer.from_pretrained(
        lora_path,
        trust_remote_code=True,
        local_files_only=True,
    )
    return model, tokenizer

def build_prompt(instruction):
    """Wrap *instruction* as one user turn and open the assistant turn."""
    template = "<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n"
    return template.format(instruction)

def generate_response(model, tokenizer, instruction, max_new_tokens=128):
    """Generate the assistant's reply to a single user instruction.

    Args:
        model: Model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer used to encode the prompt and decode the reply.
        instruction: The user's message text.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded reply with special tokens removed and surrounding
        whitespace stripped. The prompt is never part of the result, even
        when generation stops at ``max_new_tokens`` without an end-of-turn
        token.
    """
    prompt = build_prompt(instruction)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens. Slice them off by
    # length instead of searching the decoded text for chat markers, which
    # fell back to echoing the whole prompt when the end marker was missing.
    prompt_length = inputs["input_ids"].shape[1]
    reply_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(reply_tokens, skip_special_tokens=True).strip()

def chat():
    """Run an interactive chat loop on the terminal.

    Loads the model once, then answers each line the user types until the
    user enters 'exit', presses Ctrl-C, or the input stream ends.
    """
    print("加载模型中...")
    model, tokenizer = load_lora_model()
    print("模型加载完成！输入'exit'结束对话")

    while True:
        try:
            user_input = input("\n用户：")
        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop: on a closed stdin every further
            # input() call would raise again and the loop would never stop.
            print("\n\n对话结束")
            break

        command = user_input.strip()
        if command.lower() == 'exit':
            break
        if not command:
            # Nothing to answer; prompt again.
            continue

        try:
            response = generate_response(model, tokenizer, user_input)
            print(f"\n助理：{response}")
        except KeyboardInterrupt:
            print("\n\n对话结束")
            break
        except Exception as e:
            # Top-level boundary: report the failure and keep the session alive.
            print(f"\n错误：{e}")


if __name__ == "__main__":
    chat()

但是 0.6B 模型的效果不好，于是换成 8B 模型试试。结果显存不够用：16G 显存爆了，自动转到 CPU 上运行了。

于是放开了 8bit 量化，取消了 to(device) 的处理，并把批次大小（per_device_train_batch_size / per_device_eval_batch_size）从 4 减到 1 来控制显存占用。

然后放开量化使用的设置：

prepare_model_for_kbit_training

同时注释掉了与之二选一的 model.gradient_checkpointing_enable()，就能训练了，占用了约 13G 显存：

python 复制代码

from datasets import Dataset
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForSeq2Seq,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig
)
import json
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
#print(TrainingArguments.__init__.__code__.co_varnames)  # 查看所有合法参数
import os
import json

from datetime import datetime
# 1. Timestamped experiment name, used for the checkpoint and log paths.
experiment_name = f"cat_gpt_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
model_save_name = "st_gpt_final_v1"

# Load the raw records.
with open("./data.json", "r", encoding="utf-8") as f:
    raw_data = json.load(f)

# Reshape into prompt/response pairs. The prompt is the instruction,
# followed on a new line by the optional "input" field when that field is
# present and not blank.
formatted_data = [
    {
        "prompt": record["instruction"] + (
            "\n" + record["input"]
            if (record.get("input") or "").strip()
            else ""
        ),
        "response": record["output"],
    }
    for record in raw_data
]

# Wrap the pairs in a Hugging Face Dataset.
dataset = Dataset.from_list(formatted_data)

# 2. Hold out 10% of the examples for evaluation.
dataset = dataset.train_test_split(test_size=0.1)
train_dataset, eval_dataset = dataset["train"], dataset["test"]

# 3. Load the tokenizer. local_files_only=True only works once the model
#    files are already cached locally from an earlier run.
model_name = "Qwen/Qwen3-8B"
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    local_files_only=True,
)

# Fall back to the EOS token when the tokenizer defines no padding token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 4. 数据预处理函数
def preprocess_function(examples, max_length=128):
    """Tokenize a batch of prompt/response pairs for causal-LM fine-tuning.

    Each pair is rendered with the tokenizer's chat template as one user
    turn followed by one assistant turn, then truncated or padded to
    ``max_length`` tokens.

    Args:
        examples: Batch mapping with parallel "prompt" and "response" lists,
            as passed by ``Dataset.map(..., batched=True)``.
        max_length: Fixed token length of every returned sequence.

    Returns:
        Dict with "input_ids", "attention_mask" and "labels". Labels copy
        the input ids, except that padding positions are set to -100 so
        they do not contribute to the loss.
    """
    # Render every pair as chat-formatted text (no generation prompt at the
    # end, because the assistant turn is already part of the text).
    texts = [
        tokenizer.apply_chat_template(
            [
                {"role": "user", "content": prompt},
                {"role": "assistant", "content": response},
            ],
            tokenize=False,
            add_generation_prompt=False,
        )
        for prompt, response in zip(examples["prompt"], examples["response"])
    ]

    # NOTE(review): sequences longer than max_length are cut off, which also
    # drops the closing end-of-turn marker - confirm the limit suits the data.
    tokenized = tokenizer(
        texts,
        max_length=max_length,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )

    # For causal LM the targets are the inputs themselves, but padding must
    # be masked out; otherwise the model is also trained to emit pad tokens.
    labels = tokenized["input_ids"].clone()
    labels[tokenized["attention_mask"] == 0] = -100

    return {
        "input_ids": tokenized["input_ids"],
        "attention_mask": tokenized["attention_mask"],
        "labels": labels,
    }

# Tokenize both splits; the raw text columns are dropped afterwards.
_map_options = {"batched": True, "remove_columns": ["prompt", "response"]}
train_dataset = train_dataset.map(preprocess_function, **_map_options)
eval_dataset = eval_dataset.map(preprocess_function, **_map_options)

# Quantization settings: load the base model weights in 8-bit to cut GPU
# memory use.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

# Load the quantized base model and let device_map="auto" decide placement.
_load_options = {
    "quantization_config": quantization_config,
    "torch_dtype": torch.float16,
    "device_map": "auto",
    "trust_remote_code": True,
    "local_files_only": True,
}
model = AutoModelForCausalLM.from_pretrained(model_name, **_load_options)

# No explicit model.to(device) here: the author notes the model can no
# longer be moved by hand once it is loaded in 8-bit.

# Prepare the quantized model for PEFT training. This takes the place of
# the plain model.gradient_checkpointing_enable() call; the author notes
# that one of the two is required or training errors out.
model = prepare_model_for_kbit_training(model)



# LoRA adapter configuration. Only the attention query/value projections
# are adapted; k_proj, o_proj and the MLP projections (gate_proj, up_proj,
# down_proj) are the other candidates left out here.
_lora_targets = ["q_proj", "v_proj"]
lora_config = LoraConfig(
    task_type="CAUSAL_LM",         # causal language modelling
    r=8,                           # adapter rank
    lora_alpha=32,                 # scaling factor
    lora_dropout=0.05,             # dropout on the adapter path
    bias="none",                   # bias terms are not trained
    target_modules=_lora_targets,
)

# Wrap the base model with the adapter and report the trainable share.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Sanity check: report the first parameter that requires gradients, or warn
# when there is none.
_first_trainable = next(
    (
        param_name
        for param_name, weight in model.named_parameters()
        if weight.requires_grad
    ),
    None,
)
if _first_trainable is None:
    print("警告：没有找到可训练参数！")
else:
    print(f"可训练参数: {_first_trainable}")

# 6. Collator that assembles each training batch.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model, padding=True)

# 7. Training configuration. Checkpoints and logs go into per-experiment
#    directories derived from the timestamped experiment name.
checkpoint_dir = f"./training_checkpoints/{experiment_name}"
log_dir = f"./logs/{experiment_name}"

training_args = TrainingArguments(
    output_dir=checkpoint_dir,
    logging_dir=log_dir,
    # Schedule: evaluate every 200 steps, log every 50, save every 500.
    eval_strategy="steps",
    eval_steps=200,
    logging_steps=50,
    save_strategy="steps",
    save_steps=500,
    # Optimisation. Batch size is 1 per device to keep memory use down.
    learning_rate=2e-4,
    weight_decay=0.01,
    num_train_epochs=3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    # The optimizer is standard AdamW (not paged_adamw_8bit), even though
    # the base model itself is loaded in 8-bit in this script.
    optim="adamw_torch",
    # Memory / precision.
    fp16=True,
    gradient_checkpointing=True,
    # No external experiment tracker.
    report_to="none",
)

# 8. Bundle model, data, collator and tokenizer into the trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
    processing_class=tokenizer,
)

# 9. Train from scratch. To resume from a checkpoint instead, call
#    trainer.train(resume_from_checkpoint="./training_checkpoints/checkpoint-1000").
trainer.train()

# 10. Save the result. The model is the PEFT wrapper, so this writes the
#     LoRA adapter rather than the full base model; the tokenizer is stored
#     next to it so the chat script can load both from one directory.
final_model_dir = f"./final_models/{model_save_name}"
os.makedirs(final_model_dir, exist_ok=True)
model.save_pretrained(final_model_dir)
tokenizer.save_pretrained(final_model_dir)

print(f"训练检查点: {checkpoint_dir}")
print(f"最终模型: {final_model_dir}")
print(f"训练日志: {log_dir}")

运行：

python 复制代码

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch
# Base model identifier; must match the one used for training.
base_model_name = "Qwen/Qwen3-8B"

# Directory the training script saved the adapter and tokenizer to.
model_save_name = "st_gpt_final_v1"
final_model_dir = "./final_models/" + model_save_name
lora_path = final_model_dir
def load_lora_model(load_in_8bit=False):
    """Load the base model, attach the saved LoRA adapter, and load the tokenizer.

    Args:
        load_in_8bit: When True, load the base model 8-bit quantized with
            ``device_map="auto"`` and skip the explicit device move. The
            default keeps the half-precision load that is moved to the GPU
            (or CPU) by hand.

    Returns:
        ``(model, tokenizer)`` with the model switched to evaluation mode.
    """
    # NOTE(review): the default loads the 8B base model unquantized in
    # float16 - confirm it fits the target GPU, or pass load_in_8bit=True.
    load_options = {
        "torch_dtype": torch.float16,
        "trust_remote_code": True,
        "local_files_only": True,
    }
    if load_in_8bit:
        # Only build the quantization config when it is actually used.
        load_options["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
        load_options["device_map"] = "auto"

    base_model = AutoModelForCausalLM.from_pretrained(base_model_name, **load_options)

    # Attach the LoRA adapter saved by the training script.
    model = PeftModel.from_pretrained(base_model, lora_path)

    if not load_in_8bit:
        # With device_map="auto" the weights are already placed, so the
        # manual move is only done for the unquantized load.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)
    model.eval()

    # The tokenizer was saved alongside the adapter.
    tokenizer = AutoTokenizer.from_pretrained(
        lora_path,
        trust_remote_code=True,
        local_files_only=True,
    )
    return model, tokenizer

def build_prompt(instruction):
    """Wrap *instruction* as one user turn and open the assistant turn."""
    template = "<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n"
    return template.format(instruction)

def generate_response(model, tokenizer, instruction, max_new_tokens=128):
    """Generate the assistant's reply to a single user instruction.

    Args:
        model: Model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer used to encode the prompt and decode the reply.
        instruction: The user's message text.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded reply with special tokens removed and surrounding
        whitespace stripped. The prompt is never part of the result, even
        when generation stops at ``max_new_tokens`` without an end-of-turn
        token.
    """
    prompt = build_prompt(instruction)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens. Slice them off by
    # length instead of searching the decoded text for chat markers, which
    # fell back to echoing the whole prompt when the end marker was missing.
    prompt_length = inputs["input_ids"].shape[1]
    reply_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(reply_tokens, skip_special_tokens=True).strip()

def chat():
    """Run an interactive chat loop on the terminal.

    Loads the model once, then answers each line the user types until the
    user enters 'exit', presses Ctrl-C, or the input stream ends.
    """
    print("加载模型中...")
    model, tokenizer = load_lora_model()
    print("模型加载完成！输入'exit'结束对话")

    while True:
        try:
            user_input = input("\n用户：")
        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop: on a closed stdin every further
            # input() call would raise again and the loop would never stop.
            print("\n\n对话结束")
            break

        command = user_input.strip()
        if command.lower() == 'exit':
            break
        if not command:
            # Nothing to answer; prompt again.
            continue

        try:
            response = generate_response(model, tokenizer, user_input)
            print(f"\n助理：{response}")
        except KeyboardInterrupt:
            print("\n\n对话结束")
            break
        except Exception as e:
            # Top-level boundary: report the failure and keep the session alive.
            print(f"\n错误：{e}")


if __name__ == "__main__":
    chat()

是可以的。

学习：

这里的训练任务不是从头训练一个大模型。训练目标在形式上仍然是“预测下一个词”（上面代码里用的就是 causal LM 的 loss），但通过问答格式的数据，让模型学到的是"问答模式"。

模型看到指令后，必须抑制住"续写文本"的冲动，转而生成"符合人类期望的回答"