Enhanced LoRA training of the Qwen-based "virtual me" model: much better this time, with 300-odd data entries plus a dozen or so normal chat logs

On top of the 300-odd absurdist QQ-zone (Qzone) posts, I mixed in a dozen or so ordinary chat records, and now it actually talks like a person.

🎯 Capability demo:

⚠️ 春节到了想说什么?

, ˇ_ˇ 春节到了 瞬间长大~

⚠️ 朋友分享感情经历怎么回应?

, =_= 每个感情经历都是一个里程碑~

⚠️ 晚安

晚安~

🤔 Start a conversation? (y/n): y

==================================================

💬 Chat with the virtual you

Tips:

  • Ask questions covered in training: accurate answers
  • Simple everyday chat: replies in your own style
  • Complex or specialist questions: possibly amusing but unreliable answers
  • Type quit to exit

你: 你是

🎭: , =_= 原来我是根棍~

你: 你女朋友去哪了

🎭: 她去上班了~

你: 你数学学的好不

🎭: , 哼 还能说文艺事呢~

你: 傻瓜

🎭: , 瞬间长大,脑袋里装着'傻子'~

你: 傻子

🎭: , 傻逼

你: 你骂人

🎭: 擦擦擦 你是个坏女孩~

A dozen or so normal chat conversations were added to the data, for example:

```json
{"conversations": [{"role": "user", "content": "破单位"}, {"role": "assistant", "content": "不是 我要是没辞职的话 是要去上班的"}]},
  {"conversations": [{"role": "user", "content": "挺好  跟家歇着"}, {"role": "assistant", "content": "住那 吵不 是不是没水了 喊你打水 不打水就打你"}]},
  {"conversations": [{"role": "user", "content": "就我们俩人   吵个屁"}, {"role": "assistant", "content": "半夜鸡叫 周扒皮"}]}
```
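The merge itself is trivial; here is a minimal sketch, assuming the dataset lives in a local `qqjson.json` (a JSON list of `{"conversations": [...]}` items) that gets uploaded to Kaggle afterwards — the file name and layout are taken from the notebook below, everything else is illustrative.

```python
# Hypothetical merge step: append the extra "normal chat" conversations to the
# existing Qzone dataset before uploading it to Kaggle.
import json

extra_chats = [
    {"conversations": [{"role": "user", "content": "破单位"},
                       {"role": "assistant", "content": "不是 我要是没辞职的话 是要去上班的"}]},
    # ... the remaining dozen or so normal conversations
]

with open("qqjson.json", "r", encoding="utf-8") as f:
    data = json.load(f)

data.extend(extra_chats)

with open("qqjson.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

print(f"dataset now holds {len(data)} conversations")
```

The full Kaggle training notebook follows, cell by cell.
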
```python
# ========== Cell 1: install dependencies ==========
import sys
import subprocess

def install_packages():
    print("Installing required libraries...")

    packages = [
        "transformers",
        "datasets",
        "peft",
        "accelerate",
        "sentencepiece",
        "bitsandbytes",
        "scikit-learn",
        "einops"
    ]

    for package in packages:
        try:
            subprocess.check_call([
                sys.executable, "-m", "pip", "install",
                package, "-q", "--no-deps"
            ])
            print(f"✅ installed: {package}")
        except Exception:
            print(f"⚠️  probably already installed: {package}")

    print("\nInstall finished!")

install_packages()

# ========== Cell 2: environment setup and imports ==========
import os
import json
import torch
import random
import numpy as np
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Fix random seeds for reproducibility
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

# ========== Cell 3: device check ==========
print("🔍 Checking environment and GPU setup...")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    print(f"Number of GPUs: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"  GPU {i}: {torch.cuda.get_device_name(i)}")
    # Pin everything to GPU 0
    torch.cuda.set_device(0)
    print(f"Current GPU set to: {torch.cuda.current_device()}")
else:
    print("Training on CPU")

print("✅ Environment ready")

# ========== Cell 4: load the data ==========
print("\n📂 Loading data...")

DATA_PATH = "/kaggle/input/jsonoooho/qqjson.json"

def load_and_analyze_data(data_path):
    if not os.path.exists(data_path):
        print(f"❌ Data file not found: {data_path}")
        # Fall back to the first .json file found under /kaggle/input
        for root, dirs, files in os.walk("/kaggle/input"):
            json_files = [f for f in files if f.endswith('.json')]
            if json_files:
                data_path = os.path.join(root, json_files[0])
                print(f"Found: {data_path}")
                break

    print(f"📄 Loading data: {data_path}")
    with open(data_path, 'r', encoding='utf-8') as f:
        raw_data = json.load(f)

    print(f"✅ Loaded {len(raw_data)} records")

    if len(raw_data) == 0:
        raise ValueError("The data file is empty")

    print("\n📊 Sample records (first 2):")
    for i in range(min(2, len(raw_data))):
        print(f"  [{i+1}] {raw_data[i]}")

    return raw_data

raw_data = load_and_analyze_data(DATA_PATH)

# ========== Cell 5: model and tokenizer ==========
print("\n🤖 Configuring the model...")

MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
print(f"Selected model: {MODEL_NAME}")

from transformers import AutoTokenizer, AutoModelForCausalLM

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    padding_side="right"
)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("✅ Tokenizer loaded")

# ========== Cell 6: data preprocessing ==========
print("\n🔄 Preparing training data...")

def prepare_training_data_from_conversations(raw_data, tokenizer):
    formatted_examples = []

    print(f"Processing {len(raw_data)} records...")
    processed_count = 0

    for i, item in enumerate(tqdm(raw_data, desc="Formatting data")):
        try:
            if 'conversations' not in item:
                continue

            conversations = item['conversations']
            if len(conversations) < 2:
                continue

            # If a pair is stored assistant-first, swap it so the user turn comes first
            if conversations[0]['role'] != 'user':
                if len(conversations) >= 2:
                    if conversations[0]['role'] == 'assistant' and conversations[1]['role'] == 'user':
                        conversations = [conversations[1], conversations[0]]

            formatted_text = tokenizer.apply_chat_template(
                conversations,
                tokenize=False,
                add_generation_prompt=False
            )

            formatted_examples.append({"text": formatted_text})
            processed_count += 1

        except Exception as e:
            continue

    print(f"📊 Preprocessing finished: {processed_count} records")
    return formatted_examples

formatted_data = prepare_training_data_from_conversations(raw_data, tokenizer)

from datasets import Dataset
dataset = Dataset.from_list(formatted_data)
print(f"✅ Dataset created: {len(dataset)} samples")

def tokenize_function(examples, max_length=256):
    tokenized = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_tensors=None
    )
    # Full-sequence labels: the loss covers the user turn as well as the reply.
    # (The data collator in Cell 9 rebuilds labels from input_ids and masks padding.)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

print("Tokenizing...")
tokenized_dataset = dataset.map(
    lambda x: tokenize_function(x, max_length=256),
    batched=True,
    batch_size=32,
    remove_columns=["text"]
)

split_dataset = tokenized_dataset.train_test_split(
    test_size=0.1,
    seed=42,
    shuffle=True
)

train_dataset = split_dataset["train"]
eval_dataset = split_dataset["test"]

print("📋 Dataset split:")
print(f"  🏋️ Train: {len(train_dataset)} samples")
print(f"  🧪 Validation: {len(eval_dataset)} samples")

# ========== Cell 7: enhanced LoRA config and model loading ==========
print("\n🤖 Configuring the enhanced LoRA...")

from peft import LoraConfig, get_peft_model

# ★★★ Enhanced LoRA configuration ★★★
print("Setting up the enhanced LoRA parameters...")
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,                          # ★ larger rank for more capacity
    lora_alpha=64,                 # ★ matching scaling factor
    lora_dropout=0.1,
    target_modules=[
        # attention projections
        "q_proj", "k_proj", "v_proj", "o_proj",
        # ★★★ newly added MLP layers (what really improves the dialogue logic)
        "gate_proj", "up_proj", "down_proj",
        # ★★★ newly added lm_head (directly shapes the output distribution)
        "lm_head"
    ],
    bias="lora_only",
    inference_mode=False,
)

print("✨ Enhanced LoRA config:")
print(f"  - rank (r): {lora_config.r} (was 8 before)")
print(f"  - scaling (alpha): {lora_config.lora_alpha}")
print(f"  - target modules: {len(lora_config.target_modules)}")

# 🚨 Key change: pin the model to GPU 0 to avoid cross-device errors
print("\nLoading the base model onto GPU 0...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="cuda:0" if torch.cuda.is_available() else "auto",  # force GPU 0
    trust_remote_code=True,
    use_cache=False,
)

# 🚨 Extra insurance that the model sits on the current GPU
if torch.cuda.is_available():
    model = model.to("cuda")
    print(f"✅ Model moved to: {model.device}")
else:
    print("✅ Running on CPU")

print(f"Base model device: {next(model.parameters()).device}")

# Attach the enhanced LoRA adapter
print("\nApplying the enhanced LoRA adapter...")
model = get_peft_model(model, lora_config)

# ★★★ Print the (now larger) trainable parameter count
print("\n🔍 Parameter statistics:")
model.print_trainable_parameters()

# Verify the setup
print("\n🔍 Model configuration check:")
print(f"  Is a PEFT model: {hasattr(model, 'peft_config')}")
model.train()
print("✅ Model switched to training mode")

# ========== Cell 8: training arguments ==========
print("\n⚙️ Configuring the enhanced training setup...")

from transformers import TrainingArguments
import math

num_train_samples = len(train_dataset)
print(f"Training samples: {num_train_samples}")

# Batch size setup
per_device_batch_size = 2
gradient_accumulation_steps = 4
effective_batch_size = per_device_batch_size * gradient_accumulation_steps

steps_per_epoch = math.ceil(num_train_samples / effective_batch_size)
total_epochs = 8  # ★ raised to 8 epochs to go with the enhanced LoRA
max_steps = steps_per_epoch * total_epochs

print("📊 Training math:")
print(f"  - per-device batch size: {per_device_batch_size}")
print(f"  - gradient accumulation steps: {gradient_accumulation_steps}")
print(f"  - effective batch size: {effective_batch_size}")
print(f"  - steps per epoch: {steps_per_epoch}")
print(f"  - epochs: {total_epochs}")
print(f"  - total training steps: ~{max_steps}")

# ★★★ Training arguments tuned for the enhanced LoRA ★★★
training_args = TrainingArguments(
    output_dir="./qwen_qq_enhanced",  # ★ new name to keep runs apart

    # training loop
    num_train_epochs=total_epochs,
    per_device_train_batch_size=per_device_batch_size,
    per_device_eval_batch_size=per_device_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,

    # ★ optimizer settings (paired with the enhanced LoRA)
    learning_rate=2e-4,          # ★ higher learning rate
    weight_decay=0.01,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    max_grad_norm=1.0,

    # learning-rate schedule
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,

    # evaluation and checkpointing
    eval_strategy="steps",
    eval_steps=min(50, steps_per_epoch // 2),
    save_strategy="steps",
    save_steps=min(100, steps_per_epoch),
    save_total_limit=3,

    # logging
    logging_strategy="steps",
    logging_steps=10,
    logging_first_step=True,
    report_to="none",  # disable external reporting on Kaggle

    # 🚨 Avoid the FP16 gradient-unscale error by training in bf16
    # (bf16 needs an Ampere-class GPU; see the fallback right below)
    fp16=False,
    bf16=True,
    fp16_full_eval=False,
    bf16_full_eval=True,

    # other key switches
    dataloader_drop_last=True,
    remove_unused_columns=True,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

print("\n✅ Training arguments ready:")
print(f"  output dir: {training_args.output_dir}")
print(f"  learning rate: {training_args.learning_rate}")
print(f"  epochs: {training_args.num_train_epochs}")
print(f"  FP16 training: {training_args.fp16}")

estimated_time_minutes = max_steps * 1.5 / 60
print(f"  estimated training time: ~{estimated_time_minutes:.1f} minutes")

# ========== Cell 9: build the Trainer ==========
print("\n🎪 Creating the trainer...")

from transformers import Trainer, DataCollatorForLanguageModeling
import time

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,               # causal LM: labels are the (pad-masked) input ids
    pad_to_multiple_of=8,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
)

print("✅ Trainer created!")
print(f"  total parameters: {sum(p.numel() for p in model.parameters()):,}")
print(f"  trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")

# ========== Cell 10: train (with error handling) ==========
print("\n" + "="*60)
print("🚀 Starting training!")
print("="*60)

try:
    # kick off training
    trainer.train()
    print("🎉 Training finished!")

except Exception as e:
    print(f"❌ Training failed: {e}")

    if "Attempting to unscale FP16 gradients" in str(e):
        print("\n🔧 FP16 gradient error detected, retrying without FP16...")
        # fallback: disable FP16 and rebuild the trainer
        training_args.fp16 = False
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            data_collator=data_collator,
        )

        try:
            trainer.train()
            print("🎉 Training without FP16 finished!")
        except Exception as e2:
            print(f"❌ Fallback run also failed: {e2}")
            import traceback
            traceback.print_exc()
            raise
    else:
        import traceback
        traceback.print_exc()
        raise
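# Optional: summarize the eval loss recorded by the Trainer
# (trainer.state.log_history keeps all logged training/eval metrics).
eval_losses = [log["eval_loss"] for log in trainer.state.log_history if "eval_loss" in log]
if eval_losses:
    print(f"eval_loss went from {eval_losses[0]:.3f} to {eval_losses[-1]:.3f}")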

# ========== Cell 11: save the model (fixed path) ==========
print("\n💾 Saving the model...")

try:
    # 🚨 Save to a plain relative directory under the working dir (no "./" prefix)
    save_path = "my_qq_virtual_self_enhanced"

    # save the LoRA adapter and tokenizer
    trainer.save_model(save_path)
    tokenizer.save_pretrained(save_path)

    print(f"✅ Model saved to: {save_path}")
    print("📁 Contents of the save directory:")
    import os
    if os.path.exists(save_path):
        for file in os.listdir(save_path):
            size = os.path.getsize(os.path.join(save_path, file))
            print(f"  - {file} ({size:,} bytes)")

    print("\n🎯 Model saved!")

except Exception as e:
    print(f"❌ Saving failed: {e}")
    import traceback
    traceback.print_exc()
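# Optional: zip the adapter folder so it is easy to download from the Kaggle output panel.
import shutil
shutil.make_archive("my_qq_virtual_self_enhanced", "zip", "my_qq_virtual_self_enhanced")
print("📦 wrote my_qq_virtual_self_enhanced.zip")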

# ========== Cell 12: test the trained model (fixed loading) ==========
print("\n" + "="*60)
print("🧪 Testing the trained model")
print("="*60)

from peft import PeftModel

def test_trained_model():
    try:
        save_path = "my_qq_virtual_self_enhanced"

        print(f"Loading the saved LoRA adapter: {save_path}")

        # ✅ Correct approach: load the base model first, then attach the LoRA adapter
        print("1. Loading the base Qwen model...")
        base_model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True
        )

        print("2. Loading the LoRA adapter...")
        trained_model = PeftModel.from_pretrained(
            base_model,
            save_path,
            adapter_name="qq_virtual"
        )

        print("3. Loading the tokenizer...")
        trained_tokenizer = AutoTokenizer.from_pretrained(
            save_path,
            trust_remote_code=True
        )

        if trained_tokenizer.pad_token is None:
            trained_tokenizer.pad_token = trained_tokenizer.eos_token

        print("✅ Model loaded!")

        # switch to evaluation mode
        trained_model.eval()

        # a few test questions
        test_questions = [
            "春节到了想说什么?",
            "你好",
            "今天心情怎么样?",
        ]

        print("\n📝 Test dialogue:")
        for question in test_questions:
            print(f"\n你: {question}")

            messages = [{"role": "user", "content": question}]
            text = trained_tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True
            )

            inputs = trained_tokenizer(text, return_tensors="pt").to(trained_model.device)

            with torch.no_grad():
                outputs = trained_model.generate(
                    **inputs,
                    max_new_tokens=80,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=trained_tokenizer.eos_token_id
                )

            response = trained_tokenizer.decode(
                outputs[0][inputs.input_ids.shape[1]:],
                skip_special_tokens=True
            )
            print(f"🎭: {response}")

        return trained_model, trained_tokenizer

    except Exception as e:
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
        return None, None

# run the test
model, tokenizer = test_trained_model()
```
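If you ever want a standalone checkpoint that loads without `peft`, the LoRA weights can be merged into the base model. A minimal sketch, assuming `model` above is the `PeftModel` returned by the test (i.e. loading succeeded); the `my_qq_virtual_self_merged` directory name is just an example:

```python
# Optional: merge the LoRA adapter into the base weights and save a full model.
merged_model = model.merge_and_unload()
merged_model.save_pretrained("my_qq_virtual_self_merged")
tokenizer.save_pretrained("my_qq_virtual_self_merged")
```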

Cell 13 wraps everything into a local assistant and gives it a quick test:

```python
# ========== Cell 13: full package — your virtual assistant ==========
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

class YourVirtualSelf:
    """The complete packaged 'virtual you'"""

    def __init__(self, model_path="./my_qq_virtual_self_enhanced"):
        print("🎭 Loading the virtual you...")

        try:
            # Loading the adapter directory directly relies on transformers'
            # built-in PEFT integration (peft must be installed); alternatively
            # load base model + adapter explicitly as in Cell 12.
            self.model = AutoModelForCausalLM.from_pretrained(
                model_path,
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True
            )
            self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            print("✅ Loaded!")
            print("  Style: =_= faces, short replies, a bit of attitude")

            # quick demo of what it can do
            self.demo()

        except Exception as e:
            print(f"❌ Loading failed: {e}")
            self.model = None

    def demo(self):
        """Show a few canned examples"""
        print("\n🎯 Capability demo:")

        demos = [
            ("春节到了想说什么?", "春节 快乐。。。"),
            ("朋友分享感情经历怎么回应?", "别逗比了=_="),
            ("晚安", "午梦佳人~"),
        ]

        for q, expected in demos:
            response = self.chat(q, temperature=0.1)
            match = "✅" if expected in response else "⚠️"
            print(f"  {match} {q}")
            print(f"      {response}")

    def chat(self, message, temperature=0.6):
        """Single-turn chat"""
        if self.model is None:
            return "model not loaded"

        messages = [{"role": "user", "content": message}]
        text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)

        generation_config = GenerationConfig(
            max_new_tokens=70,
            temperature=temperature,
            top_p=0.95,
            do_sample=True,
            repetition_penalty=1.01,
            pad_token_id=self.tokenizer.eos_token_id,
        )

        with torch.no_grad():
            outputs = self.model.generate(**inputs, generation_config=generation_config)

        response = self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
        return response.strip()

    def interactive(self):
        """Interactive chat loop"""
        if self.model is None:
            print("Cannot start the chat")
            return

        print("\n" + "="*50)
        print("💬 Chat with the virtual you")
        print("Tips:")
        print("  - Ask questions covered in training: accurate answers")
        print("  - Simple everyday chat: replies in your own style")
        print("  - Complex or specialist questions: possibly amusing but unreliable answers")
        print("  - Type quit to exit")
        print("="*50)

        history = []

        while True:
            try:
                user_input = input("\n你: ").strip()

                if user_input.lower() in ['quit', 'exit', '退出', 'bye']:
                    print("🎭 下次见~ =_=")
                    break

                if not user_input:
                    continue

                # Trim the history (note: it is only recorded here, not yet fed back
                # into generation; see the multi-turn sketch after this block)
                if len(history) > 4:
                    history = history[-2:]  # keep only the most recent turn

                response = self.chat(user_input)
                print(f"🎭: {response}")

                # record the exchange
                history.extend([user_input, response])

            except KeyboardInterrupt:
                print("\n\nChat ended")
                break
            except Exception as e:
                print(f"Error: {e}")

# One-click launch of your virtual assistant
print("🚀 Starting your virtual self assistant...")
my_virtual_self = YourVirtualSelf()

if my_virtual_self.model is not None:
    if input("\n🤔 Start a conversation? (y/n): ").lower() == 'y':
        my_virtual_self.interactive()
    else:
        print("✅ Model is ready! Chat whenever you like.")

        # quick one-off test
        print("\n📝 Quick test:")
        test_q = input("Ask one question to test: ").strip()
        if test_q:
            response = my_virtual_self.chat(test_q)
            print(f"🎭 Answer: {response}")
else:
    print("❌ Could not load the model")

print("\n" + "="*50)
print("🎉 Done! Your personalized AI assistant is ready")
print("  Traits: has memories, has personality, not a generic AI")
print("  Usage: just start chatting")
print("="*50)
```