GameGPT 初赛方案设计 训练入口+主入口

十、训练入口

train.py

import os, json, joblib

import numpy as np

from pathlib import Path

from collections import Counter

from parse_log import parse_log_file

from features import GameFeatureExtractor

from predictors import MLPredictor

# ════════════════════════════════════════════════════
# TrainingSampleBuilder
# ════════════════════════════════════════════════════

class TrainingSampleBuilder:
    """Builds labeled training samples from per-category game-log folders.

    Each sub-folder of *train_root* is named after a behaviour category and
    contains raw ``.txt`` log files; every parseable file becomes one sample
    whose label (intent string + canonical action list) is looked up from
    the class-level category tables.
    """

    # Category folder name → ground-truth intent label.
    # NOTE(review): 'BeingResuce' looks like a typo of 'BeingRescue', but it
    # must match the actual folder names on disk — confirm before renaming.
    CATEGORY_INTENT = {
        'Action': '交战',
        'Fire': '交战',
        'SkillStart': '技能交战',
        'Grenade': '投弹交战',
        'BeingResuce': '被救援/等待救援',
        'Looting': '搜寻物资'
    }

    # Category folder name → canonical ordered action sequence for the label.
    CATEGORY_ACTIONS = {
        'Action': ['靠近敌人', '开镜瞄准', '开火射击'],
        'Fire': ['开火射击', '开镜瞄准', '换弹'],
        'SkillStart': ['使用技能', '开镜瞄准', '开火射击'],
        'Grenade': ['投掷手雷', '靠近敌人', '开火射击'],
        'BeingResuce': ['原地不动/潜伏', '被救援中'],
        'Looting': ['移动至物资点', '搜寻物资', '拾取物品']
    }

    def __init__(self, train_root: str, extractor: 'GameFeatureExtractor'):
        """
        Args:
            train_root: Root directory containing one sub-folder per category.
            extractor: Feature extractor shared across all samples.
        """
        self.train_root = Path(train_root)
        self.extractor = extractor

    def build_all(self) -> list:
        """Build samples for every known category.

        Returns:
            List of sample dicts (see ``_build_one``). Missing folders are
            reported and skipped; a file that fails to parse is reported and
            skipped so one bad log cannot abort the whole run.
        """
        samples = []
        for cat, intent in self.CATEGORY_INTENT.items():
            folder = self.train_root / cat
            if not folder.exists():
                print(f" ⚠ 文件夹不存在: {folder}")
                continue
            files = list(folder.glob('*.txt'))
            print(f" {cat:15s} → {len(files):4d} 个文件")
            for f in files:
                try:
                    s = self._build_one(f, cat)
                    if s:
                        samples.append(s)
                except Exception as e:
                    # Best-effort: log and continue on malformed files.
                    print(f" 处理失败 {f.name}: {e}")
        return samples

    def _build_one(self, filepath: Path, category: str) -> dict:
        """Build a single labeled sample from one log file.

        Returns:
            Sample dict with file path, main player id, feature dict and
            label, or ``None`` when the log has no player states or no
            identifiable main player.
        """
        parsed = parse_log_file(str(filepath))
        if not parsed['player_states']:
            return None
        player_team_map = {
            i['player_id']: i['team_id']
            for i in parsed['game_info']
        }
        # Determine the main player: the one with the most logged actions,
        # falling back to the first player listed in game_info.
        if parsed['actions']:
            counts = Counter(a.player_id for a in parsed['actions'])
            main_id = counts.most_common(1)[0][0]
        elif parsed['game_info']:
            main_id = parsed['game_info'][0]['player_id']
        else:
            return None
        # Extract features: player-state, spatial and action groups are
        # merged into one flat feature dict.
        features = {}
        features.update(self.extractor.extract_player_features(
            parsed['player_states'], main_id, {}
        ))
        features.update(self.extractor.extract_spatial_features(
            parsed['player_states'], main_id, player_team_map
        ))
        features.update(self.extractor.extract_action_features(
            parsed['actions'], main_id
        ))
        return {
            'file': str(filepath),
            'category': category,
            'main_player': main_id,
            'features': features,
            'label': {
                'intent': self.CATEGORY_INTENT[category],
                'actions': self.CATEGORY_ACTIONS[category]
            }
        }

# ════════════════════════════════════════════════════
# 训练主函数 (training entry point)
# ════════════════════════════════════════════════════

def main():
    """End-to-end training pipeline.

    Builds labeled samples from ``data/train``, writes sample metadata to
    ``models/samples_meta.json``, then trains the ML intent classifier,
    which persists its model artefacts under ``models/``.
    """
    print("=" * 60)
    print(" 三角洲 GameGPT 训练流程")
    print("=" * 60)
    os.makedirs('models', exist_ok=True)

    # Step 1: feature extractor shared by sample building and training.
    print("\n[1/3] 初始化特征提取器")
    extractor = GameFeatureExtractor()

    # Step 2: build training samples from the category folders.
    print("\n[2/3] 构造训练样本")
    builder = TrainingSampleBuilder(
        train_root='data/train',
        extractor=extractor
    )
    samples = builder.build_all()
    print(f"\n 共构造样本: {len(samples)} 个")

    # Report the class distribution so imbalance is visible up front.
    cat_dist = Counter(s['category'] for s in samples)
    print(" 类别分布:")
    for cat, cnt in sorted(cat_dist.items()):
        print(f" {cat:15s}: {cnt}")

    # Persist lightweight sample metadata for later inspection/debugging.
    meta = [{
        'file': s['file'],
        'category': s['category'],
        'intent': s['label']['intent']
    } for s in samples]
    with open('models/samples_meta.json', 'w', encoding='utf-8') as f:
        json.dump(meta, f, ensure_ascii=False, indent=2)

    # Step 3: train the ML intent classifier.
    print("\n[3/3] 训练ML分类器")
    ml = MLPredictor()
    ml.train(samples)
    print("\n ✓ 训练完成!")
    print(" 模型文件:")
    print(" models/intent_clf.pkl")
    print(" models/intent_encoder.pkl")
    print(" models/samples_meta.json")

# Script entry point (dunders were mangled in the original paste:
# `if name == 'main'` can never be true and raises NameError).
if __name__ == '__main__':
    main()

十一、主入口

main.py ── 统一入口

import argparse

import sys

def parse_args():
    """Build and parse the CLI with `train`, `infer` and `eval` sub-commands.

    Returns:
        argparse.Namespace whose ``mode`` attribute is the chosen
        sub-command name (or ``None`` when no sub-command was given).
    """
    parser = argparse.ArgumentParser(
        description='三角洲 GameGPT 初赛方案'
    )
    subparsers = parser.add_subparsers(dest='mode')

    # ── train sub-command ──────────────────────────────
    train_p = subparsers.add_parser('train', help='训练ML分类器')
    train_p.add_argument(
        '--train_dir', default='data/train',
        help='训练数据根目录'
    )

    # ── infer sub-command ──────────────────────────────
    infer_p = subparsers.add_parser('infer', help='批量推理')
    infer_p.add_argument(
        '--test_dir', default='data/test',
        help='测试txt文件目录'
    )
    infer_p.add_argument(
        '--output', default='output/submit.xlsx',
        help='输出xlsx路径'
    )
    infer_p.add_argument(
        '--backend', default='auto',
        choices=['auto', 'ollama', 'vllm', 'transformers'],
        help='LLM后端选择'
    )
    infer_p.add_argument(
        '--model_name', default='qwen2.5:7b',
        help='Ollama/vLLM模型名称'
    )
    infer_p.add_argument(
        '--model_path', default=None,
        help='本地模型路径(transformers后端使用)'
    )
    infer_p.add_argument(
        '--ollama_url', default='http://localhost:11434',
        help='Ollama服务地址'
    )
    infer_p.add_argument(
        '--vllm_url', default='http://localhost:8000/v1',
        help='vLLM服务地址'
    )
    infer_p.add_argument(
        '--no_ml', action='store_true',
        help='不使用ML分类器'
    )
    infer_p.add_argument(
        '--no_debug', action='store_true',
        help='关闭详细日志'
    )

    # ── eval sub-command (cross-validation on the training set) ──
    eval_p = subparsers.add_parser('eval', help='在训练集上评估')
    eval_p.add_argument('--train_dir', default='data/train')
    eval_p.add_argument('--backend', default='auto')
    eval_p.add_argument('--model_name', default='qwen2.5:7b')
    eval_p.add_argument('--model_path', default=None)

    return parser.parse_args()

def run_train(args):
    """Training entry point: delegates to ``train.main()``.

    A non-default ``--train_dir`` is forwarded via the ``TRAIN_DIR``
    environment variable.
    NOTE(review): this only takes effect if ``train.main()`` actually reads
    ``TRAIN_DIR`` — confirm in train.py (its builder is hard-coded to
    'data/train' in the version shown here).
    """
    import os
    import train as train_module
    if hasattr(args, 'train_dir') and args.train_dir != 'data/train':
        os.environ['TRAIN_DIR'] = args.train_dir
    train_module.main()

def run_infer(args):
    """Batch-inference entry point.

    Translates the CLI namespace into keyword arguments for
    ``inference.run_inference`` (the ``--no_*`` flags are inverted into
    positive ``use_ml`` / ``debug_log`` switches).
    """
    from inference import run_inference

    run_inference(
        test_dir=args.test_dir,
        output_path=args.output,
        model_name=args.model_name,
        model_path=args.model_path,
        backend=args.backend,
        use_ml=not args.no_ml,
        debug_log=not args.no_debug,
    )

def run_eval(args):
    """Evaluate the ensemble predictor on a slice of the training set.

    Randomly samples 20% of the files (at least one) from each category
    folder as a validation set, predicts the intent for each, and reports
    overall and per-class accuracy. A JSON report is written to
    ``output/eval_report.json``.

    NOTE(review): validation files are not excluded from the data the ML
    model was trained on, so accuracy here may be optimistic.
    """
    import os
    import random
    import json
    from pathlib import Path
    from collections import defaultdict
    from parse_log import parse_log_file
    from features import GameFeatureExtractor
    from local_llm import LocalLLMPredictor
    from predictors import RuleBasedPredictor, MLPredictor, EnsemblePredictor
    from inference import determine_main_player, build_compact_text
    import joblib

    print("=" * 60)
    print(" 训练集评估流程")
    print("=" * 60)

    # Category folder name → ground-truth intent label (mirrors train.py).
    CATEGORY_INTENT = {
        'Action': '交战',
        'Fire': '交战',
        'SkillStart': '技能交战',
        'Grenade': '投弹交战',
        'BeingResuce': '被救援/等待救援',
        'Looting': '搜寻物资'
    }

    train_root = Path(args.train_dir)
    extractor = GameFeatureExtractor()

    # Collect validation samples: shuffle each category and take 20%.
    val_samples = []
    for cat, intent in CATEGORY_INTENT.items():
        folder = train_root / cat
        if not folder.exists():
            continue
        files = list(folder.glob('*.txt'))
        random.shuffle(files)
        val_files = files[:max(1, int(len(files) * 0.2))]
        for f in val_files:
            val_samples.append({'file': f, 'true_intent': intent})
    print(f"验证样本数: {len(val_samples)}")

    # Initialise predictors; the ML predictor is optional and only loaded
    # when its trained artefacts exist on disk.
    rule_p = RuleBasedPredictor()
    ml_p = None
    if Path('models/intent_clf.pkl').exists():
        ml_p = MLPredictor()
        ml_p.intent_clf = joblib.load('models/intent_clf.pkl')
        ml_p.intent_encoder = joblib.load('models/intent_encoder.pkl')
    llm_p = LocalLLMPredictor(
        backend=args.backend,
        model_name=args.model_name,
        model_path=args.model_path
    )
    # Without an ML model its ensemble weight shifts to the LLM.
    predictor = EnsemblePredictor(
        rule_predictor=rule_p,
        ml_predictor=ml_p,
        llm_predictor=llm_p,
        weights={
            'rule': 0.15,
            'ml': 0.25 if ml_p else 0.0,
            'llm': 0.60 if ml_p else 0.85
        }
    )

    # Per-sample evaluation loop.
    correct, total = 0, 0
    per_class = defaultdict(lambda: {'correct': 0, 'total': 0})
    errors = []
    for item in val_samples:
        try:
            parsed = parse_log_file(str(item['file']))
            main_id = determine_main_player(parsed)
            ptmap = {
                i['player_id']: i['team_id']
                for i in parsed['game_info']
            }
            # Same three feature groups as used at training time.
            feats = {}
            feats.update(extractor.extract_player_features(
                parsed['player_states'], main_id, {}
            ))
            feats.update(extractor.extract_spatial_features(
                parsed['player_states'], main_id, ptmap
            ))
            feats.update(extractor.extract_action_features(
                parsed['actions'], main_id
            ))
            text = build_compact_text(parsed, main_id, ptmap, feats)
            pred = predictor.predict(feats, text, parsed['actions'])
            true_intent = item['true_intent']
            pred_intent = pred['intent']
            is_correct = (true_intent == pred_intent)
            if is_correct:
                correct += 1
            total += 1
            per_class[true_intent]['total'] += 1
            per_class[true_intent]['correct'] += int(is_correct)
            if not is_correct:
                errors.append({
                    'file': str(item['file']),
                    'true': true_intent,
                    'pred': pred_intent
                })
        except Exception as e:
            # A sample that fails to evaluate still counts in the total,
            # so crashes hurt accuracy rather than hiding.
            print(f" 评估失败 {item['file']}: {e}")
            total += 1

    # Print the evaluation summary.
    print("\n" + "=" * 50)
    print(f" 总体准确率: {correct}/{total} = {correct/max(total,1)*100:.2f}%")
    print("=" * 50)
    print(" 各类准确率:")
    for intent, stat in sorted(per_class.items()):
        acc = stat['correct'] / max(stat['total'], 1) * 100
        bar = '█' * int(acc / 5)
        print(f" {intent:20s} {acc:5.1f}% [{bar}]")
    print(f"\n 错误样本数: {len(errors)}")
    if errors:
        print(" 错误示例:")
        for e in errors[:3]:
            print(f" 文件: {Path(e['file']).name}")
            print(f" 真实: {e['true']} 预测: {e['pred']}")

    # Save the machine-readable evaluation report.
    os.makedirs('output', exist_ok=True)
    with open('output/eval_report.json', 'w', encoding='utf-8') as f:
        json.dump({
            'accuracy': correct / max(total, 1),
            'per_class': dict(per_class),
            'errors': errors
        }, f, ensure_ascii=False, indent=2)
    print("\n 评估报告已保存至 output/eval_report.json")

# Script entry point: dispatch to the chosen sub-command, or print usage
# and exit non-zero when no sub-command was given. (The original paste had
# `if name == 'main'`, with the dunders stripped — a NameError as written.)
if __name__ == '__main__':
    args = parse_args()
    if args.mode == 'train':
        run_train(args)
    elif args.mode == 'infer':
        run_infer(args)
    elif args.mode == 'eval':
        run_eval(args)
    else:
        print("请指定子命令: train / infer / eval")
        print("示例:")
        print(" python main.py train")
        print(" python main.py infer --backend ollama --model_name qwen2.5:7b")
        print(" python main.py eval --backend ollama --model_name qwen2.5:7b")
        sys.exit(1)

相关推荐
余衫马1 小时前
Microsoft Semantic Kernel 入门指南
人工智能·microsoft·.net·agent·智能体
Abbylolo1 小时前
PyCharm 中接入 Cursor AI:通过 ACP 实现无缝协作
ide·python·pycharm
AC梦1 小时前
在Claude Code中接入Deepseek-v4模型
vscode·ai
组合缺一1 小时前
OpenClaw vs SolonCode:绑定飞书与钉钉,到底谁更简单?
ai·钉钉·飞书·ai编程·数字员工·openclaw·soloncode
call me by ur name1 小时前
Polymarket开发文档——交易
ai
积跬步,慕至千里1 小时前
解决 Conda 环境在 Jupyter Notebook 中不显示的问题(含重复 Kernel 排查)
windows·jupyter·conda
qq_283720051 小时前
保姆级 Claude Code 安装教程
python·pygame
2301_769340671 小时前
HTML函数运行时触控屏失灵是硬件故障吗_输入层兼容性测试【详解】
jvm·数据库·python
chenyuhao20241 小时前
AI agent 开发之嵌入模型和提示词 前置知识
人工智能·深度学习·算法·langchain·agent·ai应用开发