用的数据集是LLaMA-Factory/data/glaive_toolcall_zh_demo.json
调试过程
LLaMA-Factory 支持 function call(工具调用)格式的训练数据
model_args
shell
ModelArguments(model_name_or_path='/root/zhangyitao/Qwen3-0_6B/', adapter_name_or_path=None, adapter_folder=None, cache_dir=None, use_fast_tokenizer=True, resize_vocab=False, split_special_tokens=False, add_tokens=None, add_special_tokens=None, new_special_tokens_config=None, init_special_tokens='noise_init', model_revision='main', low_cpu_mem_usage=True, rope_scaling=None, flash_attn='fa2', shift_attn=False, mixture_of_depths=None, use_unsloth=False, use_unsloth_gc=False, enable_liger_kernel=True, moe_aux_loss_coef=None, disable_gradient_checkpointing=False, use_reentrant_gc=True, upcast_layernorm=False, upcast_lmhead_output=False, train_from_scratch=False, infer_backend='huggingface', offload_folder='offload', use_kv_cache=True, use_v1_kernels=False, infer_dtype='auto', hf_hub_token=None, ms_hub_token=None, om_hub_token=None, print_param_status=False, trust_remote_code=True, quantization_method='bnb', quantization_bit=None, quantization_type='nf4', double_quantization=True, quantization_device_map=None, image_max_pixels=589824, image_min_pixels=1024, image_do_pan_and_scan=False, crop_to_patches=False, video_max_pixels=65536, video_min_pixels=256, video_fps=2.0, video_maxlen=128, use_audio_in_video=False, audio_sampling_rate=16000, export_dir=None, export_size=5, export_device='cpu', export_quantization_bit=None, export_quantization_dataset=None, export_quantization_nsamples=128, export_quantization_maxlen=1024, export_legacy_format=False, export_hub_model_id=None, use_kt=False, kt_optimize_rule=None, cpu_infer=32, chunk_size=8192, mode='normal', kt_maxlen=4096, kt_use_cuda_graph=True, kt_mode='normal', kt_force_think=False, vllm_maxlen=4096, vllm_gpu_util=0.7, vllm_enforce_eager=False, vllm_max_lora_rank=32, vllm_config=None, sglang_maxlen=4096, sglang_mem_fraction=0.7, sglang_tp_size=-1, sglang_config=None, sglang_lora_backend='triton', compute_dtype=torch.bfloat16, device_map={'': device(type='cuda', index=0)}, model_max_length=5000, block_diag_attn=False
data_args
shell
DataArguments(template='qwen', dataset=['glaive_toolcall_zh_demo'], eval_dataset=None, dataset_dir='data', media_dir='data', cutoff_len=5000, train_on_prompt=False, mask_history=False, streaming=False, buffer_size=16384, mix_strategy='concat', interleave_probs=None, overwrite_cache=True, preprocessing_batch_size=1000, preprocessing_num_workers=32, max_samples=None, eval_num_beams=None, ignore_pad_token_for_loss=True, val_size=0.0, eval_on_each_dataset=False, packing=False, neat_packing=False, tool_format=None, default_system=None, enable_thinking=True, tokenized_path=None, data_shared_file_system=False)
tokenizer_module
shell
{'tokenizer': Qwen2TokenizerFast(name_or_path='/root/zhangyitao/Qwen3-0_6B/', vocab_size=151643, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|im_end|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>', '<|object_ref_end|>', '<|box_start|>', '<|box_end|>', '<|quad_start|>', '<|quad_end|>', '<|vision_start|>', '<|vision_end|>', '<|vision_pad|>', '<|image_pad|>', '<|video_pad|>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={
151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151644: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151645: AddedToken("<|im_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151646: AddedToken("<|object_ref_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151647: AddedToken("<|object_ref_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151648: AddedToken("<|box_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151649: AddedToken("<|box_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151650: AddedToken("<|quad_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151651: AddedToken("<|quad_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151652: AddedToken("<|vision_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151653: AddedToken("<|vision_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151654: AddedToken("<|vision_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151655: AddedToken("<|image_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151656: AddedToken("<|video_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151657: AddedToken("<tool_call>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151658: AddedToken("</tool_call>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151659: AddedToken("<|fim_prefix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151660: AddedToken("<|fim_middle|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151661: AddedToken("<|fim_suffix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151662: AddedToken("<|fim_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151663: AddedToken("<|repo_name|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151664: AddedToken("<|file_sep|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151665: AddedToken("<tool_response>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151666: AddedToken("</tool_response>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151667: AddedToken("<think>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151668: AddedToken("</think>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
}
), 'processor': None}
template(在 get_template_and_fix_tokenizer 里直接从模板字典中取出)。注意:format_observation 把 observation 映射到了 user 角色!
shell
Template(format_user=StringFormatter(slots=['<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n'], tool_format=None), format_assistant=StringFormatter(slots=['{{content}}<|im_end|>\n'], tool_format=None), format_system=StringFormatter(slots=['<|im_start|>system\n{{content}}<|im_end|>\n'], tool_format=None), format_function=FunctionFormatter(slots=['{{content}}<|im_end|>\n'], tool_format='qwen'), format_observation=StringFormatter(slots=['<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n'], tool_format=None), format_tools=ToolFormatter(slots=[], tool_format='qwen'), format_prefix=EmptyFormatter(slots=[], tool_format=None), default_system='You are Qwen, created by Alibaba Cloud. You are a helpful assistant.', stop_words=['<|im_end|>'], thought_words=('<think>\n', '\n</think>\n\n'), tool_call_words=('<tool_call>', '</tool_call>'), efficient_eos=False, replace_eos=True, replace_jinja_template=False, enable_thinking=True, mm_plugin=BasePlugin(image_token=None, video_token=None, audio_token=None, expand_mm_tokens=True))
dataset_module
shell
{'train_dataset': Dataset({
features: ['input_ids', 'attention_mask', 'labels', 'images', 'videos', 'audios'],
num_rows: 300
})}
dataset_module["train_dataset"]
shell
Dataset({
features: ['input_ids', 'attention_mask', 'labels', 'images', 'videos', 'audios'],
num_rows: 300
})
_get_merged_dataset 内部会加载数据集,真正的 encode 是在 _get_preprocessed_dataset 里完成的。
example
shell
training example:
input_ids:
[151644, 8948, 198, 2610, 525, 1207, 16948, 11, 3465, 553, 54364, 14817, 13, 1446, 525, 264, 10950, 17847, 382, 2, 13852, 271, 2610, 1231, 1618, 825, 476, 803, 5746, 311, 7789, 448, 279, 1196, 3239, 382, 2610, 525, 3897, 448, 729, 32628, 2878, 366, 15918, 1472, 15918, 29, 11874, 9492, 510, 27, 15918, 397, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 19366, 39225, 497, 330, 4684, 788, 330, 43959, 107717, 497, 330, 13786, 788, 5212, 1313, 788, 330, 1700, 497, 330, 13193, 788, 5212, 11049, 1269, 788, 5212, 1313, 788, 330, 917, 497, 330, 4684, 788, 330, 100017, 29991, 14345, 330, 3615, 788, 5212, 1313, 788, 330, 1653, 497, 330, 3615, 788, 5212, 1313, 788, 330, 1700, 497, 330, 13193, 788, 5212, 606, 788, 5212, 1313, 788, 330, 917, 497, 330, 4684, 788, 330, 785, 1509, 829, 14345, 330, 13523, 788, 5212, 1313, 788, 330, 11662, 497, 330, 4684, 788, 330, 785, 12194, 315, 279, 1509, 14345, 330, 6555, 788, 5212, 1313, 788, 330, 4082, 497, 330, 4684, 788, 330, 785, 3349, 817, 4982, 9207, 2137, 330, 6279, 788, 4383, 606, 497, 330, 13523, 497, 330, 6555, 1341, 3417, 2137, 330, 6279, 788, 4383, 11049, 1269, 497, 330, 3615, 1341, 3417, 532, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 19366, 10122, 497, 330, 4684, 788, 330, 43959, 107898, 32867, 497, 330, 13786, 788, 5212, 1313, 788, 330, 1700, 497, 330, 13193, 788, 5212, 4129, 788, 5212, 1313, 788, 330, 11662, 497, 330, 4684, 788, 330, 32867, 9370, 98402, 9207, 2137, 330, 6279, 788, 4383, 4129, 1341, 3417, 532, 522, 15918, 1339, 2461, 1817, 729, 1618, 11, 470, 264, 2951, 1633, 448, 729, 829, 323, 5977, 2878, 220, 151657, 151658, 11874, 9492, 510, 151657, 198, 4913, 606, 788, 366, 1688, 11494, 8066, 330, 16370, 788, 366, 2116, 56080, 40432, 31296, 151658, 151645, 198, 151644, 872, 198, 35946, 85106, 17714, 13079, 49628, 43959, 104298, 107717, 1773, 42411, 103946, 34187, 17, 18947, 104167, 3837, 103991, 3, 16, 3837, 101034, 18, 99408, 112622, 3837, 73157, 99408, 3, 15, 13, 20, 1773, 
151645, 198, 151644, 77091, 198, 151657, 198, 4913, 606, 788, 330, 19366, 39225, 497, 330, 16370, 788, 5212, 11049, 1269, 788, 330, 111028, 13935, 42140, 100208, 497, 330, 3615, 788, 61753, 606, 788, 330, 104167, 497, 330, 13523, 788, 220, 17, 11, 330, 6555, 788, 220, 16, 2137, 5212, 606, 788, 330, 112622, 497, 330, 13523, 788, 220, 18, 11, 330, 6555, 788, 220, 15, 13, 20, 25439, 11248, 151658, 151645, 198, 151644, 872, 198, 151665, 198, 4913, 22217, 842, 788, 330, 62931, 16, 17, 18, 19, 20, 497, 330, 11049, 1269, 788, 330, 111028, 13935, 42140, 100208, 497, 330, 3615, 788, 61753, 606, 788, 330, 104167, 497, 330, 13523, 788, 220, 17, 11, 330, 6555, 788, 220, 16, 11, 330, 5035, 788, 220, 17, 2137, 5212, 606, 788, 330, 112622, 497, 330, 13523, 788, 220, 18, 11, 330, 6555, 788, 220, 15, 13, 20, 11, 330, 5035, 788, 220, 16, 13, 20, 72441, 330, 5035, 788, 220, 18, 13, 20, 11, 330, 2829, 788, 330, 43959, 16707, 151666, 151645, 198, 151644, 77091, 198, 107717, 36667, 19108, 43959, 1773, 107717, 72640, 17714, 62931, 16, 17, 18, 19, 20, 1773, 111028, 13935, 42140, 100208, 9370, 59743, 80094, 17714, 3, 18, 13, 20, 1773, 107717, 102298, 17, 18947, 104167, 3837, 59743, 80094, 17714, 3, 17, 3837, 101034, 18, 99408, 112622, 3837, 59743, 80094, 17714, 3, 16, 13, 20, 1773, 151645, 198]
inputs:
<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "generate_invoice", "description": "生成发票", "parameters": {"type": "object", "properties": {"customer_name": {"type": "string", "description": "客户名称"}, "items": {"type": "array", "items": {"type": "object", "properties": {"name": {"type": "string", "description": "The item name"}, "quantity": {"type": "integer", "description": "The quantity of the item"}, "price": {"type": "number", "description": "The price per unit"}}, "required": ["name", "quantity", "price"]}}}, "required": ["customer_name", "items"]}}}
{"type": "function", "function": {"name": "generate_password", "description": "生成随机密码", "parameters": {"type": "object", "properties": {"length": {"type": "integer", "description": "密码的长度"}}, "required": ["length"]}}}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call><|im_end|>
<|im_start|>user
我需要为John Doe生成一张发票。他购买了2个苹果,每个$1,以及3根香蕉,每根$0.5。<|im_end|>
<|im_start|>assistant
<tool_call>
{"name": "generate_invoice", "arguments": {"customer_name": "约翰·多伊", "items": [{"name": "苹果", "quantity": 2, "price": 1}, {"name": "香蕉", "quantity": 3, "price": 0.5}]}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
{"invoice_id": "INV12345", "customer_name": "约翰·多伊", "items": [{"name": "苹果", "quantity": 2, "price": 1, "total": 2}, {"name": "香蕉", "quantity": 3, "price": 0.5, "total": 1.5}], "total": 3.5, "status": "生成"}
</tool_response><|im_end|>
<|im_start|>assistant
发票已成功生成。发票编号为INV12345。约翰·多伊的总金额为$3.5。发票包含2个苹果,总金额为$2,以及3根香蕉,总金额为$1.5。<|im_end|>
label_ids:
[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 151657, 198, 4913, 606, 788, 330, 19366, 39225, 497, 330, 16370, 788, 5212, 11049, 1269, 788, 330, 111028, 13935, 42140, 100208, 497, 330, 3615, 788, 61753, 606, 788, 330, 104167, 497, 330, 13523, 788, 220, 17, 11, 330, 6555, 788, 220, 16, 2137, 5212, 606, 788, 330, 112622, 497, 330, 13523, 788, 220, 18, 11, 330, 6555, 788, 220, 15, 13, 20, 25439, 11248, 151658, 151645, 198, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 107717, 36667, 19108, 43959, 1773, 107717, 72640, 17714, 62931, 16, 17, 18, 19, 20, 1773, 111028, 13935, 42140, 100208, 9370, 59743, 80094, 17714, 3, 18, 13, 20, 1773, 107717, 102298, 17, 18947, 104167, 3837, 59743, 80094, 17714, 3, 17, 3837, 101034, 18, 99408, 112622, 3837, 59743, 80094, 17714, 3, 16, 13, 20, 1773, 151645, 198]
labels: 只有两段 assistant 输出(tool_call 和最终回复)参与 loss 计算,其余位置都是 -100;解码后如下
<tool_call>
{"name": "generate_invoice", "arguments": {"customer_name": "约翰·多伊", "items": [{"name": "苹果", "quantity": 2, "price": 1}, {"name": "香蕉", "quantity": 3, "price": 0.5}]}}
</tool_call><|im_end|>
发票已成功生成。发票编号为INV12345。约翰·多伊的总金额为$3.5。发票包含2个苹果,总金额为$2,以及3根香蕉,总金额为$1.5。<|im_end|>
model
shell
Qwen3ForCausalLM(
(model): Qwen3Model(
(embed_tokens): Embedding(151936, 1024)
(layers): ModuleList(
(0-27): 28 x Qwen3DecoderLayer(
(self_attn): Qwen3Attention(
(q_proj): Linear(in_features=1024, out_features=2048, bias=False)
(k_proj): Linear(in_features=1024, out_features=1024, bias=False)
(v_proj): Linear(in_features=1024, out_features=1024, bias=False)
(o_proj): Linear(in_features=2048, out_features=1024, bias=False)
(q_norm): LigerRMSNorm((128,), eps=1e-06, offset=0.0, in_place=True, row_mode=None)
(k_norm): LigerRMSNorm((128,), eps=1e-06, offset=0.0, in_place=True, row_mode=None)
)
(mlp): LigerSwiGLUMLP(
(gate_proj): Linear(in_features=1024, out_features=3072, bias=False)
(up_proj): Linear(in_features=1024, out_features=3072, bias=False)
(down_proj): Linear(in_features=3072, out_features=1024, bias=False)
)
(input_layernorm): LigerRMSNorm((1024,), eps=1e-06, offset=0.0, in_place=True, row_mode=None)
(post_attention_layernorm): LigerRMSNorm((1024,), eps=1e-06, offset=0.0, in_place=True, row_mode=None)
)
)
(norm): LigerRMSNorm((1024,), eps=1e-06, offset=0.0, in_place=True, row_mode=None)
(rotary_emb): Qwen3RotaryEmbedding()
)
(lm_head): Linear(in_features=1024, out_features=151936, bias=False)
)
data_collator
shell
SFTDataCollatorWith4DAttentionMask(tokenizer=Qwen2TokenizerFast(name_or_path='/root/zhangyitao/Qwen3-0_6B/', vocab_size=151643, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|im_end|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>', '<|object_ref_end|>', '<|box_start|>', '<|box_end|>', '<|quad_start|>', '<|quad_end|>', '<|vision_start|>', '<|vision_end|>', '<|vision_pad|>', '<|image_pad|>', '<|video_pad|>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={
151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151644: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151645: AddedToken("<|im_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151646: AddedToken("<|object_ref_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151647: AddedToken("<|object_ref_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151648: AddedToken("<|box_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151649: AddedToken("<|box_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151650: AddedToken("<|quad_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151651: AddedToken("<|quad_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151652: AddedToken("<|vision_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151653: AddedToken("<|vision_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151654: AddedToken("<|vision_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151655: AddedToken("<|image_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151656: AddedToken("<|video_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151657: AddedToken("<tool_call>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151658: AddedToken("</tool_call>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151659: AddedToken("<|fim_prefix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151660: AddedToken("<|fim_middle|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151661: AddedToken("<|fim_suffix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151662: AddedToken("<|fim_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151663: AddedToken("<|repo_name|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151664: AddedToken("<|file_sep|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151665: AddedToken("<tool_response>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151666: AddedToken("</tool_response>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151667: AddedToken("<think>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151668: AddedToken("</think>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
}
), model=Qwen3ForCausalLM(
(model): Qwen3Model(
(embed_tokens): Embedding(151936, 1024)
(layers): ModuleList(
(0-27): 28 x Qwen3DecoderLayer(
(self_attn): Qwen3Attention(
(q_proj): Linear(in_features=1024, out_features=2048, bias=False)
(k_proj): Linear(in_features=1024, out_features=1024, bias=False)
(v_proj): Linear(in_features=1024, out_features=1024, bias=False)
(o_proj): Linear(in_features=2048, out_features=1024, bias=False)
(q_norm): LigerRMSNorm((128,), eps=1e-06, offset=0.0, in_place=True, row_mode=None)
(k_norm): LigerRMSNorm((128,), eps=1e-06, offset=0.0, in_place=True, row_mode=None)
)
(mlp): LigerSwiGLUMLP(
(gate_proj): Linear(in_features=1024, out_features=3072, bias=False)
(up_proj): Linear(in_features=1024, out_features=3072, bias=False)
(down_proj): Linear(in_features=3072, out_features=1024, bias=False)
)
(input_layernorm): LigerRMSNorm((1024,), eps=1e-06, offset=0.0, in_place=True, row_mode=None)
(post_attention_layernorm): LigerRMSNorm((1024,), eps=1e-06, offset=0.0, in_place=True, row_mode=None)
)
)
(norm): LigerRMSNorm((1024,), eps=1e-06, offset=0.0, in_place=True, row_mode=None)
(rotary_emb): Qwen3RotaryEmbedding()
)
(lm_head): Linear(in_features=1024, out_features=151936, bias=False)
), padding=True, max_length=None, pad_to_multiple_of=8, label_pad_token_id=-100, return_tensors='pt', template=Template(format_user=StringFormatter(slots=['<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n'], tool_format=None), format_assistant=StringFormatter(slots=['{{content}}<|im_end|>\n'], tool_format=None), format_system=StringFormatter(slots=['<|im_start|>system\n{{content}}<|im_end|>\n'], tool_format=None), format_function=FunctionFormatter(slots=['{{content}}<|im_end|>\n'], tool_format='qwen'), format_observation=StringFormatter(slots=['<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n'], tool_format=None), format_tools=ToolFormatter(slots=[], tool_format='qwen'), format_prefix=EmptyFormatter(slots=[], tool_format=None), default_system='You are Qwen, created by Alibaba Cloud. You are a helpful assistant.', stop_words=['<|im_end|>'], thought_words=('<think>\n', '\n</think>\n\n'), tool_call_words=('<tool_call>', '</tool_call>'), efficient_eos=False, replace_eos=True, replace_jinja_template=False, enable_thinking=True, mm_plugin=BasePlugin(image_token=None, video_token=None, audio_token=None, expand_mm_tokens=True)), processor=None, block_diag_attn=False, attn_implementation='sdpa', compute_dtype=torch.bfloat16)
第一个样本的输入示例
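观察可以发现,observation 在这里被渲染成了 user 角色。原因不是数据集 map 时做了特殊替换(align 之后 role 仍然是 'observation'),而是模板的 format_observation 槽位本身就以 '<|im_start|>user\n<tool_response>\n' 开头。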
shell
'<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{"type": "function", "function": {"name": "generate_invoice", "description": "生成发票", "parameters": {"type": "object", "properties": {"customer_name": {"type": "string", "description": "客户名称"}, "items": {"type": "array", "items": {"type": "object", "properties": {"name": {"type": "string", "description": "The item name"}, "quantity": {"type": "integer", "description": "The quantity of the item"}, "price": {"type": "number", "description": "The price per unit"}}, "required": ["name", "quantity", "price"]}}}, "required": ["customer_name", "items"]}}}\n{"type": "function", "function": {"name": "generate_password", "description": "生成随机密码", "parameters": {"type": "object", "properties": {"length": {"type": "integer", "description": "密码的长度"}}, "required": ["length"]}}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{"name": <function-name>, "arguments": <args-json-object>}\n</tool_call><|im_end|>\n<|im_start|>user\n我需要为John Doe生成一张发票。他购买了2个苹果,每个$1,以及3根香蕉,每根$0.5。<|im_end|>\n<|im_start|>assistant\n<tool_call>\n{"name": "generate_invoice", "arguments": {"customer_name": "约翰·多伊", "items": [{"name": "苹果", "quantity": 2, "price": 1}, {"name": "香蕉", "quantity": 3, "price": 0.5}]}}\n</tool_call><|im_end|>\n<|im_start|>user\n<tool_response>\n{"invoice_id": "INV12345", "customer_name": "约翰·多伊", "items": [{"name": "苹果", "quantity": 2, "price": 1, "total": 2}, {"name": "香蕉", "quantity": 3, "price": 0.5, "total": 1.5}], "total": 3.5, "status": "生成"}\n</tool_response><|im_end|>\n<|im_start|>assistant\n发票已成功生成。发票编号为INV12345。约翰·多伊的总金额为$3.5。发票包含2个苹果,总金额为$2,以及3根香蕉,总金额为$1.5。<|im_end|>\n'
labels解码后
'<tool_call>\n{"name": "generate_invoice", "arguments": {"customer_name": "约翰·多伊", "items": [{"name": "苹果", "quantity": 2, "price": 1}, {"name": "香蕉", "quantity": 3, "price": 0.5}]}}\n</tool_call><|im_end|>\n发票已成功生成。发票编号为INV12345。约翰·多伊的总金额为$3.5。发票包含2个苹果,总金额为$2,以及3根香蕉,总金额为$1.5。<|im_end|>\n'
[151657, 198, 4913, 606, 788, 330, 19366, 39225, 497, 330, 16370, 788, 5212, 11049, 1269, 788, 330, 111028, 13935, 42140, 100208, 497, 330, 3615, 788, 61753, 606, 788, 330, 104167, 497, 330, 13523, 788, 220, 17, 11, 330, 6555, 788, 220, 16, 2137, 5212, 606, 788, 330, 112622, 497, 330, 13523, 788, 220, 18, 11, 330, 6555, 788, 220, 15, 13, 20, 25439, 11248, 151658, 151645, 198]
'<tool_call>\n{"name": "generate_invoice", "arguments": {"customer_name": "约翰·多伊", "items": [{"name": "苹果", "quantity": 2, "price": 1}, {"name": "香蕉", "quantity": 3, "price": 0.5}]}}\n</tool_call><|im_end|>\n'
[107717, 36667, 19108, 43959, 1773, 107717, 72640, 17714, 62931, 16, 17, 18, 19, 20, 1773, 111028, 13935, 42140, 100208, 9370, 59743, 80094, 17714, 3, 18, 13, 20, 1773, 107717, 102298, 17, 18947, 104167, 3837, 59743, 80094, 17714, 3, 17, 3837, 101034, 18, 99408, 112622, 3837, 59743, 80094, 17714, 3, 16, 13, 20, 1773, 151645, 198]
'发票已成功生成。发票编号为INV12345。约翰·多伊的总金额为$3.5。发票包含2个苹果,总金额为$2,以及3根香蕉,总金额为$1.5。<|im_end|>\n'
微调完了以后部署
shell
CUDA_VISIBLE_DEVICES=0,1,2,3 /root/sunhao_11_nfs1/guorui/vllm090/bin/python -m vllm.entrypoints.openai.api_server --tensor-parallel-size 1 --dtype bfloat16 --trust-remote-code --host 0.0.0.0 --model /root/zhangyitao/output --max-model-len 8192 --served-model-name Qwen --gpu-memory-utilization 0.3 --enable-prefix-caching --tokenizer-mode auto --port 21111 --enable-auto-tool-choice --tool-call-parser hermes --chat-template /root/zhangyitao/Qwen3-0_6B/qwen3_nonthinking.jinja
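KeyError: 'invalid tool call parser: qwen (chose from { deepseek_v3,granite-20b-fc,granite,hermes,internlm,jamba,llama4_pythonic,llama4_json,llama3_json,minimax,mistral,phi4_mini_json,pythonic,xlam })' —— 不能选 qwen:这个版本的 vLLM 可选列表里没有名为 qwen 的 parser,Qwen 的 <tool_call> 格式用 hermes 解析(即上面命令里的 --tool-call-parser hermes)。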
# full微调
CUDA_VISIBLE_DEVICES=0,1,2,3 /root/sunhao_11_nfs1/guorui/vllm090/bin/python -m vllm.entrypoints.openai.api_server --tensor-parallel-size 1 --dtype bfloat16 --trust-remote-code --host 0.0.0.0 --model /root/zhangyitao/output/full --max-model-len 8192 --served-model-name Qwen --gpu-memory-utilization 0.3 --enable-prefix-caching --tokenizer-mode auto --port 21111 --enable-auto-tool-choice --tool-call-parser hermes --chat-template /root/zhangyitao/Qwen3-0_6B/qwen3_nonthinking.jinja
# 原始(未微调的基座模型)
CUDA_VISIBLE_DEVICES=0,1,2,3 /root/sunhao_11_nfs1/guorui/vllm090/bin/python -m vllm.entrypoints.openai.api_server --tensor-parallel-size 1 --dtype bfloat16 --trust-remote-code --host 0.0.0.0 --model /root/zhangyitao/Qwen3-0_6B --max-model-len 8192 --served-model-name Qwen --gpu-memory-utilization 0.3 --enable-prefix-caching --tokenizer-mode auto --port 21111 --enable-auto-tool-choice --tool-call-parser hermes --chat-template /root/zhangyitao/Qwen3-0_6B/qwen3_nonthinking.jinja
# lora merge
CUDA_VISIBLE_DEVICES=0,1,2,3 /root/sunhao_11_nfs1/guorui/vllm090/bin/python -m vllm.entrypoints.openai.api_server --tensor-parallel-size 1 --dtype bfloat16 --trust-remote-code --host 0.0.0.0 --model /root/zhangyitao/output/lora_merge --max-model-len 8192 --served-model-name Qwen --gpu-memory-utilization 0.3 --enable-prefix-caching --tokenizer-mode auto --port 21111 --enable-auto-tool-choice --tool-call-parser hermes --chat-template /root/zhangyitao/Qwen3-0_6B/qwen3_nonthinking.jinja
dataset_converter
shell
SharegptDatasetConverter(dataset_attr=glaive_toolcall_zh_demo.json, data_args=DataArguments(template='qwen', dataset=['glaive_toolcall_zh_demo'], eval_dataset=None, dataset_dir='data', media_dir='data', cutoff_len=5000, train_on_prompt=False, mask_history=False, streaming=False, buffer_size=16384, mix_strategy='concat', interleave_probs=None, overwrite_cache=True, preprocessing_batch_size=1000, preprocessing_num_workers=32, max_samples=None, eval_num_beams=None, ignore_pad_token_for_loss=True, val_size=0.0, eval_on_each_dataset=False, packing=False, neat_packing=False, tool_format=None, default_system=None, enable_thinking=True, tokenized_path=None, data_shared_file_system=False))
data_args
shell
DataArguments(template='qwen', dataset=['glaive_toolcall_zh_demo'], eval_dataset=None, dataset_dir='data', media_dir='data', cutoff_len=5000, train_on_prompt=False, mask_history=False, streaming=False, buffer_size=16384, mix_strategy='concat', interleave_probs=None, overwrite_cache=True, preprocessing_batch_size=1000, preprocessing_num_workers=32, max_samples=None, eval_num_beams=None, ignore_pad_token_for_loss=True, val_size=0.0, eval_on_each_dataset=False, packing=False, neat_packing=False, tool_format=None, default_system=None, enable_thinking=True, tokenized_path=None, data_shared_file_system=False)
数据格式转换:最后有一个 align_dataset,实际就是把 dataset 的格式转换成指定的 prompt 格式,通过 map 实现
shell
#转换前:
{'conversations': [{'from': 'human', 'value': '我需要为John Doe生成一张发票。他购买了2个苹果,每个$1,以及3根香蕉,每根$0.5。'}, {'from': 'function_call', 'value': '{"name": "generate_invoice", "arguments": {"customer_name": "约翰·多伊", "items": [{"name": "苹果", "quantity": 2, "price": 1}, {"name": "香蕉", "quantity": 3, "price": 0.5}]}}'}, {'from': 'observation', 'value': '{"invoice_id": "INV12345", "customer_name": "约翰·多伊", "items": [{"name": "苹果", "quantity": 2, "price": 1, "total": 2}, {"name": "香蕉", "quantity": 3, "price": 0.5, "total": 1.5}], "total": 3.5, "status": "生成"}'}, {'from': 'gpt', 'value': '发票已成功生成。发票编号为INV12345。约翰·多伊的总金额为$3.5。发票包含2个苹果,总金额为$2,以及3根香蕉,总金额为$1.5。'}], 'tools': '[{"name": "generate_invoice", "description": "生成发票", "parameters": {"type": "object", "properties": {"customer_name": {"type": "string", "description": "客户名称"}, "items": {"type": "array", "items": {"type": "object", "properties": {"name": {"type": "string", "description": "The item name"}, "quantity": {"type": "integer", "description": "The quantity of the item"}, "price": {"type": "number", "description": "The price per unit"}}, "required": ["name", "quantity", "price"]}}}, "required": ["customer_name", "items"]}}, {"name": "generate_password", "description": "生成随机密码", "parameters": {"type": "object", "properties": {"length": {"type": "integer", "description": "密码的长度"}}, "required": ["length"]}}]'}
#转换后:
{'_prompt': [{'content': '我需要为John Doe生成一张发票。他购买了2个苹果,每个$1,以及3根香蕉,每根$0.5。', 'role': 'user'}, {'content': '{"name": "generate_invoice", "arguments": {"customer_name": "约翰·多伊", "items": [{"name": "苹果", "quantity": 2, "price": 1}, {"name": "香蕉", "quantity": 3, "price": 0.5}]}}', 'role': 'function'}, {'content': '{"invoice_id": "INV12345", "customer_name": "约翰·多伊", "items": [{"name": "苹果", "quantity": 2, "price": 1, "total": 2}, {"name": "香蕉", "quantity": 3, "price": 0.5, "total": 1.5}], "total": 3.5, "status": "生成"}', 'role': 'observation'}], '_response': [{'content': '发票已成功生成。发票编号为INV12345。约翰·多伊的总金额为$3.5。发票包含2个苹果,总金额为$2,以及3根香蕉,总金额为$1.5。', 'role': 'assistant'}], '_system': '', '_tools': '[{"name": "generate_invoice", "description": "生成发票", "parameters": {"type": "object", "properties": {"customer_name": {"type": "string", "description": "客户名称"}, "items": {"type": "array", "items": {"type": "object", "properties": {"name": {"type": "string", "description": "The item name"}, "quantity": {"type": "integer", "description": "The quantity of the item"}, "price": {"type": "number", "description": "The price per unit"}}, "required": ["name", "quantity", "price"]}}}, "required": ["customer_name", "items"]}}, {"name": "generate_password", "description": "生成随机密码", "parameters": {"type": "object", "properties": {"length": {"type": "integer", "description": "密码的长度"}}, "required": ["length"]}}]', '_images': None, '_videos': None, '_audios': None}
# 格式化打印,可以发现每个prompt的组成其实就是历史对话,角色依次是user、function、observation
key = _prompt,value = [{'content': '我需要为John Doe生成一张发票。他购买了2个苹果,每个$1,以及3根香蕉,每根$0.5。', 'role': 'user'}, {'content': '{"name": "generate_invoice", "arguments": {"customer_name": "约翰·多伊", "items": [{"name": "苹果", "quantity": 2, "price": 1}, {"name": "香蕉", "quantity": 3, "price": 0.5}]}}', 'role': 'function'}, {'content': '{"invoice_id": "INV12345", "customer_name": "约翰·多伊", "items": [{"name": "苹果", "quantity": 2, "price": 1, "total": 2}, {"name": "香蕉", "quantity": 3, "price": 0.5, "total": 1.5}], "total": 3.5, "status": "生成"}', 'role': 'observation'}]
key = _response,value = [{'content': '发票已成功生成。发票编号为INV12345。约翰·多伊的总金额为$3.5。发票包含2个苹果,总金额为$2,以及3根香蕉,总金额为$1.5。', 'role': 'assistant'}]
key = _system,value =
key = _tools,value = [{"name": "generate_invoice", "description": "生成发票", "parameters": {"type": "object", "properties": {"customer_name": {"type": "string", "description": "客户名称"}, "items": {"type": "array", "items": {"type": "object", "properties": {"name": {"type": "string", "description": "The item name"}, "quantity": {"type": "integer", "description": "The quantity of the item"}, "price": {"type": "number", "description": "The price per unit"}}, "required": ["name", "quantity", "price"]}}}, "required": ["customer_name", "items"]}}, {"name": "generate_password", "description": "生成随机密码", "parameters": {"type": "object", "properties": {"length": {"type": "integer", "description": "密码的长度"}}, "required": ["length"]}}]
key = _images,value = None
key = _videos,value = None
key = _audios,value = None
第二个例子
shell
'<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\n假设你有一个需要随机数的Java程序,范围在0到10之间。你可以使用什么代码片段来生成这样的数字?\n\n下面是一个示例代码片段,它使用Math.random()方法来生成一个在0到1之间的随机数,然后缩放并四舍五入结果,得到一个在0到10之间的随机数:\n\n```java\nint randomNumber = (int) (Math.random() * 11);\n```\n\n在Java中是否有更有效的方法来生成随机数?你会如何修改代码以生成具有不同范围或分布的随机数?<|im_end|>\n<|im_start|>assistant\n您提供的代码片段是Java中生成随机数的一种常见且高效的方法。然而,如果您需要对随机数生成有更多的控制,您可以使用java.util.Random类,它提供了更多生成随机数的选项。\n\n要生成一个在不同范围内的随机数,您可以修改代码以调整缩放因子。例如,要生成一个在1到10之间的随机数,您可以使用以下代码:\n\nint randomNumber = (int) (Math.random() * 10) + 1;\n\n要生成具有不同分布的随机数,您可以使用各种技术,如正态分布、指数分布或自定义分布。生成正态分布的一种方法是使用Box-Muller变换。这里有一个代码示例,它生成具有正态分布的随机数:\n\nRandom random = new Random();\ndouble u1 = random.nextDouble();\ndouble u2 = random.nextDouble();\ndouble z = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);\ndouble mean = 5;\ndouble stdDev = 2;\nint randomNumber = (int) (z * stdDev + mean);\n\n这段代码生成一个具有均值5和标准差2的正态分布随机数。您可以调整均值和标准差来生成具有不同分布的随机数。<|im_end|>\n<|im_start|>user\n你能解释一下算法分析中的大O记法概念吗?<|im_end|>\n<|im_start|>assistant\n是的,大O表示法是一种描述算法性能随输入数据大小增加的方法。它用于估计算法在最坏情况下的时间复杂度或空间复杂度。大O中的"O"代表"数量级",这意味着随着输入大小的增加,算法所需的操作数以一定的速率增长。\n\n例如,如果我们有一个需要n步才能解决大小为n的问题的算法,那么时间复杂度可以表示为O(n),这意味着操作数随着输入大小线性增长。如果我们有一个需要n^2步才能解决大小为n的问题的算法,那么时间复杂度可以表示为O(n^2),这意味着操作数随着输入大小二次方增长。\n\n还有其他几种大O表示法,如O(log n)、O(n log n)、O(2^n)等,它们代表了随着输入大小增加的不同操作数增长速率。通过分析算法的大O表示法,我们可以比较不同算法的性能,并选择给定问题最有效的算法。\n\n您是否有关于大O表示法或算法分析的特定问题?<|im_end|>\n'