(base) root@huawei:/disk1/models# pwd
/disk1/models
(base) root@huawei:/disk1/models# cat /etc/issue
Ubuntu 20.04 LTS \n \l
(base) root@huawei:/disk1/models# free -g
total used free shared buff/cache available
Mem: 754 8 389 0 356 741
Swap: 7 0 7
(base) root@huawei:/disk1/models# lscpu|grep CPU
CPU op-mode(s): 64-bit
CPU(s): 192
On-line CPU(s) list: 0-191
NUMA node0 CPU(s): 0-23
NUMA node1 CPU(s): 24-47
NUMA node2 CPU(s): 48-71
NUMA node3 CPU(s): 72-95
NUMA node4 CPU(s): 96-119
NUMA node5 CPU(s): 120-143
NUMA node6 CPU(s): 144-167
NUMA node7 CPU(s): 168-191
(base) root@huawei:/disk1/models# df -hT
Filesystem Type Size Used Avail Use% Mounted on
udev devtmpfs 377G 0 377G 0% /dev
tmpfs tmpfs 76G 4.6M 76G 1% /run
/dev/sda2 ext4 439G 159G 258G 39% /
tmpfs tmpfs 378G 4.3M 378G 1% /dev/shm
tmpfs tmpfs 5.0M 0 5.0M 0% /run/lock
tmpfs tmpfs 378G 0 378G 0% /sys/fs/cgroup
/dev/sda1 vfat 511M 3.5M 508M 1% /boot/efi
/dev/loop7 squashfs 49M 49M 0 100% /snap/core18/2848
/dev/loop0 squashfs 69M 69M 0 100% /snap/core22/1720
/dev/loop6 squashfs 100M 100M 0 100% /snap/lxd/31572
/dev/loop2 squashfs 101M 101M 0 100% /snap/lxd/31822
/dev/loop3 squashfs 39M 39M 0 100% /snap/snapd/23546
/dev/loop4 squashfs 69M 69M 0 100% /snap/core22/1752
/dev/loop5 squashfs 49M 49M 0 100% /snap/core18/2857
overlay overlay 439G 159G 258G 39% /var/lib/docker/overlay2/3fb838ad167298740a56ca0038f073f7e3a212a7b4d5e7f295b85bd7130428aa/merged
/dev/loop1 squashfs 39M 39M 0 100% /snap/snapd/23772
/dev/mapper/testvg-testlv ext4 1.5T 226G 1.2T 17% /disk1
overlay overlay 439G 159G 258G 39% /var/lib/docker/overlay2/27007413f47cdafb51bbef36aa09298d95f6f9870d2ba16f3f74dfcbf1d7f5a9/merged
tmpfs tmpfs 76G 0 76G 0% /run/user/0
(base) root@huawei:/disk1/models# npu-smi info
+------------------------------------------------------------------------------------------------+
| npu-smi 23.0.0 Version: 23.0.0 |
+---------------------------+---------------+----------------------------------------------------+
| NPU Name | Health | Power(W) Temp(C) Hugepages-Usage(page)|
| Chip | Bus-Id | AICore(%) Memory-Usage(MB) HBM-Usage(MB) |
+===========================+===============+====================================================+
| 0 910PremiumA | OK | 98.6 75 0 / 0 |
| 0 | 0000:C1:00.0 | 0 1225 / 13553 1 / 32768 |
+===========================+===============+====================================================+
| 1 910PremiumA | OK | 102.6 75 0 / 0 |
| 0 | 0000:81:00.0 | 0 1973 / 15665 1 / 32768 |
+===========================+===============+====================================================+
| 2 910PremiumA | OK | 102.4 75 0 / 0 |
| 0 | 0000:41:00.0 | 0 2237 / 15665 1 / 32768 |
+===========================+===============+====================================================+
| 3 910PremiumA | OK | 100.0 75 0 / 0 |
| 0 | 0000:01:00.0 | 0 2944 / 15567 1 / 32768 |
+===========================+===============+====================================================+
| 4 910PremiumA | OK | 100.4 74 0 / 0 |
| 0 | 0000:C2:00.0 | 0 1415 / 13553 1 / 32768 |
+===========================+===============+====================================================+
| 5 910PremiumA | OK | 104.7 75 0 / 0 |
| 0 | 0000:82:00.0 | 0 1708 / 15665 1 / 32768 |
+===========================+===============+====================================================+
| 6 910PremiumA | OK | 101.1 75 0 / 0 |
| 0 | 0000:42:00.0 | 0 2342 / 15665 0 / 32768 |
+===========================+===============+====================================================+
| 7 910PremiumA | OK | 99.3 75 0 / 0 |
| 0 | 0000:02:00.0 | 0 2898 / 15567 1 / 32768 |
+===========================+===============+====================================================+
+---------------------------+---------------+----------------------------------------------------+
| NPU Chip | Process id | Process name | Process memory(MB) |
+===========================+===============+====================================================+
| No running processes found in NPU 0 |
+===========================+===============+====================================================+
| No running processes found in NPU 1 |
+===========================+===============+====================================================+
| No running processes found in NPU 2 |
+===========================+===============+====================================================+
| No running processes found in NPU 3 |
+===========================+===============+====================================================+
| No running processes found in NPU 4 |
+===========================+===============+====================================================+
| No running processes found in NPU 5 |
+===========================+===============+====================================================+
| No running processes found in NPU 6 |
+===========================+===============+====================================================+
| No running processes found in NPU 7 |
+===========================+===============+====================================================+
(base) root@huawei:/disk1/models# ll /disk1/models
total 220140
drwxrwxrwx 5 root root 4096 Mar 7 07:37 ./
drwxr-xr-x 4 root root 4096 Mar 7 06:11 ../
-rw-r--r-- 1 root root 4807602 Mar 7 01:59 Ascend-mindie-atb-models_1.0.0_linux-aarch64_py310_torch2.1.0-abi0.tar.gz
-rw-r--r-- 1 root root 4944832 Mar 7 01:59 Ascend-mindie-atb-models_1.0.0_linux-aarch64_py310_torch2.1.0-abi1.tar.gz
-rw-r--r-- 1 root root 4813371 Mar 7 01:59 Ascend-mindie-atb-models_1.0.0_linux-aarch64_py310_torch2.3.1-abi0.tar.gz
-rw-r--r-- 1 root root 4734426 Mar 7 01:59 Ascend-mindie-atb-models_1.0.0_linux-aarch64_py310_torch2.3.1-abi1.tar.gz
-rw-r--r-- 1 root root 4808762 Mar 7 01:59 Ascend-mindie-atb-models_1.0.0_linux-aarch64_py311_torch2.1.0-abi0.tar.gz
-rw-r--r-- 1 root root 4945450 Mar 7 01:59 Ascend-mindie-atb-models_1.0.0_linux-aarch64_py311_torch2.1.0-abi1.tar.gz
-rw-r--r-- 1 root root 4813791 Mar 7 01:59 Ascend-mindie-atb-models_1.0.0_linux-aarch64_py311_torch2.3.1-abi0.tar.gz
-rw-r--r-- 1 root root 4734373 Mar 7 01:59 Ascend-mindie-atb-models_1.0.0_linux-aarch64_py311_torch2.3.1-abi1.tar.gz
drwxrwxrwx 3 root root 4096 Mar 6 00:56 deepseek-ai/
-rw------- 1 root root 368 Mar 7 07:36 .msc
drwxrwxrwx 7 root root 4096 Mar 7 07:38 Qwen/
drwxrwxrwx 4 root root 4096 Mar 7 07:36 ._____temp/
-rw-r--r-- 1 root root 84138364 Oct 6 2023 torch-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
-rw-r--r-- 1 root root 89791945 Jul 24 2024 torch-2.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
-rw-r--r-- 1 root root 12845038 Mar 7 01:30 torch_npu-2.4.0.post2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
(base) root@huawei:/disk1/models#
运行容器:
docker run -it -d --name mindie-910a-t71 --ipc=host --net=host --shm-size=200g \
--device=/dev/davinci0 \
--device=/dev/davinci1 \
--device=/dev/davinci2 \
--device=/dev/davinci3 \
--device=/dev/davinci4 \
--device=/dev/davinci5 \
--device=/dev/davinci6 \
--device=/dev/davinci7 \
--device=/dev/davinci_manager \
--device=/dev/hisi_hdc \
--device=/dev/devmm_svm \
--entrypoint=bash \
-w /usr/local/Ascend/mindie/latest/mindie-llm/logs \
-v /usr/local/dcmi:/usr/local/dcmi \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/sbin:/usr/local/sbin \
-v /usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/common \
-v /usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/driver/lib64/driver \
-v /etc/hccn.conf:/etc/hccn.conf \
-v /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime \
-v /etc/ascend_install.info:/etc/ascend_install.info \
-v /etc/vnpu.cfg:/etc/vnpu.cfg \
-v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
-v /disk1/models:/models \
<此处缺少 T71 版本的 MindIE 镜像名称,请补充后再执行>
进入容器测试:
docker exec -it mindie-910a-t71 bash
另外一个模型也可运行:
docker run -it -d --name mindie-910a-t65 --ipc=host --net=host --shm-size=200g \
--device=/dev/davinci0 \
--device=/dev/davinci1 \
--device=/dev/davinci2 \
--device=/dev/davinci3 \
--device=/dev/davinci4 \
--device=/dev/davinci5 \
--device=/dev/davinci6 \
--device=/dev/davinci7 \
--device=/dev/davinci_manager \
--device=/dev/hisi_hdc \
--device=/dev/devmm_svm \
--entrypoint=bash \
-w /usr/local/Ascend/mindie/latest/mindie-llm/logs \
-v /usr/local/dcmi:/usr/local/dcmi \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/sbin:/usr/local/sbin \
-v /usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/common \
-v /usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/driver/lib64/driver \
-v /etc/hccn.conf:/etc/hccn.conf \
-v /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime \
-v /etc/ascend_install.info:/etc/ascend_install.info \
-v /etc/vnpu.cfg:/etc/vnpu.cfg \
-v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
-v /disk1/models:/models \
swr.cn-central-221.ovaijisuan.com/wh-aicc-fae/mindie:910a-ascend_23.0.0-cann_8.0.rc3-py_3.10-ubuntu_22.04-aarch64-mindie_1.0.t65
docker exec -it mindie-910a-t65 bash
torchrun --nproc_per_node 2 --master_port 20030 -m examples.run_pa --model_path /models/Qwen/Qwen2___5-7B-Instruct --input_texts "你好,请介绍一下武汉" --max_batch_size 2

测试结果:
1.运行Qwen2.5-7B-Instruct正常:
(Python310) root@huawei:/usr/local/Ascend/atb-models# torchrun --nproc_per_node 2 --master_port 20030 -m examples.run_pa --model_path /models/Qwen/Qwen2___5-7B-Instruct --input_texts "你好,请介绍一下武汉" --max_batch_size 2
\[2025-03-07 16:32:36,351\] torch.distributed.run: \[WARNING\]  
\[2025-03-07 16:32:36,351\] torch.distributed.run: \[WARNING\] \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*  
\[2025-03-07 16:32:36,351\] torch.distributed.run: \[WARNING\] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.   
\[2025-03-07 16:32:36,351\] torch.distributed.run: \[WARNING\] \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*  
\[2025-03-07 16:32:46,307\] \[22204\] \[281473125748752\] \[llm\] \[INFO\]\[logging.py-227\] : Skip binding cpu.  
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.  
\[2025-03-07 16:32:46,855\] \[22204\] \[281473125748752\] \[llm\] \[INFO\]\[logging.py-227\] : model_runner.quantize: None, model_runner.kv_quant_type: None, model_runner.fa_quant_type: None, model_runner.dtype: torch.float16  
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.  
\[2025-03-07 16:32:54,824\] \[22204\] \[281473125748752\] \[llm\] \[INFO\]\[dist.py-81\] : initialize_distributed has been Set  
\[2025-03-07 16:32:54,826\] \[22204\] \[281473125748752\] \[llm\] \[INFO\]\[logging.py-227\] : init tokenizer done: Qwen2TokenizerFast(name_or_path='/models/Qwen/Qwen2___5-7B-Instruct', vocab_size=151643, model_max_length=131072, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'eos_token': '\<\|im_end\|\>', 'pad_token': '\<\|endoftext\|\>', 'additional_special_tokens': \['\<\|im_start\|\>', '\<\|im_end\|\>', '\<\|object_ref_start\|\>', '\<\|object_ref_end\|\>', '\<\|box_start\|\>', '\<\|box_end\|\>', '\<\|quad_start\|\>', '\<\|quad_end\|\>', '\<\|vision_start\|\>', '\<\|vision_end\|\>', '\<\|vision_pad\|\>', '\<\|image_pad\|\>', '\<\|video_pad\|\>'\]}, clean_up_tokenization_spaces=False), added_tokens_decoder={  
151643: AddedToken("\<\|endoftext\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151644: AddedToken("\<\|im_start\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151645: AddedToken("\<\|im_end\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151646: AddedToken("\<\|object_ref_start\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151647: AddedToken("\<\|object_ref_end\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151648: AddedToken("\<\|box_start\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151649: AddedToken("\<\|box_end\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151650: AddedToken("\<\|quad_start\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151651: AddedToken("\<\|quad_end\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151652: AddedToken("\<\|vision_start\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151653: AddedToken("\<\|vision_end\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151654: AddedToken("\<\|vision_pad\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151655: AddedToken("\<\|image_pad\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151656: AddedToken("\<\|video_pad\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
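151657: AddedToken("\
……(后续输出已截断)

2.运行Qwen2.5-72B-Instruct-GPTQ-Int4报错(8卡并行,提示权重 model.layers.0.self_attn.q_proj.weight 不存在):
\[2025-03-07 16:36:38,408\] torch.distributed.run: \[WARNING\] \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*  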
\[2025-03-07 16:36:38,408\] torch.distributed.run: \[WARNING\] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.   
\[2025-03-07 16:36:38,408\] torch.distributed.run: \[WARNING\] \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*  
\[2025-03-07 16:36:49,200\] \[24163\] \[281473876656144\] \[llm\] \[INFO\]\[logging.py-227\] : Skip binding cpu.  
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.  
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.  
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.  
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.  
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.  
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.  
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.  
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.  
\[2025-03-07 16:36:49,770\] \[24163\] \[281473876656144\] \[llm\] \[INFO\]\[logging.py-227\] : model_runner.quantize: None, model_runner.kv_quant_type: None, model_runner.fa_quant_type: None, model_runner.dtype: torch.float16  
\[2025-03-07 16:36:57,840\] \[24166\] \[281473450606608\] \[llm\] \[INFO\]\[dist.py-81\] : initialize_distributed has been Set  
\[2025-03-07 16:36:58,085\] \[24167\] \[281473718341648\] \[llm\] \[INFO\]\[dist.py-81\] : initialize_distributed has been Set  
\[2025-03-07 16:36:58,084\] \[24170\] \[281472927670288\] \[llm\] \[INFO\]\[dist.py-81\] : initialize_distributed has been Set  
\[2025-03-07 16:36:58,111\] \[24168\] \[281473527169040\] \[llm\] \[INFO\]\[dist.py-81\] : initialize_distributed has been Set  
\[2025-03-07 16:36:58,285\] \[24166\] \[281473450606608\] \[llm\] \[INFO\]\[flash_causal_qwen2.py-122\] : \>\>\>\> qwen_QwenDecoderModel is called.  
\[2025-03-07 16:36:58,472\] \[24167\] \[281473718341648\] \[llm\] \[INFO\]\[flash_causal_qwen2.py-122\] : \>\>\>\> qwen_QwenDecoderModel is called.  
\[2025-03-07 16:36:58,579\] \[24170\] \[281472927670288\] \[llm\] \[INFO\]\[flash_causal_qwen2.py-122\] : \>\>\>\> qwen_QwenDecoderModel is called.  
\[2025-03-07 16:36:58,598\] \[24168\] \[281473527169040\] \[llm\] \[INFO\]\[flash_causal_qwen2.py-122\] : \>\>\>\> qwen_QwenDecoderModel is called.  
\[2025-03-07 16:36:58,637\] \[24164\] \[281473344917520\] \[llm\] \[INFO\]\[dist.py-81\] : initialize_distributed has been Set  
\[2025-03-07 16:36:58,698\] \[24169\] \[281472867508240\] \[llm\] \[INFO\]\[dist.py-81\] : initialize_distributed has been Set  
\[2025-03-07 16:36:58,975\] \[24163\] \[281473876656144\] \[llm\] \[INFO\]\[dist.py-81\] : initialize_distributed has been Set  
\[2025-03-07 16:36:59,001\] \[24163\] \[281473876656144\] \[llm\] \[INFO\]\[logging.py-227\] : init tokenizer done: Qwen2TokenizerFast(name_or_path='/models/Qwen/Qwen2___5-72B-Instruct-GPTQ-Int4', vocab_size=151643, model_max_length=131072, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'eos_token': '\<\|im_end\|\>', 'pad_token': '\<\|endoftext\|\>', 'additional_special_tokens': \['\<\|im_start\|\>', '\<\|im_end\|\>', '\<\|object_ref_start\|\>', '\<\|object_ref_end\|\>', '\<\|box_start\|\>', '\<\|box_end\|\>', '\<\|quad_start\|\>', '\<\|quad_end\|\>', '\<\|vision_start\|\>', '\<\|vision_end\|\>', '\<\|vision_pad\|\>', '\<\|image_pad\|\>', '\<\|video_pad\|\>'\]}, clean_up_tokenization_spaces=False), added_tokens_decoder={  
151643: AddedToken("\<\|endoftext\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151644: AddedToken("\<\|im_start\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151645: AddedToken("\<\|im_end\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151646: AddedToken("\<\|object_ref_start\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151647: AddedToken("\<\|object_ref_end\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151648: AddedToken("\<\|box_start\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151649: AddedToken("\<\|box_end\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151650: AddedToken("\<\|quad_start\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151651: AddedToken("\<\|quad_end\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151652: AddedToken("\<\|vision_start\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151653: AddedToken("\<\|vision_end\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151654: AddedToken("\<\|vision_pad\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151655: AddedToken("\<\|image_pad\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151656: AddedToken("\<\|video_pad\|\>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),  
151657: AddedToken("\
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 657, in <listcomp>
w = [self.get_sharded(f"{p}.weight", dim=dim, gqa_size=gqa_size) for p in prefixes]
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 331, in get_sharded
slice_ = self._get_slice(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 718, in _get_slice
filename, tensor_name = self.get_filename(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 146, in get_filename
raise AssertionError(f"weight {tensor_name} does not exist")
AssertionError: weight model.layers.0.self_attn.q_proj.weight does not exist
\[2025-03-07 16:36:59,423\] \[24163\] \[281473876656144\] \[llm\] \[INFO\]\[flash_causal_qwen2.py-122\] : \>\>\>\> qwen_QwenDecoderModel is called.  
Traceback (most recent call last):  
File "/root/miniconda3/envs/Python310/lib/python3.10/runpy.py", line 196, in _run_module_as_main  
return _run_code(code, main_globals, None,  
File "/root/miniconda3/envs/Python310/lib/python3.10/runpy.py", line 86, in _run_code  
exec(code, run_globals)  
File "/usr/local/Ascend/atb-models/examples/run_pa.py", line 500, in \<module\>  
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 657, in <listcomp>
w = [self.get_sharded(f"{p}.weight", dim=dim, gqa_size=gqa_size) for p in prefixes]
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 331, in get_sharded
slice_ = self._get_slice(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 718, in _get_slice
filename, tensor_name = self.get_filename(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 146, in get_filename
raise AssertionError(f"weight {tensor_name} does not exist")
AssertionError: weight model.layers.0.self_attn.q_proj.weight does not exist
Traceback (most recent call last):
File "/root/miniconda3/envs/Python310/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/root/miniconda3/envs/Python310/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/usr/local/Ascend/atb-models/examples/run_pa.py", line 500, in <module>
pa_runner = PARunner(**input_dict)
File "/usr/local/Ascend/atb-models/examples/run_pa.py", line 97, in init
self.model.load_weights(**kw_args)
File "/usr/local/Ascend/atb-models/atb_llm/runner/model_runner.py", line 161, in load_weights
self.model = self.model_cls(self.config,
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/flash_causal_qwen2.py", line 32, in init
self.transformer = FlashQwenModel(config, weights, model_prefix=model_prefix, lmhead_prefix=lmhead_prefix)
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/modeling_qwen2.py", line 407, in init
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/modeling_qwen2.py", line 408, in \
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 657, in <listcomp>
w = [self.get_sharded(f"{p}.weight", dim=dim, gqa_size=gqa_size) for p in prefixes]
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 331, in get_sharded
slice_ = self._get_slice(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 718, in _get_slice
filename, tensor_name = self.get_filename(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 146, in get_filename
raise AssertionError(f"weight {tensor_name} does not exist")
AssertionError: weight model.layers.0.self_attn.q_proj.weight does not exist
\[2025-03-07 16:36:59,662\] \[24165\] \[281473872347152\] \[llm\] \[INFO\]\[flash_causal_qwen2.py-122\] : \>\>\>\> qwen_QwenDecoderModel is called.  
Traceback (most recent call last):  
File "/root/miniconda3/envs/Python310/lib/python3.10/runpy.py", line 196, in _run_module_as_main  
return _run_code(code, main_globals, None,  
File "/root/miniconda3/envs/Python310/lib/python3.10/runpy.py", line 86, in _run_code  
exec(code, run_globals)  
File "/usr/local/Ascend/atb-models/examples/run_pa.py", line 500, in \<module\>  
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 657, in <listcomp>
w = [self.get_sharded(f"{p}.weight", dim=dim, gqa_size=gqa_size) for p in prefixes]
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 331, in get_sharded
slice_ = self._get_slice(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 718, in _get_slice
filename, tensor_name = self.get_filename(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 146, in get_filename
raise AssertionError(f"weight {tensor_name} does not exist")
AssertionError: weight model.layers.0.self_attn.q_proj.weight does not exist
Traceback (most recent call last):
File "/root/miniconda3/envs/Python310/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/root/miniconda3/envs/Python310/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/usr/local/Ascend/atb-models/examples/run_pa.py", line 500, in <module>
pa_runner = PARunner(**input_dict)
File "/usr/local/Ascend/atb-models/examples/run_pa.py", line 97, in init
self.model.load_weights(**kw_args)
File "/usr/local/Ascend/atb-models/atb_llm/runner/model_runner.py", line 161, in load_weights
self.model = self.model_cls(self.config,
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/flash_causal_qwen2.py", line 32, in init
self.transformer = FlashQwenModel(config, weights, model_prefix=model_prefix, lmhead_prefix=lmhead_prefix)
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/modeling_qwen2.py", line 407, in init
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/modeling_qwen2.py", line 408, in \
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 657, in <listcomp>
w = [self.get_sharded(f"{p}.weight", dim=dim, gqa_size=gqa_size) for p in prefixes]
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 331, in get_sharded
slice_ = self._get_slice(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 718, in _get_slice
filename, tensor_name = self.get_filename(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 146, in get_filename
raise AssertionError(f"weight {tensor_name} does not exist")
AssertionError: weight model.layers.0.self_attn.q_proj.weight does not exist
Traceback (most recent call last):
File "/root/miniconda3/envs/Python310/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/root/miniconda3/envs/Python310/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/usr/local/Ascend/atb-models/examples/run_pa.py", line 500, in <module>
pa_runner = PARunner(**input_dict)
File "/usr/local/Ascend/atb-models/examples/run_pa.py", line 97, in init
self.model.load_weights(**kw_args)
File "/usr/local/Ascend/atb-models/atb_llm/runner/model_runner.py", line 161, in load_weights
self.model = self.model_cls(self.config,
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/flash_causal_qwen2.py", line 32, in init
self.transformer = FlashQwenModel(config, weights, model_prefix=model_prefix, lmhead_prefix=lmhead_prefix)
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/modeling_qwen2.py", line 407, in init
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/modeling_qwen2.py", line 408, in \
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 657, in <listcomp>
w = [self.get_sharded(f"{p}.weight", dim=dim, gqa_size=gqa_size) for p in prefixes]
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 331, in get_sharded
slice_ = self._get_slice(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 718, in _get_slice
filename, tensor_name = self.get_filename(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 146, in get_filename
raise AssertionError(f"weight {tensor_name} does not exist")
AssertionError: weight model.layers.0.self_attn.q_proj.weight does not exist
Traceback (most recent call last):
File "/root/miniconda3/envs/Python310/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/root/miniconda3/envs/Python310/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/usr/local/Ascend/atb-models/examples/run_pa.py", line 500, in <module>
pa_runner = PARunner(**input_dict)
File "/usr/local/Ascend/atb-models/examples/run_pa.py", line 97, in init
self.model.load_weights(**kw_args)
File "/usr/local/Ascend/atb-models/atb_llm/runner/model_runner.py", line 161, in load_weights
self.model = self.model_cls(self.config,
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/flash_causal_qwen2.py", line 32, in init
self.transformer = FlashQwenModel(config, weights, model_prefix=model_prefix, lmhead_prefix=lmhead_prefix)
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/modeling_qwen2.py", line 407, in init
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/modeling_qwen2.py", line 408, in \
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 657, in <listcomp>
w = [self.get_sharded(f"{p}.weight", dim=dim, gqa_size=gqa_size) for p in prefixes]
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 331, in get_sharded
slice_ = self._get_slice(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 718, in _get_slice
filename, tensor_name = self.get_filename(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 146, in get_filename
raise AssertionError(f"weight {tensor_name} does not exist")
AssertionError: weight model.layers.0.self_attn.q_proj.weight does not exist
Traceback (most recent call last):
File "/root/miniconda3/envs/Python310/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/root/miniconda3/envs/Python310/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/usr/local/Ascend/atb-models/examples/run_pa.py", line 500, in <module>
pa_runner = PARunner(**input_dict)
File "/usr/local/Ascend/atb-models/examples/run_pa.py", line 97, in init
self.model.load_weights(**kw_args)
File "/usr/local/Ascend/atb-models/atb_llm/runner/model_runner.py", line 161, in load_weights
self.model = self.model_cls(self.config,
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/flash_causal_qwen2.py", line 32, in init
self.transformer = FlashQwenModel(config, weights, model_prefix=model_prefix, lmhead_prefix=lmhead_prefix)
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/modeling_qwen2.py", line 407, in init
File "/usr/local/Ascend/atb-models/atb_llm/models/qwen2/modeling_qwen2.py", line 408, in \
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 657, in <listcomp>
w = [self.get_sharded(f"{p}.weight", dim=dim, gqa_size=gqa_size) for p in prefixes]
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 331, in get_sharded
slice_ = self._get_slice(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 718, in _get_slice
filename, tensor_name = self.get_filename(tensor_name)
File "/usr/local/Ascend/atb-models/atb_llm/utils/weights.py", line 146, in get_filename
raise AssertionError(f"weight {tensor_name} does not exist")
AssertionError: weight model.layers.0.self_attn.q_proj.weight does not exist
\[ERROR\] 2025-03-07-16:37:05 (PID:24166, Device:3, RankID:-1) ERR99999 UNKNOWN application exception  
\[ERROR\] 2025-03-07-16:37:05 (PID:24168, Device:5, RankID:-1) ERR99999 UNKNOWN application exception  
\[ERROR\] 2025-03-07-16:37:05 (PID:24170, Device:7, RankID:-1) ERR99999 UNKNOWN application exception  
\[ERROR\] 2025-03-07-16:37:05 (PID:24167, Device:4, RankID:-1) ERR99999 UNKNOWN application exception  
\[ERROR\] 2025-03-07-16:37:06 (PID:24164, Device:1, RankID:-1) ERR99999 UNKNOWN application exception  
\[ERROR\] 2025-03-07-16:37:06 (PID:24163, Device:0, RankID:-1) ERR99999 UNKNOWN application exception  
\[ERROR\] 2025-03-07-16:37:06 (PID:24169, Device:6, RankID:-1) ERR99999 UNKNOWN application exception  
\[ERROR\] 2025-03-07-16:37:07 (PID:24165, Device:2, RankID:-1) ERR99999 UNKNOWN application exception  
\[2025-03-07 16:37:13,455\] torch.distributed.elastic.multiprocessing.api: \[WARNING\] Sending process 24163 closing signal SIGTERM  
\[2025-03-07 16:37:13,487\] torch.distributed.elastic.multiprocessing.api: \[ERROR\] failed (exitcode: 1) local_rank: 1 (pid: 24164) of binary: /root/miniconda3/envs/Python310/bin/python  
Traceback (most recent call last):  
File "/root/miniconda3/envs/Python310/bin/torchrun", line 8, in \<module\>  