通过伪造NPU设备,让AscendSpeed在没有安装torch_npu的环境中跑起来
背景: 我想在GPU上运行AscendSpeed框架,但环境中没有安装torch_npu、deepspeed_npu,又不想逐个注释掉代码中对它们的导入和调用。
方法:
- 1.本文通过创建一个FakeDevice类来伪造NPU(Neural Processing Unit)的行为。
- 2.它将伪造的NPU接口注入到sys.modules,使得在没有实际NPU硬件的情况下,可以模拟NPU相关操作。
- 3.这在开发和测试代码时特别有用,即使没有实际的NPU硬件环境,也可以模拟NPU调用。
代码
python
import sys
import torch
class FakeDevice(object):
    """Stand-in for an NPU module, attribute, or function.

    Accessing any ordinary attribute yields another ``FakeDevice`` whose
    ``name`` extends the dotted path, and calling an instance prints that
    path and returns ``0``.  This lets code written against ``torch_npu`` /
    ``torch.npu`` import and run on machines without the real packages.

    Dunder attributes are deliberately NOT faked: tools such as the import
    system, ``copy`` and ``inspect`` probe for optional hooks (``__path__``,
    ``__spec__``, ``__deepcopy__``, ``__wrapped__`` ...) and would be misled
    by a fabricated value.
    """

    def __init__(self, name=""):
        """Remember the dotted path this fake represents, e.g. ``"torch.npu"``."""
        self.name = name

    def __repr__(self):
        """Return a debug representation that shows the faked dotted path."""
        return f"{type(self).__name__}({self.name!r})"

    def __getattr__(self, item):
        """Return a child fake named ``"<self.name>.<item>"``.

        Only invoked when normal attribute lookup fails.

        Raises:
            AttributeError: if ``item`` is a dunder name, or if ``item`` is
                ``"name"`` (which can only be missing on an instance that
                was created without running ``__init__``; faking it would
                recurse forever through ``self.name``).
        """
        if item == "name" or (item.startswith("__") and item.endswith("__")):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {item!r}"
            )
        return FakeDevice(f"{self.name}.{item}")

    def __call__(self, *args, **kwargs):
        """Log the faked call and return ``0``; all arguments are ignored."""
        print(f"run fake: {self.name}")
        return 0
# Create one root fake per NPU-related package; torch.npu is attached
# directly onto the real torch module.
fake_torch_npu = FakeDevice("torch_npu")
fake_deepspeed_npu = FakeDevice("deepspeed_npu")
torch.npu = FakeDevice("torch.npu")
# Register the fakes (and the submodules that get imported by dotted name)
# in sys.modules so that later import statements find them there.
sys.modules["torch.npu"] = torch.npu
sys.modules["torch.npu.contrib"] = torch.npu.contrib
sys.modules["torch_npu"] = fake_torch_npu
sys.modules["torch_npu.utils"] = fake_torch_npu.utils
sys.modules["torch_npu.contrib"] = fake_torch_npu.contrib
sys.modules["torch_npu.testing"] = fake_torch_npu.testing
sys.modules["torch_npu.testing.testcase"] = fake_torch_npu.testing.testcase
sys.modules["deepspeed_npu"] = fake_deepspeed_npu
# Demo: these imports are satisfied by the FakeDevice objects registered in
# sys.modules, so none of the real NPU packages has to be installed.
import torch.npu
import torch_npu
# Names pulled in with "from ... import" are themselves FakeDevice objects,
# produced on demand by FakeDevice.__getattr__.
from torch_npu.utils import cpp_extension
from torch_npu.contrib import transfer_to_npu
from torch_npu.testing.testcase import TestCase, run_tests
import deepspeed_npu
# Smoke test: each call below prints "run fake: <dotted name>" and returns 0;
# the arguments are ignored by the fake.
torch_npu.npu_clear_float_status(1)
torch_npu.npu_get_float_status(1)
torch_npu.npu_apply_adam_w(1)
torch_npu.fast_gelu(1 + 1)
torch_npu.npu_scaled_masked_softmax(1, 1, 1, False)
# device ends up as 0 because every fake call returns 0.
device = torch.npu.current_device()
torch.npu.synchronize()
torch.npu.set_compile_mode(jit_compile=True)
输出
bash
run fake: torch_npu.npu_clear_float_status
run fake: torch_npu.npu_get_float_status
run fake: torch_npu.npu_apply_adam_w
run fake: torch_npu.fast_gelu
run fake: torch_npu.npu_scaled_masked_softmax
run fake: torch.npu.current_device
run fake: torch.npu.synchronize
run fake: torch.npu.set_compile_mode