1、下载镜像
docker pull docker.1ms.run/vllm/vllm-openai-rocm:v0.18.0
2、自定义docker镜像
# syntax=docker/dockerfile:1
FROM docker.1ms.run/vllm/vllm-openai-rocm:v0.18.0
# Quote the requirement specifier: an unquoted '>=' is shell output
# redirection, so the original line installed an unpinned 'modelscope'
# and wrote a stray file named '=1.18.1' into the layer.
# --no-cache-dir keeps pip's download cache out of the image (DL3042).
RUN pip install --no-cache-dir "modelscope>=1.18.1"
docker build -t my-vllm-with-modelscope:v0.18.0 .
3、运行
# Serve the model with the image built above. The tag must match the one
# used in 'docker build -t my-vllm-with-modelscope:v0.18.0 .' — the original
# command referenced ':nightly', which was never built.
docker run --rm \
  --name vllm_qwen35 \
  --device=/dev/kfd --device=/dev/dri \
  --security-opt seccomp=unconfined \
  --group-add video \
  --ipc=host \
  -p 8000:8000 \
  -v /home/vllm:/root/.cache/modelscope \
  -e VLLM_USE_MODELSCOPE=True \
  my-vllm-with-modelscope:v0.18.0 \
  --model tclf90/Qwen3.5-9B-AWQ \
  --language-model-only \
  --tensor-parallel-size 1 \
  --reasoning-parser qwen3 \
  --enable-prefix-caching \
  --max-model-len 32768