概述
本文旨在研究 Docker 挂载 GPU 的方式,以及对应 GPU 的监控方式
NVIDIA
全部挂载
shell
docker run -it \
--pid=host \
--gpus all \
--net=host \
--runtime=nvidia \
--name {container_name} \
{image}
测试
随意找一个镜像测试
shell
docker run -it \
--pid=host \
--gpus all \
--net=host \
--runtime=nvidia \
--name test12 \
nvcr.io/nvidia/pytorch:23.10-py3
执行 inspect
docker inspect test12
json
[
{
"Id": "45e8ad500d1549cbc0cc0818382de228cc0df2f628ef127b4e1ad5ccf18a8540",
"Created": "2025-03-11T05:46:57.712488757Z",
"Path": "/opt/nvidia/nvidia_entrypoint.sh",
"Args": [],
"State": {
"Status": "running",
"Running": true,
"Paused": false,
"Restarting": false,
"OOMKilled": false,
"Dead": false,
"Pid": 3197492,
"ExitCode": 0,
"Error": "",
"StartedAt": "2025-03-11T05:46:58.139557674Z",
"FinishedAt": "0001-01-01T00:00:00Z"
},
"Image": "sha256:d6779f3e7f3ff94074d022ba1ba182c9e0727edb1c5d4dc514d3dc17251b8b1a",
"ResolvConfPath": "/var/lib/docker/containers/45e8ad500d1549cbc0cc0818382de228cc0df2f628ef127b4e1ad5ccf18a8540/resolv.conf",
"HostnamePath": "/var/lib/docker/containers/45e8ad500d1549cbc0cc0818382de228cc0df2f628ef127b4e1ad5ccf18a8540/hostname",
"HostsPath": "/var/lib/docker/containers/45e8ad500d1549cbc0cc0818382de228cc0df2f628ef127b4e1ad5ccf18a8540/hosts",
"LogPath": "/var/lib/docker/containers/45e8ad500d1549cbc0cc0818382de228cc0df2f628ef127b4e1ad5ccf18a8540/45e8ad500d1549cbc0cc0818382de228cc0df2f628ef127b4e1ad5ccf18a8540-json.log",
"Name": "/test12",
"RestartCount": 0,
"Driver": "overlay2",
"Platform": "linux",
"MountLabel": "",
"ProcessLabel": "",
"AppArmorProfile": "",
"ExecIDs": null,
"HostConfig": {
"Binds": null,
"ContainerIDFile": "",
"LogConfig": {
"Type": "json-file",
"Config": {
"max-size": "100m"
}
},
"NetworkMode": "host",
"PortBindings": {},
"RestartPolicy": {
"Name": "no",
"MaximumRetryCount": 0
},
"AutoRemove": false,
"VolumeDriver": "",
"VolumesFrom": null,
"CapAdd": null,
"CapDrop": null,
"CgroupnsMode": "host",
"Dns": [],
"DnsOptions": [],
"DnsSearch": [],
"ExtraHosts": null,
"GroupAdd": null,
"IpcMode": "private",
"Cgroup": "",
"Links": null,
"OomScoreAdj": 0,
"PidMode": "host",
"Privileged": false,
"PublishAllPorts": false,
"ReadonlyRootfs": false,
"SecurityOpt": [
"label=disable"
],
"UTSMode": "",
"UsernsMode": "",
"ShmSize": 67108864,
"Runtime": "nvidia",
"ConsoleSize": [
0,
0
],
"Isolation": "",
"CpuShares": 0,
"Memory": 0,
"NanoCpus": 0,
"CgroupParent": "",
"BlkioWeight": 0,
"BlkioWeightDevice": [],
"BlkioDeviceReadBps": null,
"BlkioDeviceWriteBps": null,
"BlkioDeviceReadIOps": null,
"BlkioDeviceWriteIOps": null,
"CpuPeriod": 0,
"CpuQuota": 0,
"CpuRealtimePeriod": 0,
"CpuRealtimeRuntime": 0,
"CpusetCpus": "",
"CpusetMems": "",
"Devices": [],
"DeviceCgroupRules": null,
"DeviceRequests": [
{
"Driver": "",
"Count": -1,
"DeviceIDs": null,
"Capabilities": [
[
"gpu"
]
],
"Options": {}
}
],
"KernelMemory": 0,
"KernelMemoryTCP": 0,
"MemoryReservation": 0,
"MemorySwap": 0,
"MemorySwappiness": null,
"OomKillDisable": false,
"PidsLimit": null,
"Ulimits": null,
"CpuCount": 0,
"CpuPercent": 0,
"IOMaximumIOps": 0,
"IOMaximumBandwidth": 0,
"MaskedPaths": [
"/proc/asound",
"/proc/acpi",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/proc/scsi",
"/sys/firmware"
],
"ReadonlyPaths": [
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
]
},
"GraphDriver": {
"Data": {
"LowerDir": "/var/lib/docker/overlay2/07d5caa66e6366371f0eeaf269c96e18e77e907576cb3c722fdca8bf02c7d5c1-init/diff:/var/lib/docker/overlay2/64d25d17b5c23171848fd097c73b56d697b9c65a573c462ecf8086e18ef98fbc/diff:/var/lib/docker/overlay2/8f5106a7c51fddf2a79439555f5f6b19212440b01fefddd9294abe504e1c5b40/diff:/var/lib/docker/overlay2/bd4b2a63b0a7afc5f75de408e10c74c9e15f42f77afa62b774f25de413c4828c/diff:/var/lib/docker/overlay2/dac1e84bf26868b74abb435849b0cc244969f41cb59676dd0024682d5167f891/diff:/var/lib/docker/overlay2/c8c19520caca1e8cf2db164081735d3af9e2adfd3b49c17a2d4a0c041fba953d/diff:/var/lib/docker/overlay2/8ccc09746ee30445d32c409a1c87143976daf93775d44ada037447b92c3eed72/diff:/var/lib/docker/overlay2/8755106c41aac903a214d69c80af288afc1d1d6b3d49c25af65db5f2019ca249/diff:/var/lib/docker/overlay2/2f267c46a768db582f20879ee3544b21f23d8de6b25a568ced4e1ecd608d26d3/diff:/var/lib/docker/overlay2/c8905beeb4e21b33cfa7f60ca4a50316fffdd9cef24ef69b84333e5447099186/diff:/var/lib/docker/overlay2/47ad4c10ad21762489de18729fe57a86ad8c90d3e3c707553e2538cddd9c154d/diff:/var/lib/docker/overlay2/e00868e9c8fb61b6fe0fd3ae593523aaea4ea06f3c57d517ee1b30e58d272d39/diff:/var/lib/docker/overlay2/db17358a619b78dbfbb3d824fa124a85d5be40499a2de9b5b82629ab46ef4e7f/diff:/var/lib/docker/overlay2/11d4cea65202cca6967333134fe2645544db79c57d98622dc3c1328bf093c383/diff:/var/lib/docker/overlay2/4abb837395a5ed265c79a752eb89cc798e5350307d025add3cc7b905aa06fb81/diff:/var/lib/docker/overlay2/02eab8bc79907617dbcbb401e733f46dda4fe07991bf4f971a32bc943502ba22/diff:/var/lib/docker/overlay2/277eb0df021711b355bd589d054cb36acd79e3d2ce7d5f20013f875bb04729e4/diff:/var/lib/docker/overlay2/82b5aec1c0bcaaccc7d4b1113cd8bad420bbc7fe3384542f3e80fe571f8103a6/diff:/var/lib/docker/overlay2/b04a5d89c997725e65ef17a2bbd5e3dae9170e5f20e40123a2303e238c2c3b35/diff:/var/lib/docker/overlay2/74f90462669aee42207483fcae81b25a2400d4b473474f0c70815a8dddaba846/diff:/var/lib/docker/overlay2/42a7a8f352f4bebd547da1778616ce493a5e1c4eb06d40c88435db91d
2f572c8/diff:/var/lib/docker/overlay2/9d2513ee8eacae107e7a10fe8b9282d9c861f291bd94c06e2961303ca680d8e7/diff:/var/lib/docker/overlay2/bb73e32a967896c8425472d5ad179520f92a62b4b3dd30f22d9fd2d9029feb0a/diff:/var/lib/docker/overlay2/7024feda63298dc19ac3e0a0a5ec3e57231c9a0b28f9998876db0beafa936714/diff:/var/lib/docker/overlay2/2e7f3efa18f61f190adeb0cb2a3a23e7ae15be2b10105c4d1ba28cf5348494a8/diff:/var/lib/docker/overlay2/c9fd367042a1ea51aef628cd67442ff13e750c45b41a448cb96a8b786a23093d/diff:/var/lib/docker/overlay2/06ae74ce35c8df2c4b92a1c293c66c3dd79c4acb205f40cae83b5bd61a866e9e/diff:/var/lib/docker/overlay2/4579be1918b893594b3178a2edfd7ca57becdb4eb81cea43a2591a2b2ff7e682/diff:/var/lib/docker/overlay2/44a1daf401223e9e80d9c27ee40cdc85f9e90f7bbfdb5066f1daa4d96e405a17/diff:/var/lib/docker/overlay2/e644f95c4c54a213f9af15a03cfab1a8c673de98f95023fd509ed2c7a879b64e/diff:/var/lib/docker/overlay2/86e565a0ca483a65473250b08c955156b7ae557d4cd660deb1c179d0a1247ee1/diff:/var/lib/docker/overlay2/d42a2918680332319e5c3aab3b591f01910e490f525067e33eb7d57f981ea628/diff:/var/lib/docker/overlay2/8c8c7a4160c306e2a08913ece2be76cd39e484b7eb49521b89b035df1b6a1a5d/diff:/var/lib/docker/overlay2/c44ce04392238fe9a5e1bd81f2759e1081dbf67ce3f723d0700af28068a041d0/diff:/var/lib/docker/overlay2/4263ace7d69e17b449f74450ff5ab58a4462aaf090457bc627200090f6ea5524/diff:/var/lib/docker/overlay2/430fe4b683fc5db8ac0278c81faf509a0b371592ee3fd16103576d1270a36016/diff:/var/lib/docker/overlay2/a4cd15de7460633b379d8c87b7a8ab4f73c5f3394e7f3b245c5d672e3990126f/diff:/var/lib/docker/overlay2/bf08f5ccaf2077103cf57a98dcc4136a07877c3f1841b4b3e0dc6dea783c691e/diff:/var/lib/docker/overlay2/8ec26ffda73a6319f8a984a993689d943320a7a9f55bac65ff8bb40141bfea06/diff:/var/lib/docker/overlay2/bd10c0cf3a06fb0cb0160d7f89c3c1c1fa34d6afa9ee8bdaa111897e63c76b25/diff:/var/lib/docker/overlay2/045d29f11ca30a258f2552fe4308d8a0bf39f5e0787f06df1cb0ba6ee6d37fcf/diff:/var/lib/docker/overlay2/fccebb131478674d0820e89c2310859d6542e36833fff78d25ab90c96c1486
24/diff:/var/lib/docker/overlay2/4e02d20c8368e2af9d6cd7f8cf14e9b558ac8394ce63b2fc2dc8d8edecce6281/diff:/var/lib/docker/overlay2/1d6d58a7edc87c6ad4ec4d124873d8f8339eecd759a2bc5e57d94c7c7c05ba44/diff:/var/lib/docker/overlay2/e147a7465ac81712b4be86f8f16b2b81e5b75b7bd42d395b8c90aa28db83e364/diff:/var/lib/docker/overlay2/8dc61f2857539fc559c43e69cfbc1770e99d8342245f4092edd60ca2618a7033/diff:/var/lib/docker/overlay2/02985b542b0b4b473792151068243f8ae49778da9317d86378eee88b38628838/diff:/var/lib/docker/overlay2/77146faf1a10b1374af16888444c0cbecbfd2f58d795633e6005530ec27f7e1a/diff:/var/lib/docker/overlay2/b801ef2be93c0c4bda2aaac3e4f637b6deb49664923fe6f6a4c27085629eba90/diff:/var/lib/docker/overlay2/8e605460c4b16351710adb9efd2eee1db057ff558a2807bfdfcd162fca4d556a/diff:/var/lib/docker/overlay2/d4ed2d0c8175a1b66bfcc5ab337cdcd08e985c192abb38c7509336e652d6fea2/diff:/var/lib/docker/overlay2/52edaf653c39fb8481bdbb3986262a48aedea1c2b2a1e99509df0cd6643e3dd8/diff:/var/lib/docker/overlay2/8b92a57c0259be37176211c5c69e0c6adf95faadc3c2e908c49dc60329b59df9/diff",
"MergedDir": "/var/lib/docker/overlay2/07d5caa66e6366371f0eeaf269c96e18e77e907576cb3c722fdca8bf02c7d5c1/merged",
"UpperDir": "/var/lib/docker/overlay2/07d5caa66e6366371f0eeaf269c96e18e77e907576cb3c722fdca8bf02c7d5c1/diff",
"WorkDir": "/var/lib/docker/overlay2/07d5caa66e6366371f0eeaf269c96e18e77e907576cb3c722fdca8bf02c7d5c1/work"
},
"Name": "overlay2"
},
"Mounts": [],
"Config": {
"Hostname": "v02e09045.cloud.sqa.na131",
"Domainname": "",
"User": "",
"AttachStdin": true,
"AttachStdout": true,
"AttachStderr": true,
"ExposedPorts": {
"6006/tcp": {},
"8888/tcp": {}
},
"Tty": true,
"OpenStdin": true,
"StdinOnce": true,
"Env": [
"PATH=/usr/local/lib/python3.10/dist-packages/torch_tensorrt/bin:/usr/local/mpi/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/tensorrt/bin",
"CUDA_VERSION=12.2.2.009",
"CUDA_DRIVER_VERSION=535.104.05",
"CUDA_CACHE_DISABLE=1",
"NVIDIA_REQUIRE_JETPACK_HOST_MOUNTS=",
"_CUDA_COMPAT_PATH=/usr/local/cuda/compat",
"ENV=/etc/shinit_v2",
"BASH_ENV=/etc/bash.bashrc",
"SHELL=/bin/bash",
"NVIDIA_REQUIRE_CUDA=cuda>=9.0",
"NCCL_VERSION=2.19.3",
"CUBLAS_VERSION=12.2.5.6",
"CUFFT_VERSION=11.0.8.103",
"CURAND_VERSION=10.3.3.141",
"CUSPARSE_VERSION=12.1.2.141",
"CUSOLVER_VERSION=11.5.2.141",
"CUTENSOR_VERSION=1.7.0.1",
"NPP_VERSION=12.2.1.4",
"NVJPEG_VERSION=12.2.2.4",
"CUDNN_VERSION=8.9.5.29",
"TRT_VERSION=8.6.1.6+cuda12.0.1.011",
"TRTOSS_VERSION=23.10",
"NSIGHT_SYSTEMS_VERSION=2023.3.1.92",
"NSIGHT_COMPUTE_VERSION=2023.2.2.3",
"DALI_VERSION=1.30.0",
"DALI_BUILD=9783408",
"POLYGRAPHY_VERSION=0.49.0",
"TRANSFORMER_ENGINE_VERSION=0.12",
"LD_LIBRARY_PATH=/usr/local/lib/python3.10/dist-packages/torch/lib:/usr/local/lib/python3.10/dist-packages/torch_tensorrt/lib:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64",
"NVIDIA_VISIBLE_DEVICES=all",
"NVIDIA_DRIVER_CAPABILITIES=compute,utility,video",
"NVIDIA_PRODUCT_NAME=PyTorch",
"GDRCOPY_VERSION=2.3",
"HPCX_VERSION=2.16rc4",
"MOFED_VERSION=5.4-rdmacore39.0",
"OPENUCX_VERSION=1.15.0",
"OPENMPI_VERSION=4.1.5rc2",
"RDMACORE_VERSION=39.0",
"OPAL_PREFIX=/opt/hpcx/ompi",
"OMPI_MCA_coll_hcoll_enable=0",
"LIBRARY_PATH=/usr/local/cuda/lib64/stubs:",
"PYTORCH_BUILD_VERSION=2.1.0a0+32f93b1",
"PYTORCH_VERSION=2.1.0a0+32f93b1",
"PYTORCH_BUILD_NUMBER=0",
"NVIDIA_PYTORCH_VERSION=23.10",
"PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python",
"SETUPTOOLS_USE_DISTUTILS=stdlib",
"OPENBLAS_VERSION=0.3.23",
"PYTHONIOENCODING=utf-8",
"LC_ALL=C.UTF-8",
"PIP_DEFAULT_TIMEOUT=100",
"NVM_DIR=/usr/local/nvm",
"JUPYTER_PORT=8888",
"TENSORBOARD_PORT=6006",
"UCC_CL_BASIC_TLS=^sharp",
"TORCH_CUDA_ARCH_LIST=5.2 6.0 6.1 7.0 7.2 7.5 8.0 8.6 8.7 9.0+PTX",
"PYTORCH_HOME=/opt/pytorch/pytorch",
"CUDA_HOME=/usr/local/cuda",
"TORCH_ALLOW_TF32_CUBLAS_OVERRIDE=1",
"USE_EXPERIMENTAL_CUDNN_V8_API=1",
"COCOAPI_VERSION=2.0+nv0.7.3",
"TORCH_CUDNN_V8_API_ENABLED=1",
"CUDA_MODULE_LOADING=LAZY",
"NVIDIA_BUILD_ID=71422337"
],
"Cmd": null,
"Image": "nvcr.io/nvidia/pytorch:23.10-py3",
"Volumes": null,
"WorkingDir": "/workspace",
"Entrypoint": [
"/opt/nvidia/nvidia_entrypoint.sh"
],
"OnBuild": null,
"Labels": {
"com.nvidia.build.id": "71422337",
"com.nvidia.build.ref": "798008b068e6dbd0088bab08098b0fce963b87b3",
"com.nvidia.cublas.version": "12.2.5.6",
"com.nvidia.cuda.version": "9.0",
"com.nvidia.cudnn.version": "8.9.5.29",
"com.nvidia.cufft.version": "11.0.8.103",
"com.nvidia.curand.version": "10.3.3.141",
"com.nvidia.cusolver.version": "11.5.2.141",
"com.nvidia.cusparse.version": "12.1.2.141",
"com.nvidia.cutensor.version": "1.7.0.1",
"com.nvidia.nccl.version": "2.19.3",
"com.nvidia.npp.version": "12.2.1.4",
"com.nvidia.nsightcompute.version": "2023.2.2.3",
"com.nvidia.nsightsystems.version": "2023.3.1.92",
"com.nvidia.nvjpeg.version": "12.2.2.4",
"com.nvidia.pytorch.version": "2.1.0a0+32f93b1",
"com.nvidia.tensorrt.version": "8.6.1.6+cuda12.0.1.011",
"com.nvidia.tensorrtoss.version": "23.10",
"com.nvidia.volumes.needed": "nvidia_driver",
"org.opencontainers.image.ref.name": "ubuntu",
"org.opencontainers.image.version": "22.04"
}
},
"NetworkSettings": {
"Bridge": "",
"SandboxID": "c86446404df6d834fa63eabca6291170b3a5d43f6e5c9fd3059e511055447296",
"HairpinMode": false,
"LinkLocalIPv6Address": "",
"LinkLocalIPv6PrefixLen": 0,
"Ports": {},
"SandboxKey": "/var/run/docker/netns/default",
"SecondaryIPAddresses": null,
"SecondaryIPv6Addresses": null,
"EndpointID": "",
"Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"IPAddress": "",
"IPPrefixLen": 0,
"IPv6Gateway": "",
"MacAddress": "",
"Networks": {
"host": {
"IPAMConfig": null,
"Links": null,
"Aliases": null,
"NetworkID": "cbcf14650dcab62566be305c55d23d6486a24ec5d1c4f2ed6d830545cd78e979",
"EndpointID": "4adeef2913d3b9ed198fecff3a6d5ff67d45c53e7c7a8b34976d381d9c5db138",
"Gateway": "",
"IPAddress": "",
"IPPrefixLen": 0,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": "",
"DriverOpts": null
}
}
}
}
]
可看到
json
"DeviceRequests": [
{
"Driver": "",
"Count": -1,
"DeviceIDs": null,
"Capabilities": [
[
"gpu"
]
],
"Options": {}
}
],
挂载任意两卡
shell
docker run -it \
--pid=host \
--gpus 2 \
--net=host \
--runtime=nvidia \
--name {container_name} \
{image}
测试
shell
docker run -it \
--pid=host \
--gpus 2 \
--net=host \
--runtime=nvidia \
--name test12 \
nvcr.io/nvidia/pytorch:23.10-py3
执行 inspect
docker inspect test12 可找到下面内容
json
"DeviceRequests": [
{
"Driver": "",
"Count": 2,
"DeviceIDs": null,
"Capabilities": [
[
"gpu"
]
],
"Options": {}
}
],
挂载指定两卡(1、2 卡)
shell
docker run -it \
--pid=host \
--gpus '"device=1,2"' \
--net=host \
--runtime=nvidia \
--name {container_name} \
{image}
测试
shell
docker run -it \
--pid=host \
--gpus '"device=1,2"' \
--net=host \
--runtime=nvidia \
--name test12 \
nvcr.io/nvidia/pytorch:23.10-py3
docker inspect
可以看到
json
"DeviceRequests": [
{
"Driver": "",
"Count": 0,
"DeviceIDs": [
"1",
"2"
],
"Capabilities": [
[
"gpu"
]
],
"Options": {}
}
],
AMD
特权
shell
docker run -it \
--pid=host \
--net=host \
--privileged=true \
--name {container_name} \
{image}
指定卡
shell
docker run -it \
--pid=host \
--net=host \
--device /dev/dri/renderD128 \
--name {container_name} \
{image}
测试
shell
docker run -it \
--pid=host \
--net=host \
--device /dev/dri/renderD128 \
--name test1 \
rocm/pytorch-nightly:latest
docker inspect
json
[
{
"Id": "023f9bf810215002076641ed765f278e8a4fe8b14c1f822d5286e68b9fcf04a0",
"Created": "2025-03-11T06:11:12.740009122Z",
"Path": "bash",
"Args": [],
"State": {
"Status": "running",
"Running": true,
"Paused": false,
"Restarting": false,
"OOMKilled": false,
"Dead": false,
"Pid": 454755,
"ExitCode": 0,
"Error": "",
"StartedAt": "2025-03-11T06:11:12.826029911Z",
"FinishedAt": "0001-01-01T00:00:00Z"
},
"Image": "sha256:a318773e0f87b6650b9dfefc96f6cb19427fc3f3e22cfaa529ca822bd9dc19fb",
"ResolvConfPath": "/data2/docker/containers/023f9bf810215002076641ed765f278e8a4fe8b14c1f822d5286e68b9fcf04a0/resolv.conf",
"HostnamePath": "/data2/docker/containers/023f9bf810215002076641ed765f278e8a4fe8b14c1f822d5286e68b9fcf04a0/hostname",
"HostsPath": "/data2/docker/containers/023f9bf810215002076641ed765f278e8a4fe8b14c1f822d5286e68b9fcf04a0/hosts",
"LogPath": "/data2/docker/containers/023f9bf810215002076641ed765f278e8a4fe8b14c1f822d5286e68b9fcf04a0/023f9bf810215002076641ed765f278e8a4fe8b14c1f822d5286e68b9fcf04a0-json.log",
"Name": "/test1",
"RestartCount": 0,
"Driver": "overlay2",
"Platform": "linux",
"MountLabel": "",
"ProcessLabel": "",
"AppArmorProfile": "",
"ExecIDs": null,
"HostConfig": {
"Binds": null,
"ContainerIDFile": "",
"LogConfig": {
"Type": "json-file",
"Config": {}
},
"NetworkMode": "host",
"PortBindings": {},
"RestartPolicy": {
"Name": "no",
"MaximumRetryCount": 0
},
"AutoRemove": false,
"VolumeDriver": "",
"VolumesFrom": null,
"ConsoleSize": [
52,
173
],
"CapAdd": null,
"CapDrop": null,
"CgroupnsMode": "host",
"Dns": [],
"DnsOptions": [],
"DnsSearch": [],
"ExtraHosts": null,
"GroupAdd": null,
"IpcMode": "private",
"Cgroup": "",
"Links": null,
"OomScoreAdj": 0,
"PidMode": "host",
"Privileged": false,
"PublishAllPorts": false,
"ReadonlyRootfs": false,
"SecurityOpt": [
"label=disable"
],
"UTSMode": "",
"UsernsMode": "",
"ShmSize": 67108864,
"Runtime": "runc",
"Isolation": "",
"CpuShares": 0,
"Memory": 0,
"NanoCpus": 0,
"CgroupParent": "",
"BlkioWeight": 0,
"BlkioWeightDevice": [],
"BlkioDeviceReadBps": [],
"BlkioDeviceWriteBps": [],
"BlkioDeviceReadIOps": [],
"BlkioDeviceWriteIOps": [],
"CpuPeriod": 0,
"CpuQuota": 0,
"CpuRealtimePeriod": 0,
"CpuRealtimeRuntime": 0,
"CpusetCpus": "",
"CpusetMems": "",
"Devices": [
{
"PathOnHost": "/dev/dri/renderD128",
"PathInContainer": "/dev/dri/renderD128",
"CgroupPermissions": "rwm"
}
],
"DeviceCgroupRules": null,
"DeviceRequests": null,
"MemoryReservation": 0,
"MemorySwap": 0,
"MemorySwappiness": null,
"OomKillDisable": false,
"PidsLimit": null,
"Ulimits": null,
"CpuCount": 0,
"CpuPercent": 0,
"IOMaximumIOps": 0,
"IOMaximumBandwidth": 0,
"MaskedPaths": [
"/proc/asound",
"/proc/acpi",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/proc/scsi",
"/sys/firmware",
"/sys/devices/virtual/powercap"
],
"ReadonlyPaths": [
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
]
},
"GraphDriver": {
"Data": {
"LowerDir": "/data2/docker/overlay2/643286cd46fb6046d18656539c6f27a44288d53233a3e6daf56555c2ff3b0a5a-init/diff:/data2/docker/overlay2/0b0027efc88542f6ec4ed176b6b860b560375f59ce789c50f1ffda55b25dde6f/diff:/data2/docker/overlay2/a3f8e49621db1587e939afc8b52fb40b8f6d64946dc04eef7619f064c99db9ff/diff:/data2/docker/overlay2/bcd80773ac1d0d2e3ef61fb9319ec2f797b1fb06cad8970214ce512f6f29868a/diff:/data2/docker/overlay2/2a68be4daa711d1689e8c3a017a53237b49e3dc3781d4ac4af0f9628bcba2327/diff:/data2/docker/overlay2/097825e94e1e3484474967bc8ddbce7e95f2e7d02541e65ba70d68ceacf1cb94/diff:/data2/docker/overlay2/6527ade89ae0de015975d95e13c3cccb93b422af1c377eb8506fbbd13e759729/diff:/data2/docker/overlay2/0a34b7ae4b651435981256971c3d500e59713a24ac4fcc3a0d5c3d7713455ae5/diff:/data2/docker/overlay2/fa00108cf796397c8e0234ca190e8a4a7fc883fb6548c9f810f6e7efb1eff977/diff:/data2/docker/overlay2/04e2ea81316681bc1dc0681ff2adb6cbc9665e1ec37f5576908bd573be86bc04/diff:/data2/docker/overlay2/eb6f5e3b9c7bd234319c88864fc72b5ce7094318ee848305d50e3f3e63bbe022/diff:/data2/docker/overlay2/b54111c73a24812c1486d788f0881cd3c80e4147db07b80a07e634c21cd71568/diff:/data2/docker/overlay2/3d5f387580197c3e4ea8ec15fe7ecaa3876554d9d58379d7088dd00b62192846/diff:/data2/docker/overlay2/91056a04865510cd794ac7e54040bcfeb0fc43f846b9c2c09821a133141b9172/diff:/data2/docker/overlay2/6c0a466eb8435e5a4f050fb1ae80e335ea66494221beead0a97f30556ae866cf/diff:/data2/docker/overlay2/61abcef1f7517c94bc4586fc7f34fb7989d13dd22d62dd45ecf40c7fbaf1abd4/diff:/data2/docker/overlay2/72be8fea3fca82b3d7f9f81077249195fff96e220dc2a4182aab710144103fc3/diff:/data2/docker/overlay2/1fbd6ba796096197eaa10dbd204ed14276691cfc747af3e789f9cffc779a3028/diff:/data2/docker/overlay2/2f66c19d96511afc54898eeca18338bfe9b1350247aa207cf41d329414e12c58/diff:/data2/docker/overlay2/922c4eee6499982178db141af8d73282ef81a2a6b3ffe56c70755e311daf2570/diff:/data2/docker/overlay2/92a5262cd27a22b6f6aa5d54118f4e2f78cd299f20d275cd520ff0355cc88c95/diff:/data2/docker/overlay2/12432d
04cf4cc2e7cd1dfc0942cece7997e53dcf73d66072cfa7f0f8d36522a6/diff:/data2/docker/overlay2/6e7685095b78d395a4e48d90c063c2be9d5fa4841c9e4f161be4f1c75f3f299a/diff:/data2/docker/overlay2/687c35c00b2a6bcb08e6afa51fca2c18edd0599bfcb253f624fc8752e5841f33/diff:/data2/docker/overlay2/0317b330d4c1ab3eb2eee82e464bd96a925fbd91b8e41db3f01176a1526ac619/diff:/data2/docker/overlay2/c808dcc3c7b6a68ae8a5c6ae7f2ea896615d3a2d0e29441a968ae6ae612aac33/diff:/data2/docker/overlay2/66686b534647a674e5769bf76eb03dcf9552f4dfba152160a9d3e2101b1a006d/diff:/data2/docker/overlay2/8f1d410e067b2c18bcf8e682a92ce0b3c02800f84e2222c6b8f7e3621e1e0fa7/diff:/data2/docker/overlay2/86bb4d16485fe5232bcb2edbaf8490f83d196b40d14c7e6f52341d9e751db998/diff:/data2/docker/overlay2/d6dbfd9aeee6cc56fc354dc5e7612ced2afb1c19e6655c3ea1ae928af31f548a/diff:/data2/docker/overlay2/abba997c9b37546953d879a2b14fa388309656b1c7e46b1472b9d24e278e2d02/diff:/data2/docker/overlay2/6701eb577c0ec82d81b132cc1dbf70e55556ff6a85852a0682a672afdbddaf1a/diff:/data2/docker/overlay2/dd22db925bccad2029d8c7d8a00f9e2521b900594b21fe95240b5d6598be18e7/diff:/data2/docker/overlay2/66666f8365d19d755b4845d5ef06f0f7eb2066d1cc953b7d0c600097b54a562e/diff:/data2/docker/overlay2/e23ea54486ef06a9fc5b957c1fac8169916fe1adcbe6f40bdddd3beff1b4d0df/diff:/data2/docker/overlay2/41ee6c10ba1414b9f85d0cb52fdf9d9e611ab8ff3af644703cd5c4bc39f9222e/diff:/data2/docker/overlay2/18b2b426a29ed3ed9252d0d4157b62ea2a826edd7986b493e38d5f45fd86a34a/diff:/data2/docker/overlay2/e6a5910942ef86a23d4d074e94ec4e4180f99e68f4b153673750a3acbecaf73c/diff:/data2/docker/overlay2/132402cf08254a1381e86ed0a8e0415b3bcf58627514273ecf7bd87d13be862c/diff:/data2/docker/overlay2/12d05c178d3d48ce6b34317c5eed013a6c49e55a16bc5821cf5d125f1cf8f124/diff:/data2/docker/overlay2/03c0e151d259fa916e7c934dcbd0355406eff5fef0ff207069b24769ca3bef4d/diff:/data2/docker/overlay2/35561c928d8b054e610191f523c69ebb18f7b9971d94d83614c7daa5f7bb9999/diff:/data2/docker/overlay2/d029bfd9b91492f1dded342b43a011b537fd86ae1eede44f8fbfe
26eb95eebbd/diff:/data2/docker/overlay2/9230e7959b6dde952ca6cf780d009fc01a5f104fe5fe3dfc540b6f09609acc6a/diff:/data2/docker/overlay2/921a6d3b272e6aebd6a34f1b8fa9023af7adeea52dbdd8117705a05e9ca6c205/diff:/data2/docker/overlay2/1c999a09c69b84be12f55b53bd498dcd9a64c9d9a1776687be0d10db3be85256/diff:/data2/docker/overlay2/402ee67d5bbceeaaa40d9c15fdcc129d07c2d19fbd924d4806178933e6c67a2b/diff:/data2/docker/overlay2/1a9055787aaca2d2c2c89761c7de8412a1df83466e8c128a4e4e1f75d4bc6b37/diff:/data2/docker/overlay2/ba15618b940db452d6902e34d7b687e10fc02672e2eb100e7f6f9ff5f9cab302/diff:/data2/docker/overlay2/93386b717de67b68e6747b899405c5269e6abb9ea368e90c39b180a91fd342cc/diff:/data2/docker/overlay2/fff622b7570e42f656f43360b67692f7f01decf88b2c16c494e4d46f3df46f10/diff:/data2/docker/overlay2/1caec3da03b6dc4a777f6b020fc854c1b8ad8bc14f0814252c673c3bfaa6b3a7/diff:/data2/docker/overlay2/0349592ee25b867d39d17f082b8a829e9202d9ebc5be3d4d7785e3471156b180/diff:/data2/docker/overlay2/604ccc9ae02a4fb8ebb997dbb9cb0c145e8a1e6688daf417f49429ef9027a357/diff:/data2/docker/overlay2/a7ea38f39b6407b5d9364da9b3b79029cd1e516aeddd1cc12b97c5efe2dc719f/diff:/data2/docker/overlay2/4ab0056959565ae52668e9c453e4241ed7eefb7dc3ef8b3912195bccc8051f72/diff:/data2/docker/overlay2/4128e0be1facbd281abbcdbe83e0dc952bf99d9834035f0b42eb7c46fd8afb8e/diff:/data2/docker/overlay2/40ff59c2d58be700cc1352f964b395f298cebc582b6bc64e3967811b50722315/diff:/data2/docker/overlay2/18487dde5acc2f151c19fa21210f552f52b4cd33c9e82368452f5f807ce15495/diff:/data2/docker/overlay2/c86ba8d6968ed48ea8ce65ef4264e7fc625a35f72a1d5d9a91f5f2d02d503555/diff:/data2/docker/overlay2/3f8ff1e760d92544a3c5c8a40a020a089d6faf1dbe66fd57e0dee6a0198df398/diff:/data2/docker/overlay2/4b373c4a570114f7772644711a7c1eca0aa382286eecd04e6226c0c31baeb683/diff:/data2/docker/overlay2/59a0b66dbfe2aa1fbdc6c989490357f5790519b08b292a92a64393b9441b6f43/diff:/data2/docker/overlay2/aa1f7c05d29d2aa9a635823d52dbae3c47a1e8f6b4c542966d45a881a12d5a65/diff:/data2/docker/overlay2/78e9f40
b495e36cec30e97325190778af07549c1aa169c7826be2f42dd37c8c9/diff:/data2/docker/overlay2/ec0ff89cb8a4fb0ac5d553f1e7201ecfd8677ecd8e88c43fd4f43e5d867d559b/diff:/data2/docker/overlay2/8ec20b6116718ebebd9a70a4ae7f179d1ea64bb962d8dba89a5039c1f7695ebb/diff:/data2/docker/overlay2/d6ac13bbf92b53422cb7caa76fd852100cec7bedd84f58828185aa61abc35118/diff",
"MergedDir": "/data2/docker/overlay2/643286cd46fb6046d18656539c6f27a44288d53233a3e6daf56555c2ff3b0a5a/merged",
"UpperDir": "/data2/docker/overlay2/643286cd46fb6046d18656539c6f27a44288d53233a3e6daf56555c2ff3b0a5a/diff",
"WorkDir": "/data2/docker/overlay2/643286cd46fb6046d18656539c6f27a44288d53233a3e6daf56555c2ff3b0a5a/work"
},
"Name": "overlay2"
},
"Mounts": [],
"Config": {
"Hostname": "x08e09287.cloud.sqa.na131",
"Domainname": "",
"User": "root",
"AttachStdin": true,
"AttachStdout": true,
"AttachStderr": true,
"Tty": true,
"OpenStdin": true,
"StdinOnce": true,
"Env": [
"PATH=/opt/cache/bin:/opt/rocm/llvm/bin:/opt/rocm/opencl/bin:/opt/rocm/hip/bin:/opt/rocm/hcc/bin:/opt/rocm/bin:/opt/conda/envs/py_3.10/bin:/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"DEBIAN_FRONTEND=noninteractive",
"PYTORCH_ROCM_ARCH=gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx942",
"ANACONDA_PYTHON_VERSION=3.10",
"INSTALLED_PROTOBUF=yes",
"INSTALLED_DB=yes",
"INSTALLED_VISION=yes",
"ROCM_PATH=/opt/rocm",
"MAGMA_HOME=/opt/rocm/magma",
"LANG=C.UTF-8",
"LC_ALL=C.UTF-8",
"UCX_COMMIT=",
"UCC_COMMIT=",
"UCX_HOME=/usr",
"UCC_HOME=/usr",
"OPENSSL_ROOT_DIR=/opt/openssl",
"OPENSSL_DIR=/opt/openssl",
"BUILD_ENVIRONMENT=pytorch-linux-focal-rocm6.3-py3.10"
],
"Cmd": [
"bash"
],
"Image": "rocm/pytorch-nightly:latest",
"Volumes": null,
"WorkingDir": "/tmp",
"Entrypoint": null,
"OnBuild": null,
"Labels": {
"org.opencontainers.image.ref.name": "ubuntu",
"org.opencontainers.image.version": "20.04"
}
},
"NetworkSettings": {
"Bridge": "",
"SandboxID": "27d952b1f1e4907a56e430375efcb5651c7f85c788f8723debb66b37f63542ab",
"HairpinMode": false,
"LinkLocalIPv6Address": "",
"LinkLocalIPv6PrefixLen": 0,
"Ports": {},
"SandboxKey": "/var/run/docker/netns/default",
"SecondaryIPAddresses": null,
"SecondaryIPv6Addresses": null,
"EndpointID": "",
"Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"IPAddress": "",
"IPPrefixLen": 0,
"IPv6Gateway": "",
"MacAddress": "",
"Networks": {
"host": {
"IPAMConfig": null,
"Links": null,
"Aliases": null,
"NetworkID": "0b305525498473a7b6af7fe3aef8a249703ec8cea31f6ec58d77de6566d95ab7",
"EndpointID": "a76587a40ce055cb910c7b87f81bdaa3e4f257022eb66921357810334db5dd41",
"Gateway": "",
"IPAddress": "",
"IPPrefixLen": 0,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": "",
"DriverOpts": null
}
}
}
}
]
可以看到(注意:下面片段中的 /dev/kfd 一项仅在启动时额外传入 --device /dev/kfd 才会出现,上面的测试命令只挂载了 /dev/dri/renderD128)
json
"Devices": [
{
"PathOnHost": "/dev/kfd",
"PathInContainer": "/dev/kfd",
"CgroupPermissions": "rwm"
},
{
"PathOnHost": "/dev/dri/renderD128",
"PathInContainer": "/dev/dri/renderD128",
"CgroupPermissions": "rwm"
}
],
卡与设备节点的对应关系可通过 ls -l /dev/dri/by-path/ 查看;规律为 renderD 编号从 128 开始,之后每张卡依次 +8(如 renderD128、renderD136)
所有卡
shell
docker run -it \
--pid=host \
--net=host \
--device /dev/dri \
--name {container_name} \
{image}
测试
shell
docker run -it \
--pid=host \
--net=host \
--device /dev/dri \
--name test1 \
rocm/pytorch-nightly:latest
docker inspect
json
"Devices": [
{
"PathOnHost": "/dev/dri",
"PathInContainer": "/dev/dri",
"CgroupPermissions": "rwm"
}
],
PPU
特权
shell
docker run -it \
--pid=host \
--net=host \
--privileged=true \
--name {container_name} \
{image}
单卡
shell
docker run -it \
--pid=host \
--net=host \
--device /dev/alixpu \
--device /dev/alixpu_ctl \
--device /dev/alixpu_ppu1 \
--name {container_name} \
{image}
测试
shell
docker run -it \
--pid=host \
--net=host \
--device /dev/alixpu \
--device /dev/alixpu_ctl \
--device /dev/alixpu_ppu1 \
--name test12 \
hie-allspark-ppu-dev:1.4.1
docker inspect
json
[
{
"Id": "276090cfdacf341281526f27c84ee2c6854efb9449d523cf121f67ea9977b134",
"Created": "2025-03-11T06:03:39.836133855Z",
"Path": "bash",
"Args": [
"/opt/t-head/entrypoint.sh"
],
"State": {
"Status": "running",
"Running": true,
"Paused": false,
"Restarting": false,
"OOMKilled": false,
"Dead": false,
"Pid": 2182103,
"ExitCode": 0,
"Error": "",
"StartedAt": "2025-03-11T06:03:40.33445056Z",
"FinishedAt": "0001-01-01T00:00:00Z"
},
"Image": "sha256:f276e49c084cdd8cb089d548701feb3ec4272866ff18233b1d9fe57dc851612a",
"ResolvConfPath": "/data2/docker/containers/276090cfdacf341281526f27c84ee2c6854efb9449d523cf121f67ea9977b134/resolv.conf",
"HostnamePath": "/data2/docker/containers/276090cfdacf341281526f27c84ee2c6854efb9449d523cf121f67ea9977b134/hostname",
"HostsPath": "/data2/docker/containers/276090cfdacf341281526f27c84ee2c6854efb9449d523cf121f67ea9977b134/hosts",
"LogPath": "/data2/docker/containers/276090cfdacf341281526f27c84ee2c6854efb9449d523cf121f67ea9977b134/276090cfdacf341281526f27c84ee2c6854efb9449d523cf121f67ea9977b134-json.log",
"Name": "/test12",
"RestartCount": 0,
"Driver": "overlay2",
"Platform": "linux",
"MountLabel": "",
"ProcessLabel": "",
"AppArmorProfile": "",
"ExecIDs": null,
"HostConfig": {
"Binds": null,
"ContainerIDFile": "",
"LogConfig": {
"Type": "json-file",
"Config": {
"max-size": "100m"
}
},
"NetworkMode": "host",
"PortBindings": {},
"RestartPolicy": {
"Name": "no",
"MaximumRetryCount": 0
},
"AutoRemove": false,
"VolumeDriver": "",
"VolumesFrom": null,
"ConsoleSize": [
52,
173
],
"CapAdd": null,
"CapDrop": null,
"CgroupnsMode": "host",
"Dns": [],
"DnsOptions": [],
"DnsSearch": [],
"ExtraHosts": null,
"GroupAdd": null,
"IpcMode": "private",
"Cgroup": "",
"Links": null,
"OomScoreAdj": 0,
"PidMode": "host",
"Privileged": false,
"PublishAllPorts": false,
"ReadonlyRootfs": false,
"SecurityOpt": [
"label=disable"
],
"UTSMode": "",
"UsernsMode": "",
"ShmSize": 67108864,
"Runtime": "nvidia",
"Isolation": "",
"CpuShares": 0,
"Memory": 0,
"NanoCpus": 0,
"CgroupParent": "",
"BlkioWeight": 0,
"BlkioWeightDevice": [],
"BlkioDeviceReadBps": [],
"BlkioDeviceWriteBps": [],
"BlkioDeviceReadIOps": [],
"BlkioDeviceWriteIOps": [],
"CpuPeriod": 0,
"CpuQuota": 0,
"CpuRealtimePeriod": 0,
"CpuRealtimeRuntime": 0,
"CpusetCpus": "",
"CpusetMems": "",
"Devices": [
{
"PathOnHost": "/dev/alixpu",
"PathInContainer": "/dev/alixpu",
"CgroupPermissions": "rwm"
},
{
"PathOnHost": "/dev/alixpu_ctl",
"PathInContainer": "/dev/alixpu_ctl",
"CgroupPermissions": "rwm"
},
{
"PathOnHost": "/dev/alixpu_ppu1",
"PathInContainer": "/dev/alixpu_ppu1",
"CgroupPermissions": "rwm"
}
],
"DeviceCgroupRules": null,
"DeviceRequests": null,
"MemoryReservation": 0,
"MemorySwap": 0,
"MemorySwappiness": null,
"OomKillDisable": false,
"PidsLimit": null,
"Ulimits": [],
"CpuCount": 0,
"CpuPercent": 0,
"IOMaximumIOps": 0,
"IOMaximumBandwidth": 0,
"MaskedPaths": [
"/proc/asound",
"/proc/acpi",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/proc/scsi",
"/sys/firmware",
"/sys/devices/virtual/powercap"
],
"ReadonlyPaths": [
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
]
},
"GraphDriver": {
"Data": {
"LowerDir": "/data2/docker/overlay2/76abf9921ab566efb04281476499b325cd6cfbf31f8771dd567eb45a8f43d883-init/diff:/data2/docker/overlay2/m2119tuloyqvwz5tbzel8reaa/diff:/data2/docker/overlay2/mf118a4eyxsr15v8icr5mnzxn/diff:/data2/docker/overlay2/sx0z3fhs0id3ofj7b5jnau014/diff:/data2/docker/overlay2/p07uu4abyv6f4jtqvr00nzn5q/diff:/data2/docker/overlay2/qhepj24feh3y1h5pzxwgqkw5y/diff:/data2/docker/overlay2/n2rguu84avc018l58f7oo3qth/diff:/data2/docker/overlay2/qoowyxls38kcaoqjye70sg8qi/diff:/data2/docker/overlay2/lvlhu47g4ogxya4p0oso9ki2y/diff:/data2/docker/overlay2/7oakiawd5vb9t718w68znn1zw/diff:/data2/docker/overlay2/p5rhbo9k6o432xmqmveyt4s6o/diff:/data2/docker/overlay2/hp97y605k163562bg42dchqmu/diff:/data2/docker/overlay2/ocdrd4fypjampxw2vabdyqc6o/diff:/data2/docker/overlay2/ro45zm5u4w55ro7ji1p3cyxha/diff:/data2/docker/overlay2/eqxdwdk1hcmhwfrs2ufwqm833/diff:/data2/docker/overlay2/6qfu0lkqhkj1zjw92e6bnznnj/diff:/data2/docker/overlay2/ijvwbdabffiejt3510vk2bmpx/diff:/data2/docker/overlay2/zbqbydny9s37gwzl11rwoj8mt/diff:/data2/docker/overlay2/anufygs18q16il333gowvxyc5/diff:/data2/docker/overlay2/f1dd70391fc04fa95b2837ff12d3917359abd06a118d8e90c288f1833fde8a5e/diff:/data2/docker/overlay2/10334ab267a90582bf2e95385e9510325081cc35d5ab40919a7474ac885f40cf/diff:/data2/docker/overlay2/950f2af218f5a9b72f5fa5c034ad64ab77ca997173048f64f90260abf8c926e7/diff:/data2/docker/overlay2/a637934162e1d0bfad69b46d524b5e8985b79a062f3be15165a7bca8b62d3b65/diff:/data2/docker/overlay2/c62654dd5237a7a175d464e7b863115ed3d872b107152dc084f3cafa94d1b249/diff:/data2/docker/overlay2/a082fa01e0a0d06049742f503646e875839470576e004cefdd93f11d7ac94de5/diff:/data2/docker/overlay2/e535b56eb1c2c13a8876cce1aadced699dd0d7403fe9acd2520087e5d566a318/diff:/data2/docker/overlay2/741c73b94d6334c41c83fa1caa0be24bed05b6e43459fd887f9cf0521a0d8e4c/diff:/data2/docker/overlay2/735f67e588bff050ea846cf5ad54f1c2d2211a0a6d6412ca927a80bb93c7e038/diff:/data2/docker/overlay2/e2e95ca24be72e150ec7dc64c0e046f9db152cbf5e3c25932ccd641a5
87b12d4/diff:/data2/docker/overlay2/d76a6ce3921805d490c3b7cdcc99a7edfceb0906bc41c0f6ecd2dfef1b7b7002/diff:/data2/docker/overlay2/c94bb9ce7785aa836877d4f179f214e46681e2bca741e231af274b26dde7a74a/diff:/data2/docker/overlay2/5a457434f467643f7fc804fd08961d0bf8e97b18fbf51e53f1c37d1fe141a288/diff:/data2/docker/overlay2/bd826ce506e1cd6442a86347c64211c021b98eea53963290430e363bd0b945a4/diff:/data2/docker/overlay2/c403913e9ee17c40415105eaef2124909928d909f70ee969b20818366d208d1a/diff:/data2/docker/overlay2/ec9b40f2fd438f193829e11b6eef80bdb7f6d60d6dd0f1dec44b222fb303ae82/diff:/data2/docker/overlay2/d004b7a5102b20185d42f655efe2da5c3a8a16038ed6303bba8116686eaac900/diff:/data2/docker/overlay2/06502c53a6472992ed1e01c69e87c897c00ea3138665d49bf2bb54d0b6404eb5/diff:/data2/docker/overlay2/28c8c69c6d88cd262f76284590940f0bf4432f4def480007c73d68841c1d28d5/diff:/data2/docker/overlay2/3b5402b5378c359dca44d25f686ee2978ca57be2db0869bb4a07b244ed9fc706/diff:/data2/docker/overlay2/879b6f2c4df506a44e879f833b1ff261628ebe832ecfe177b9041a3f6b36a09d/diff:/data2/docker/overlay2/d70c607ab635aa50e6bd1a63ff760eb71dd640c2ca9aaf35d7a110460f6d44d8/diff:/data2/docker/overlay2/37d5ff02a3e8466d188c80feb49eb91133fa5553bbada64c0c480a4a267c35a3/diff:/data2/docker/overlay2/f4f56f28c5f048d1050ccf06ffea18c70e3d668068a885eeafbefdef17cdaf6f/diff:/data2/docker/overlay2/7a7430d1745a26b21f52cb7e88bc229ecfcc0c5193908e3898f15f2d540bf986/diff:/data2/docker/overlay2/a712c9df016be7237124a823d8b2d3c7cfb15b7adfad7ea00e343081b743dbb9/diff:/data2/docker/overlay2/155fd1d75886fa7e362e332b382187737bca8fee406c43556490a6543492a0df/diff:/data2/docker/overlay2/230415a236baf8d8ce5e9d82f209a60489555d56daa539905090b9cb71450411/diff:/data2/docker/overlay2/1c5a18b7f71ef17534a3305f15cd6e9e5947bb8e7fc06ea09d3bc18b214b06df/diff:/data2/docker/overlay2/11b792de44d349a472e0185ce4cf1cea3b579cd99288a051bf8477efe6302e18/diff",
"MergedDir": "/data2/docker/overlay2/76abf9921ab566efb04281476499b325cd6cfbf31f8771dd567eb45a8f43d883/merged",
"UpperDir": "/data2/docker/overlay2/76abf9921ab566efb04281476499b325cd6cfbf31f8771dd567eb45a8f43d883/diff",
"WorkDir": "/data2/docker/overlay2/76abf9921ab566efb04281476499b325cd6cfbf31f8771dd567eb45a8f43d883/work"
},
"Name": "overlay2"
},
"Mounts": [],
"Config": {
"Hostname": "x08l11243.cloud.sqa.na131",
"Domainname": "",
"User": "",
"AttachStdin": true,
"AttachStdout": true,
"AttachStderr": true,
"Tty": true,
"OpenStdin": true,
"StdinOnce": true,
"Env": [
"PATH=/usr/local/PPU_SDK/CUDA_SDK/bin:/usr/local/PPU_SDK/bin:/usr/local/PPU_SDK/asight/bin:/usr/local/PPU_SDK/ppu-smi/bin:/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"container=docker",
"TZ=Asia/Shanghai",
"LANG=C.UTF-8",
"PIP_DISABLE_PIP_VERSION_CHECK=1",
"PIP_ROOT_USER_ACTION=ignore",
"LD_LIBRARY_PATH=/usr/local/PPU_SDK/CUDA_SDK/lib64:/usr/local/PPU_SDK/lib:/usr/local/lib:/usr/local/lib/:",
"PPU_SDK=/usr/local/PPU_SDK",
"PPU_PATH=/usr/local/PPU_SDK",
"PPU_HOME=/usr/local/PPU_SDK",
"CUDA_SDK=/usr/local/PPU_SDK/CUDA_SDK",
"CUDA_TOOLKIT_ROOT=/usr/local/PPU_SDK/CUDA_SDK",
"CUDA_PATH=/usr/local/PPU_SDK/CUDA_SDK",
"CUDA_HOME=/usr/local/PPU_SDK/CUDA_SDK",
"CUDNN_HOME=/usr/local/PPU_SDK/CUDA_SDK",
"CUDACXX=/usr/local/PPU_SDK/CUDA_SDK/bin/nvcc",
"CUDA_SDK_VER=cuda-12.3",
"LIBRARY_PATH=/usr/local/PPU_SDK/CUDA_SDK/lib64:/usr/local/PPU_SDK/lib:",
"PPU_VERSION=v1.4.1",
"aliDebug=disable INFO,DEBUG,TRACE,WARNING;",
"PIP_INDEX_URL=https://art-pub.eng.t-head.cn/artifactory/api/pypi/ptgai-pypi_ppu_alios_cu123_index/simple/"
],
"Cmd": null,
"Image": "hie-allspark-ppu-dev:1.4.1",
"Volumes": null,
"WorkingDir": "/root/",
"Entrypoint": [
"bash",
"/opt/t-head/entrypoint.sh"
],
"OnBuild": null,
"Labels": {
"io.buildah.version": "1.24.1",
"org.opencontainers.image.authors": "mx01297629@alibaba-inc.com",
"org.opencontainers.image.vendor": "T-Head Semiconductor Co., Ltd"
}
},
"NetworkSettings": {
"Bridge": "",
"SandboxID": "8c87c34d5d9e474f710d3505396b58417eae5649853cc01426b10d6281438fe0",
"SandboxKey": "/var/run/docker/netns/default",
"Ports": {},
"HairpinMode": false,
"LinkLocalIPv6Address": "",
"LinkLocalIPv6PrefixLen": 0,
"SecondaryIPAddresses": null,
"SecondaryIPv6Addresses": null,
"EndpointID": "",
"Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"IPAddress": "",
"IPPrefixLen": 0,
"IPv6Gateway": "",
"MacAddress": "",
"Networks": {
"host": {
"IPAMConfig": null,
"Links": null,
"Aliases": null,
"MacAddress": "",
"NetworkID": "7714ab4a4ba62e9d5c789f09245d874990d6ec6cddf2301d0af721a262d5acfa",
"EndpointID": "babc88e834abfaa200edac5f1b5132c6723d054e2a30ba7ba1d48f65992ffc0c",
"Gateway": "",
"IPAddress": "",
"IPPrefixLen": 0,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"DriverOpts": null,
"DNSNames": null
}
}
}
}
]
可以看到 HostConfig 中的 Devices 字段如下:
"Devices": [
{
"PathOnHost": "/dev/alixpu",
"PathInContainer": "/dev/alixpu",
"CgroupPermissions": "rwm"
},
{
"PathOnHost": "/dev/alixpu_ctl",
"PathInContainer": "/dev/alixpu_ctl",
"CgroupPermissions": "rwm"
},
{
"PathOnHost": "/dev/alixpu_ppu1",
"PathInContainer": "/dev/alixpu_ppu1",
"CgroupPermissions": "rwm"
}
],
执行 `ls /dev` 可以看到如下图所示的设备节点;选卡时,选择对应的设备节点进行挂载即可。
总结
NVIDIA 与 AMD、PPU 的挂载方式不同:NVIDIA 主要通过 HostConfig 的 DeviceRequests 挂载,AMD 和 PPU 主要通过 HostConfig 的 Devices 挂载。