1 运行 docker 命令报错
root@test:~# docker run --runtime=nvidia -ti -v $(pwd):/workspace -w /workspace -v /nfs:/nfs $@ --privileged -v /var/run/docker.sock:/var/run/docker.sock registry.test.cn/mla/cxx_toolchains:latest
docker: Error response from daemon: Unknown runtime specified nvidia.
See 'docker run --help'.
根据错误提示,查看 nvidia-docker 是否安装
root@test:~# nvidia-docker
nvidia-docker: command not found
root@test:~#
很明显,nvidia-docker 没有安装
2 执行脚本,安装 nvidia-docker
root@test:~# cat install-nvidia-docker.sh
sudo curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
sudo curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
sudo apt-get update
sudo apt-get install -y nvidia-docker2
sudo pkill -SIGHUP dockerd
root@test:~#
检查 nvidia-docker 和 nvidia-container-runtime 是否安装成功
root@test:~# which nvidia-docker
/usr/bin/nvidia-docker
root@test:~# which nvidia-container-runtime
/usr/bin/nvidia-container-runtime
root@test:~#
3 编辑 /etc/docker/daemon.json,确保包含 nvidia 运行时配置("default-runtime" 和 "runtimes" 字段),内容如下
root@test:~# cat /etc/docker/daemon.json
{
"insecure-registries": ["registry.test.cn"],
"max-concurrent-downloads": 10,
"log-driver": "json-file",
"log-level": "warn",
"log-opts": {
"max-size": "10m",
"max-file": "3"
},
"live-restore": true,
"metrics-addr": "0.0.0.0:9323",
"default-runtime": "nvidia",
"experimental": true,
"runtimes": {
"nvidia": {
"path": "/usr/bin/nvidia-container-runtime",
"runtimeArgs": []
}
}
}
root@test:~#
4 重启 docker
root@test:~# systemctl daemon-reload
root@test:~# systemctl restart docker
5 验证
root@test:~# docker run --runtime=nvidia -ti -v $(pwd):/workspace -w /workspace -v /nfs:/nfs $@ --privileged -v /var/run/docker.sock:/var/run/docker.sock registry.test.cn/mla/cxx_toolchains:latest
root@c3a43f4564a8:/workspace#
root@c3a43f4564a8:/workspace# ls
root@c3a43f4564a8:/workspace# pwd
/workspace
root@c3a43f4564a8:/workspace#