使用 Dockerfile 创建 Ubuntu 深度学习环境
1. 新建 Dockerfile
# Create an empty file named "dockerfile" in the current directory.
touch dockerfile
# Open the file in vim for editing.
vim dockerfile
# Deep-learning base image: NVIDIA CUDA + cuDNN 7 (devel) on Ubuntu 16.04,
# with Miniconda installed under /opt/conda and Python pinned to 3.6.
#
# CUDA_VERSION selects the base-image tag. Override it with
#   docker build --build-arg CUDA_VERSION=<version> ...
# The default keeps a plain `docker build` from resolving to an empty tag.
ARG CUDA_VERSION=10.2
FROM nvidia/cuda:${CUDA_VERSION}-cudnn7-devel-ubuntu16.04

# DEBIAN_FRONTEND has to be set before the first apt-get call; otherwise
# installing tzdata opens an interactive time-zone prompt and stalls the build.
ENV TZ=Asia/Shanghai \
    DEBIAN_FRONTEND=noninteractive

# Switch apt to the Tsinghua (TUNA) mirror before touching the package index,
# configure the time zone, then install tzdata and add-apt-repository's package.
RUN sed -i "s@/archive.ubuntu.com/@/mirrors.tuna.tsinghua.edu.cn/@g" /etc/apt/sources.list \
    && rm -Rf /var/lib/apt/lists/* \
    && apt-get update \
    && ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone \
    && apt-get install -y tzdata software-properties-common

# System packages. Index refresh, install and cleanup stay in one layer so the
# removed files never end up in the image.
RUN add-apt-repository -y ppa:ubuntugis/ppa && \
    apt-get update && \
    apt-get install -y \
        build-essential \
        curl \
        git=1:2.* \
        jq=1.5* \
        libopencv-dev=2.4.* \
        libsqlite3-dev=3.11.* \
        python-opencv=2.4.* \
        python-protobuf=2.* \
        python3-tk=3.* \
        unzip \
        wget=1.* \
        zlib1g-dev=1:1.2.* && \
    apt-get autoremove -y && apt-get autoclean && apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# See https://github.com/mapbox/rasterio/issues/1289
ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt

# Install Python 3.6
# Miniconda is installed in batch mode (-b) into /opt/conda; the installer is
# deleted in the same layer.
RUN wget -q -O ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-4.7.12.1-Linux-x86_64.sh && \
    chmod +x ~/miniconda.sh && \
    ~/miniconda.sh -b -p /opt/conda && \
    rm ~/miniconda.sh

# Put conda's binaries and shared libraries ahead of the system ones.
ENV PATH=/opt/conda/bin:$PATH \
    LD_LIBRARY_PATH=/opt/conda/lib/:$LD_LIBRARY_PATH

# Pin the interpreter version and drop conda's package cache in the same layer.
RUN conda install -y python=3.6 && conda clean -ya
RUN python -m pip install --no-cache-dir --upgrade pip

CMD ["bash"]
2. 构建镜像
# Build the image from the Dockerfile in the current directory and tag it rv:v1.
# CUDA_VERSION is passed explicitly so that the FROM line
# (nvidia/cuda:${CUDA_VERSION}-cudnn7-devel-ubuntu16.04) resolves to a concrete tag.
docker build --build-arg CUDA_VERSION=10.2 -t rv:v1 .
Step 7/57 : RUN add-apt-repository ppa:ubuntugis/ppa && apt-get update && apt-get install -y wget=1.* git=1:2.* python-protobuf=2.* python3-tk=3.* jq=1.5* build-essential libsqlite3-dev=3.11.* zlib1g-dev=1:1.2.* libopencv-dev=2.4.* python-opencv=4.0.* unzip curl && apt-get autoremove && apt-get autoclean && apt-get clean
---> Running in 11b6b89fd18f
Official stable UbuntuGIS packages.
More info: https://launchpad.net/~ubuntugis/+archive/ubuntu/ppa
Error: retrieving gpg key timed out.
The command '/bin/sh -c add-apt-repository ppa:ubuntugis/ppa && apt-get update && apt-get install -y wget=1.* git=1:2.* python-protobuf=2.* python3-tk=3.* jq=1.5* build-essential libsqlite3-dev=3.11.* zlib1g-dev=1:1.2.* libopencv-dev=2.4.* python-opencv=4.0.* unzip curl && apt-get autoremove && apt-get autoclean && apt-get clean' returned a non-zero code: 1
E: Failed to fetch http://ppa.launchpad.net/ubuntugis/ppa/ubuntu/pool/main/p/proj/proj-bin_7.2.1-1~focal0_amd64.deb Unable to connect to ppa.launchpad.net:http: [IP: 91.189.95.85 80]
E: Unable to fetch some archives, maybe run apt-get update or try with --fix-missing?
原因:网络(DNS)问题。解决方法:执行 vim /etc/resolv.conf,
在文件中添加以下 DNS 服务器:
nameserver 8.8.8.8
nameserver 223.5.5.5
nameserver 223.6.6.6
问题:安装 tzdata 时会交互式地提示选择时区,导致构建卡住。解决方法:
# Add the environment variable that stops the tzdata time-zone selection from prompting interactively
ENV DEBIAN_FRONTEND=noninteractive
添加清华(TUNA)镜像源:
# Rewrite apt's sources to the Tsinghua (TUNA) mirror, discard the old package
# lists, and fetch a fresh index from the new mirror.
RUN sed -i "s@/archive.ubuntu.com/@/mirrors.tuna.tsinghua.edu.cn/@g" /etc/apt/sources.list && \
    rm -Rf /var/lib/apt/lists/* && \
    apt-get update
Step 14/54 : COPY ./requirements-dev.txt /opt/src/requirements-dev.txt
COPY failed: file not found in build context or excluded by .dockerignore: stat requirements-dev.txt: file does not exist
原因:requirements-dev.txt 不在构建上下文(执行 docker build 的当前目录)中,需要先把该文件放到当前目录下再构建。
3.查看镜像并建立容器
# List the local images to confirm the build produced rv:v1.
docker images

# Start a container from the image built above. The image name must come
# before the command; -dit keeps bash alive in the background so the
# attach/exec commands below have a running container to connect to.
docker run -dit --name rv01 rv:v1 /bin/bash
# Attach to the container's main bash process.
docker attach rv01
# Or open an additional shell inside the running container.
docker exec -it rv01 /bin/bash

# GPU variant using the prebuilt Raster Vision image, with the data, code and
# output directories mounted from the host. It reuses the name rv01, so remove
# the earlier container first (docker rm -f rv01) if it still exists.
docker run --runtime=nvidia --name rv01 -dit --gpus all -v \
/home/zju/raster/data:/opt/src/data -v /home/zju/raster/code:/opt/src/code -v \
/home/zju/raster/output:/opt/data/output quay.io/azavea/raster-vision:pytorch-0.13.1 /bin/bash
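在 RTX 3090 上运行时报错(环境中似乎同时存在 CUDA 10.2 和 11.4?)。
原因:RTX 3090 为 sm_86 架构,需要使用基于 CUDA 11.1 及以上版本编译的 PyTorch;基于 CUDA 10.2 编译的 PyTorch 不包含该架构的 kernel,因此出现下面的 "no kernel image is available" 错误: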
Traceback (most recent call last):
File "code/test.py", line 39, in <module>
out = model(image_tensor)
File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/torchvision/models/resnet.py", line 249, in forward
return self._forward_impl(x)
File "/opt/conda/lib/python3.7/site-packages/torchvision/models/resnet.py", line 233, in _forward_impl
x = self.bn1(x)
File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/batchnorm.py", line 147, in forward
self.num_batches_tracked = self.num_batches_tracked + 1 # type: ignore[has-type]
RuntimeError: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
#cuda
# Expose the CUDA toolkit installed under /usr/local/cuda.
# The ${VAR:+...} form appends to an existing value without leaving an empty
# leading entry (which the dynamic linker treats as the current directory)
# when LD_LIBRARY_PATH was previously unset.
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}/usr/local/cuda/lib64
export PATH=$PATH:/usr/local/cuda/bin
# CUDA_HOME names a single directory, not a colon-separated search path.
export CUDA_HOME=/usr/local/cuda
#cuda
# NOTE(review): the apt package and the runfile installer below look like
# alternative ways to install the toolkit - confirm which one is intended.
apt install nvidia-cuda-toolkit

# Other releases are listed in the CUDA toolkit archive:
# https://developer.nvidia.com/cuda-toolkit-archive

# Remove the old CUDA 10.2 installation.
# NOTE(review): presumably this directory is entered to run the bundled
# uninstaller before deleting the tree - confirm.
cd /usr/local/cuda-10.2/bin/
rm -rf /usr/local/cuda-10.2
# The previous working directory was just deleted; move somewhere that still
# exists so wget can write the installer.
cd ~

# Download and run the CUDA 11.4.0 runfile installer.
wget https://developer.download.nvidia.com/compute/cuda/11.4.0/local_installers/cuda_11.4.0_470.42.01_linux.run
chmod +x cuda_11.4.0_470.42.01_linux.run
./cuda_11.4.0_470.42.01_linux.run
# GPU smoke test: print the PyTorch/CUDA/cuDNN build information, then push a
# random batch through a pretrained ResNet-50 on the GPU.
import torch
from torchvision import models
import numpy as np

# Build and device information. get_arch_list() shows which GPU architectures
# this PyTorch build was compiled for.
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.version.cuda)
print(torch.backends.cudnn.version())
print(torch.cuda.get_arch_list())
print(torch.cuda.get_device_name(0), '\n')

# Random batch of two 3x224x224 images. astype() converts the float64 samples
# to float32; assigning to .dtype instead would reinterpret the raw bytes,
# doubling the last axis to 448 and filling the array with meaningless values.
image = np.random.random(size=[2, 3, 224, 224]).astype(np.float32)
image_tensor = torch.from_numpy(image).cuda()

# Load the pretrained model, move it to the GPU and run one forward pass.
model = models.resnet50(pretrained=True)
model = model.cuda()
out = model(image_tensor)
print(out)