安装cuda
# Download the CUDA 12.2.1 runfile installer.
wget https://developer.download.nvidia.com/compute/cuda/12.2.1/local_installers/cuda_12.2.1_535.86.10_linux.run
# Make the installer executable. "chmod +x" must appear only once: when it is
# repeated, chmod treats the extra "chmod" and "+x" words as file names and
# exits with an error.
chmod +x cuda_12.2.1_535.86.10_linux.run
# Launch the installer.
./cuda_12.2.1_535.86.10_linux.run
安装cudnn
必须先安装好 CUDA,之后才能成功安装 cuDNN
# Install the cuDNN 8.9.6.50 local-repository package. The .deb is expected to
# be in the current directory already; no download step is given for it here.
dpkg -i cudnn-local-repo-ubuntu2204-8.9.6.50_1.0-1_amd64.deb
# Install every .deb found in the local repository directory. The subshell
# keeps the caller's working directory unchanged, so later downloads do not
# land under /var, and "./*.deb" prevents a file name that begins with "-"
# from being parsed as a dpkg option.
(cd /var/cudnn-local-repo-ubuntu2204-8.9.6.50/ && dpkg -i ./*.deb)
# Rescan the linker directories verbosely and check that cuDNN libraries show up.
ldconfig -v | grep cudnn
安装tensorrt
# Download the TensorRT 9.1.0.4 tarball (Linux x86_64, CUDA 12.2 build).
wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/9.1.0/tars/tensorrt-9.1.0.4.linux.x86_64-gnu.cuda-12.2.tar.gz
# Unpack the archive, move the extracted tree to /usr/local/tensorrt, then
# delete the downloaded tarball.
tar --extract --verbose --gzip --file=tensorrt-9.1.0.4.linux.x86_64-gnu.cuda-12.2.tar.gz
mv TensorRT-9.1.0.4/ /usr/local/tensorrt
rm tensorrt-9.1.0.4.linux.x86_64-gnu.cuda-12.2.tar.gz
# Configure: add the TensorRT library directory to the dynamic linker's
# search path.
printf '%s\n' "/usr/local/tensorrt/lib" > /etc/ld.so.conf.d/tensorrt.conf
# Check: rescan the linker directories verbosely and filter for nvinfer.
ldconfig -v | grep nvinfer
# Set up the environment: append the TensorRT and CUDA library directories to
# LD_LIBRARY_PATH in both /etc/profile and ~/.bashrc.
# Single quotes keep $LD_LIBRARY_PATH literal, so it is expanded each time the
# file is read instead of being frozen to its value at install time. The
# ${VAR:+...} form omits the separator when the variable is unset or empty,
# avoiding a leading ":" (an empty entry, which the loader treats as the
# current directory).
echo 'export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}/usr/local/tensorrt/lib:/usr/local/cuda/lib64/' >> /etc/profile
echo 'export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}/usr/local/tensorrt/lib:/usr/local/cuda/lib64/' >> ~/.bashrc
# Apply the new settings to the current shell. Sourcing both files appends the
# directories twice here, which is harmless.
source /etc/profile
source ~/.bashrc
学术加速
# Load /etc/network_turbo into the current shell (the "academic acceleration"
# step named by the heading above).
. /etc/network_turbo
解决报错:
Could NOT find MPI_CXX (missing: MPI_CXX_WORKS):参见 horovod/horovod Issue #2240(https://github.com/horovod/horovod/issues/2240)