1.下载安装nccl
1.1 下载仓库安装包后
sudo dpkg -i /home/chen/mydisk/YJY/nccl-repo-ubuntu1604-2.8.3-ga-cuda10.1_1-1_amd64.deb
1.2 更新
sudo apt update
1.3 安装nccl
sudo apt install libnccl2=2.8.3-1+cuda10.1 libnccl-dev=2.8.3-1+cuda10.1
1.4 测试nccl是否安装好
git clone https://github.com/NVIDIA/nccl-tests.git
cd nccl-tests
make
./build/all_reduce_perf -b 8 -e 256M -f 2 -g <ngpus>   # 将 <ngpus> 替换为本机的GPU数量，例如 -g 1
2.下载并安装openmpi
2.1 下载安装包后，解压、编译并安装
tar -xvzf /home/chen/mydisk/YJY/openmpi-4.0.5.tar.gz
cd openmpi-4.0.5
./configure --prefix="/usr/local/openmpi"
make -j 8
sudo make install
2.2 在.bashrc中添加环境变量
gedit ~/.bashrc
export PATH="$PATH:/usr/local/cuda/bin:/usr/local/openmpi/bin"
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/openmpi/lib/"
source ~/.bashrc
执行上面的 source 命令，或者打开新的终端，使环境变量生效。
3.安装cmake
sudo apt install cmake
3.1 查看cmake版本
cmake --version
4.安装horovod
4.1 安装好nccl之后可使用以下命令安装horovod
HOROVOD_GPU_OPERATIONS=NCCL pip install horovod -i https://pypi.tuna.tsinghua.edu.cn/simple