XGBoost的文档:https://xgboost.readthedocs.io/en/latest/index.html
XGBoost的代码:https://github.com/dmlc/xgboost
使用python版的xgboost,最简单当然是直接安装已经公开发布了的包
pip3 install xgboost
但是这种包一般不是最新的版本,而且默认只是CPU版本的。如果需要最新版并且支持GPU,最好还是从源码编译后安装。
要能正常使用RTX3090 GPU卡,最好使用最新的CUDA11.1.1,11.1.0虽然使用GPU卡训练基本没问题,但是有的程序编译会有错!
记不清是编译AlexeyAB darknet还是别的什么程序时遇到过下面这个错误:
nvcc fatal: Unsupported gpu architecture 'compute_86'
懒得下载和安装那些包的话,直接拉取个nvidia的docker镜像,例如nvidia/cuda:11.1-cudnn8-devel-ubuntu18.04,这样最快捷。
拉取docker镜像后创建容器(我把权限和网络以及共享内存都放开了以方便做各种修改,反正是开发环境不是发布环境没什么安全上的要求随便折腾好了):
docker pull nvidia/cuda:11.1-cudnn8-devel-ubuntu18.04
docker run -it -d --privileged --ipc=host --network=host --name XGBoost_cuda-11.1-cudnn8-devel-ubuntu18.04 --gpus all -v /data/workspace:/workspace nvidia/cuda:11.1-cudnn8-devel-ubuntu18.04
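然后进入刚启动的容器(例如docker exec -it XGBoost_cuda-11.1-cudnn8-devel-ubuntu18.04 bash),先执行apt-get update更新软件源,再安装一些必须的常用工具(后面下载源码包要用到wget,镜像里没有的话也要一并装上):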
apt-get install vim git
apt-get install libssl-dev
apt-get install zlib1g
apt-get install zlib1g-dev
apt-get install python-setuptools
cd /workspace
wget https://github.com/Kitware/CMake/releases/download/v3.17.3/cmake-3.17.3.tar.gz
tar xf cmake-3.17.3.tar.gz
cd cmake-3.17.3
./bootstrap
make
make install
cd ..
wget http://www.python.org/ftp/python/3.6.9/Python-3.6.9.tgz
tar xzf Python-3.6.9.tgz
cd Python-3.6.9
./configure --with-ssl --enable-optimizations
make
make install
cd /usr/local/bin
ln -s python3 python
ln -s pip3 pip
cd /workspace
pip install -i http://mirrors.aliyun.com/pypi/simple/ --upgrade pip --trusted-host mirrors.aliyun.com
pip install -i http://mirrors.aliyun.com/pypi/simple/ numpy scipy sklearn matplotlib pandas seaborn bayesian-optimization --trusted-host mirrors.aliyun.com
git clone --recursive https://github.com/dmlc/xgboost
cd xgboost
修改cmake/Utils.cmake,在CUDA版本大于等于11.0的分支里加上RTX3090 GPU对应的算力86(即set(flags "35;50;52;60;61;70;75;80;86")),修改后的相关片段如下:
function(format_gencode_flags flags out)
if(CMAKE_CUDA_COMPILER_VERSION MATCHES "^([0-9]+\\.[0-9]+)")
set(CUDA_VERSION "${CMAKE_MATCH_1}")
endif()
# Set up architecture flags
if(NOT flags)
if (CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
set(flags "35;50;52;60;61;70;75;80;86")
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
set(flags "35;50;52;60;61;70;75")
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
set(flags "35;50;52;60;61;70")
else()
set(flags "35;50;52;60;61")
endif()
endif()
然后编译支持GPU的版本(如果需要支持多GPU并行训练,先安装NVIDIA NCCL,并且把-DUSE_NCCL=ON和-DNCCL_ROOT=... 加上):
mkdir build
cd build
cmake .. -DUSE_CUDA=ON #-DUSE_NCCL=ON -DNCCL_ROOT=/path/to/nccl2
make -j
编译完可以看到当前目录下生成了个xgboost二进制文件
对于python版的xgboost,可以编译生成whl文件后安装它,也可以直接用setup.py安装,我选择后者(注意python-package目录在源码根目录下,如果当前还在build目录里,需要先cd ..回到源码根目录):
cd python-package
python setup.py install --use-cuda # --use-nccl
然后你可以在python里用import xgboost测试是否正常,没报错就表示可用了,可以使用python调用xgboost了。
附:
如果运行某些训练或测试程序时用到了pandas读取数据,由于pandas用到了bz2压缩解压算法,可能会报没有_bz2这个module的错误:
root@ubuntu-rtx3090:/workspace/xgboost# python demo_ad.py
Traceback (most recent call last):
File "demo_ad.py", line 1, in <module>
\ufeffimport pandas as pd
File "/usr/local/lib/python3.6/site-packages/pandas/__init__.py", line 52, in <module>
from pandas.core.api import (
File "/usr/local/lib/python3.6/site-packages/pandas/core/api.py", line 29, in <module>
from pandas.core.groupby import Grouper, NamedAgg
File "/usr/local/lib/python3.6/site-packages/pandas/core/groupby/__init__.py", line 1, in <module>
from pandas.core.groupby.generic import DataFrameGroupBy, NamedAgg, SeriesGroupBy
File "/usr/local/lib/python3.6/site-packages/pandas/core/groupby/generic.py", line 57, in <module>
from pandas.core.aggregation import (
File "/usr/local/lib/python3.6/site-packages/pandas/core/aggregation.py", line 27, in <module>
from pandas.core.series import FrameOrSeriesUnion, Series
File "/usr/local/lib/python3.6/site-packages/pandas/core/series.py", line 68, in <module>
from pandas.core import algorithms, base, generic, nanops, ops
File "/usr/local/lib/python3.6/site-packages/pandas/core/generic.py", line 102, in <module>
from pandas.io.formats import format as fmt
File "/usr/local/lib/python3.6/site-packages/pandas/io/formats/format.py", line 71, in <module>
from pandas.io.common import stringify_path
File "/usr/local/lib/python3.6/site-packages/pandas/io/common.py", line 3, in <module>
import bz2
File "/usr/local/lib/python3.6/bz2.py", line 23, in <module>
from _bz2 import BZ2Compressor, BZ2Decompressor
ModuleNotFoundError: No module named '_bz2'
首先确认bzip2和它的开发包(Ubuntu下包名是libbz2-dev)是否已经安装。安装后仍然报错的话,分两种情况:如果是python3.7,原因是python3.7下没有对应的_bz2的so文件,从低版本的python3.6下拷贝过来放到对应的位置即可;如果是python3.6也报这个错,可能是当前使用的python的路径不是系统默认的路径,而系统之前默认安装的_bz2的so是在/usr/lib/python3.6/lib-dynload/下面,把它从系统默认路径下拷贝到当前使用的python的路径下(例如/usr/local/lib/python3.6/lib-dynload/)即可,例如:
cp /usr/lib/python3.6/lib-dynload/_bz2.cpython-36m-x86_64-linux-gnu.so /usr/local/lib/python3.6/lib-dynload/