1、在联网的机器上下载依赖包
sudo yum install --downloadonly --downloaddir=/usr/local/soft/py3 zlib-devel bzip2-devel openssl openssl-devel ncurses-devel epel-release gcc gcc-c++ xz-devel readline-devel gdbm-devel sqlite-devel tk-devel db4-devel libpcap-devel libffi-devel libcurl-devel libsqlite3-devel
2、打包依赖包(在/usr/local/soft目录下执行)
tar -czvf py3_lib.tar.gz py3
3、将py3_lib.tar.gz拷贝到目标(离线)机器的/usr/local/soft目录下,解压依赖包
tar -xzvf py3_lib.tar.gz
4、安装依赖包
cd /usr/local/soft/py3
rpm -ivh --force ./*
5、下载python的安装包,安装python3
mkdir -p /usr/local/python3
tar zxvf /usr/local/soft/Python-3.6.3.tgz -C /usr/local/python3
cd /usr/local/python3/Python-3.6.3
./configure --prefix=/usr/local/python3 # 将python3安装在这个目录
make
make install
创建软链接
ln -s /usr/local/python3/bin/python3 /usr/bin/python3
ln -s /usr/local/python3/bin/pip3 /usr/bin/pip3
6、安装pyspider的依赖
备注:
1)whl后缀的文件安装方式:pip3 install <文件名>.whl(tar.gz源码包同样使用pip3 install安装)
2)模块文件的下载地址(以pycurl为例,其他模块将地址末尾的pycurl替换为对应的模块名):https://pypi.org/simple/pycurl/
安装顺序:
pip3 install pip-21.1.2-py3-none-any.whl
pip3 install cssselect-1.1.0-py2.py3-none-any.whl
pip3 install lxml-4.6.3-cp36-cp36m-manylinux1_x86_64.whl
pip3 install pyquery-1.4.3-py3-none-any.whl
pip3 install tornado-4.5.3.tar.gz
pip3 install PyYAML-5.4.1-cp36-cp36m-manylinux1_x86_64.whl
pip3 install defusedxml-0.7.1-py2.py3-none-any.whl
pip3 install six-1.16.0-py2.py3-none-any.whl
pip3 install chardet-4.0.0-py2.py3-none-any.whl
pip3 install typing_extensions-3.10.0.0-py3-none-any.whl
pip3 install zipp-3.4.1-py3-none-any.whl
pip3 install importlib_metadata-4.5.0-py3-none-any.whl
pip3 install click-8.0.1-py3-none-any.whl
pip3 install Werkzeug-0.16.1-py2.py3-none-any.whl
pip3 install itsdangerous-2.0.1-py3-none-any.whl
pip3 install MarkupSafe-2.0.1-cp36-cp36m-manylinux1_x86_64.whl
pip3 install Jinja2-3.0.1-py3-none-any.whl
pip3 install Flask-0.11-py2.py3-none-any.whl
pip3 install Flask_Login-0.5.0-py2.py3-none-any.whl
pip3 install idna-2.10-py2.py3-none-any.whl
pip3 install urllib3-1.26.5-py2.py3-none-any.whl
pip3 install certifi-2021.5.30-py2.py3-none-any.whl
pip3 install requests-2.25.1-py2.py3-none-any.whl
pip3 install tblib-1.7.0-py2.py3-none-any.whl
pip3 install u_msgpack_python-2.7.1-py2.py3-none-any.whl
pip3 install jsmin-2.2.2.tar.gz
pip3 install WsgiDAV-3.0.1-py2.py3-none-any.whl
pip3 install pycurl-7.43.0.6.tar.gz
pip3 install pyspider-0.3.10.tar.gz
pip3 install soupsieve-2.2.1-py3-none-any.whl
pip3 install beautifulsoup4-4.9.3-py3-none-any.whl
pip3 install bs4-0.0.1.tar.gz
pip3 install dataclasses-0.8-py3-none-any.whl
pip3 install fake-useragent-0.1.11.tar.gz
pip3 install json5-0.9.6-py2.py3-none-any.whl
pip3 install PyMySQL-1.0.2-py3-none-any.whl
pip3 install python_dateutil-2.8.1-py2.py3-none-any.whl
pip3 install setuptools-28.8.0-py2.py3-none-any.whl
pip3 install numpy-1.19.5-cp36-cp36m-manylinux1_x86_64.whl
pip3 install pytz-2021.1-py2.py3-none-any.whl
pip3 install pandas-1.1.5-cp36-cp36m-manylinux1_x86_64.whl
7、增加环境变量
vim /etc/profile
在文件末尾追加以下两行:
export PYTHON_HOME=/usr/local/python3
export PATH=$PYTHON_HOME/bin:$PATH
使配置立即生效
source /etc/profile
8、安装phantomjs
1)下载安装包
2)解压到对应目录(本文为/usr/local/soft,需与下面软链接中的路径一致)
tar xvf phantomjs-2.1.1-linux-x86_64.tar
3)将解压后的文件夹重命名为phantomjs
mv phantomjs-2.1.1-linux-x86_64 phantomjs
4)创建软链接
ln -sv /usr/local/soft/phantomjs/bin/phantomjs /usr/bin/phantomjs
9、增加启动配置文件
1)创建pyspider_config.json(如果已有现成的配置文件,直接拷贝过来即可)
cd /usr/local/python3/
vim pyspider_config.json
{
"webui": {
"port": "5000",
"username": "ywhz",
"password": "ywhz",
"need-auth": true
}
}
2)后台启动
nohup pyspider -c /usr/local/python3/pyspider_config.json &
**遇到的问题及解决方案**
报错:
ValueError: Invalid configuration:
- Deprecated option 'domaincontroller': use 'http_authenticator.domain_controller' instead.
解决方法如下:
在site-packages中找到pyspider的安装目录,打开其中webui目录下的webdav.py文件,修改第209行即可。
vim /usr/local/python3/lib/python3.6/site-packages/pyspider/webui/webdav.py
把
'domaincontroller': NeedAuthController(app),
修改为:
'http_authenticator':{
'HTTPAuthenticator':NeedAuthController(app),
},