# 1. scrapy installation
# Install sqlite development headers (so the new Python build gets the sqlite3 module)
yum install sqlite-devel
# Upgrade python to 2.7.12 from source; note which python version the system already ships
tar -zxvf Python-2.7.12.tgz
cd Python-2.7.12
./configure
make all
make install
make clean
make distclean
# Keep the old interpreter available under a versioned name (CentOS 6 ships 2.6.6),
# then point /usr/bin/python at the freshly installed 2.7.
mv /usr/bin/python /usr/bin/python2.6.6
ln -s /usr/local/bin/python2.7 /usr/bin/python
# Fix yum: the python upgrade breaks it. Edit its shebang, changing
# "#!/usr/bin/python" to "#!/usr/bin/python2.6.6".
vi /usr/bin/yum
# Install distribute (setuptools predecessor), needed before pip can be built
wget http://pypi.python.org/packages/source/d/distribute/distribute-0.6.49.tar.gz
tar -zxvf distribute-0.6.49.tar.gz
cd distribute-0.6.49
# BUG FIX: the install step was missing — the tree was extracted and entered
# but distribute was never actually installed.
python setup.py install
# BUG FIX: return to the directory holding the pip tarball; without this the
# next tar command runs inside distribute-0.6.49/ and fails to find the file.
cd ..
# Install pip
tar -zxvf pip-9.0.1.tar.gz
cd pip-9.0.1
python setup.py install
# Install the crawler-related python packages
pip install pyquery
pip install demjson
pip install pyasn1
pip install pyasn1-modules
pip install cryptography
pip install certifi
# urllib3 is pinned to a specific version — presumably for compatibility with
# the packages above on python 2.7; TODO confirm before upgrading it.
pip install urllib3==1.21.1
pip install chardet
pip install redis
pip install Pillow
pip install sqlalchemy
pip install scrapy
pip install scrapy-splash
# Install Twisted from a local source archive
unzip Twisted-17.5.0.zip
cd Twisted-17.5.0
python setup.py install
# BUG FIX: return to the directory holding the MySQL-python archive; the
# original recipe tried to unzip it from inside Twisted-17.5.0/.
cd ..
################################################## Install mysql (the mysql server install itself is omitted here)
# Install the MySQL-python bindings
unzip MySQL-python-1.2.5.zip
cd MySQL-python-1.2.5
python setup.py install
cd ..
################################################## Create the libmysqlclient symlink; adjust the path to your mysql install
ln -s /application/mysql56/lib/libmysqlclient.so.18 /usr/lib64/libmysqlclient.so.18
#pip install -U setuptools
#pip install setuptools_scm
# 2. scrapyd installation
# Install scrapyd from a local source archive
unzip scrapyd-1.2.zip
cd scrapyd-1.2
python setup.py install
# Create the config directory and write the config (contents follow below)
mkdir -p /etc/scrapyd
cd /etc/scrapyd
vi scrapyd.conf
################################################
# scrapyd.conf — paste the following into the editor opened above
[scrapyd]
# Working directories, relative to scrapyd's cwd (/var/scrapyd per the init script below)
eggs_dir = eggs
logs_dir = logs
items_dir =
jobs_to_keep = 5
dbs_dir = dbs
# max_proc = 0: no fixed process cap — presumably derived from
# max_proc_per_cpu instead; verify against the scrapyd docs.
max_proc = 0
max_proc_per_cpu = 4
finished_to_keep = 100
poll_interval = 5.0
# NOTE(review): 0.0.0.0 listens on all interfaces — confirm this host is not
# reachable from untrusted networks before using this bind address.
bind_address = 0.0.0.0
http_port = 6800
debug = off
runner = scrapyd.runner
application = scrapyd.app.application
launcher = scrapyd.launcher.Launcher
webroot = scrapyd.website.Root
# JSON web-service endpoint -> handler class mapping
[services]
schedule.json = scrapyd.webservice.Schedule
cancel.json = scrapyd.webservice.Cancel
addversion.json = scrapyd.webservice.AddVersion
listprojects.json = scrapyd.webservice.ListProjects
listversions.json = scrapyd.webservice.ListVersions
listspiders.json = scrapyd.webservice.ListSpiders
delproject.json = scrapyd.webservice.DeleteProject
delversion.json = scrapyd.webservice.DeleteVersion
listjobs.json = scrapyd.webservice.ListJobs
daemonstatus.json = scrapyd.webservice.DaemonStatus
################################################
# Configure scrapyd to start at boot via a SysV init script
mkdir -p /var/scrapyd
cd /etc/init.d
vi scrapyd
################################################
# chkconfig: 2345 90 10
# description: scrapyd is the daemon that schedules and runs scrapy spiders
# BUG FIX: the description above previously read "redis is a persistent
# key-value database" — copied from a redis init script. chkconfig parses the
# "# description:" line, so it is corrected to describe this service.
PORT=6800
# NOTE(review): this shadows the login $HOME for the whole script — it is used
# as scrapyd's working/log directory below; confirm that is intentional.
HOME="/var/scrapyd/"
BIN="/usr/local/bin/scrapyd"
# PID of the python process listening on $PORT; awk strips the "/python"
# suffix from netstat's "PID/Program name" column.
pid=$(netstat -lnopt | grep ":$PORT" | awk '/python/{gsub(/\/python/,"",$7);print $7;}')
# Start scrapyd in the background unless one is already listening on $PORT.
# Reads globals: pid, HOME, BIN, PORT.
start() {
  if [ -n "$pid" ]; then
    echo "server already start,pid:$pid"
    return 0
  fi
  # Fail loudly instead of launching from the wrong directory.
  cd "$HOME" || return 1
  # "nohup $BIN &" alone also starts scrapyd, but jenkins deploys hang on the
  # inherited stdout/stderr, so redirect both into a log file.
  nohup "$BIN" >> "$HOME/scrapyd.log" 2>&1 &
  echo "start at port:$PORT"
}
# Stop the scrapyd process found on $PORT. Reads globals: pid, PORT.
stop() {
  if [ -z "$pid" ]; then
    echo "not find program on port:$PORT"
    return 0
  fi
  # BUG FIX: the original comment promised a gentle signal first with SIGKILL
  # as a fallback, but the code jumped straight to "kill -9". Send SIGTERM,
  # give the process a moment to exit cleanly, then escalate only if needed.
  kill "$pid" 2>/dev/null
  sleep 1
  if kill -0 "$pid" 2>/dev/null; then
    kill -9 "$pid"
    echo "kill program use signal 9,pid:$pid"
  else
    echo "kill program use signal 15,pid:$pid"
  fi
}
# Report whether a scrapyd process was found listening on $PORT.
# Reads globals: pid, PORT.
status() {
  if [ -n "$pid" ]; then
    echo "program is running,pid:$pid"
  else
    echo "not find program on port:$PORT"
  fi
}
# Dispatch on the first command-line argument (start|stop|status).
case "$1" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  status)
    status
    ;;
  *)
    echo "Usage: {start|stop|status}"
    ;;
esac
exit 0
################################################
# Make the init script executable and register it for boot
chmod +x scrapyd
chkconfig scrapyd on
# Install the supervisor process-control daemon
pip install supervisor
mkdir -p /etc/supervisor/
# 3. scrapyd-client installation
# Install scrapyd-client from the github master branch
mkdir -p /application/pyplugins
cd /application/pyplugins
mkdir scrapyd-client
cd scrapyd-client
wget https://github.com/scrapy/scrapyd-client/archive/master.zip
unzip master.zip
cd scrapyd-client-master
python setup.py install
# 4. splash installation
# docker on centos requires a 64-bit OS
# and a kernel of 3.8 or newer
# centos6 ships kernel 2.6, so the kernel must be upgraded first (via elrepo)
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
rpm -Uvh http://www.elrepo.org/elrepo-release-6-6.el6.elrepo.noarch.rpm
yum -y --enablerepo=elrepo-kernel install kernel-lt
# Edit the bootloader config so the new kernel is booted:
vi /etc/grub.conf
# change default=1 to default=0, then reboot into the new kernel
reboot
# Install docker (from the EPEL repository)
rpm -Uvh http://download.fedoraproject.org/pub/epel/6/i386/epel-release-6-8.noarch.rpm
yum -y install docker-io
# Start the docker daemon
service docker start
# Start docker at boot
chkconfig docker on
# Load the splash image from a local tarball because the image is large; it was
# exported earlier with: docker save scrapinghub/splash > /application/download/splash.tar
# ("docker pull scrapinghub/splash" fetches it from the registry but takes very long)
docker load < /application/splash.tar
# Run splash: detached, port 8050 published, auto-restarting
docker run -d -p 8050:8050 --restart=always --name=splash scrapinghub/splash