第1.9章 scrapy之完整工程部署

1 scrapy

# Install the sqlite development headers so the Python build includes the sqlite3 module
yum install sqlite-devel	
# Upgrade Python to 2.7.12; take note of the Python version the system originally ships with
tar -zxvf Python-2.7.12.tgz
cd Python-2.7.12
./configure 
make all
make install
make clean 
make distclean 
# Keep the old 2.6.6 interpreter reachable, then point 'python' at the new 2.7 build
mv /usr/bin/python /usr/bin/python2.6.6
ln -s /usr/local/bin/python2.7 /usr/bin/python

# Repair yum: the Python upgrade breaks it, so edit its shebang
# from #!/usr/bin/python to #!/usr/bin/python2.6.6
vi /usr/bin/yum


# Install distribute (the setuptools fork) -- required before pip can be built
wget http://pypi.python.org/packages/source/d/distribute/distribute-0.6.49.tar.gz
tar -zxvf distribute-0.6.49.tar.gz
cd distribute-0.6.49
# FIX: the install step was missing -- extracting and cd-ing in alone installs
# nothing, and pip's setup.py below needs setuptools/distribute present.
python setup.py install
# FIX: return to the directory that holds the pip tarball; the original
# extracted pip-9.0.1.tar.gz from inside distribute-0.6.49/ where it is not.
cd ..

# Install pip from a local source tarball
tar -zxvf pip-9.0.1.tar.gz 
cd pip-9.0.1
python setup.py install 

# Install the crawler's dependency stack via pip
pip install pyquery
pip install demjson
pip install pyasn1
pip install pyasn1-modules
pip install cryptography
pip install certifi
pip install urllib3==1.21.1
pip install chardet
pip install redis
pip install Pillow
pip install sqlalchemy
pip install scrapy
pip install scrapy-splash

# Install Twisted from a local source archive
# NOTE(review): 'pip install scrapy' above already pulls in Twisted; this step
# replaces it with the local 17.5.0 build -- presumably a deliberate pin, confirm.
unzip Twisted-17.5.0.zip
cd Twisted-17.5.0
python setup.py install


################################################## Install MySQL itself -- its installation steps are omitted here
# Build the MySQL-python (MySQLdb) bindings from source
unzip MySQL-python-1.2.5.zip 
cd MySQL-python-1.2.5
python setup.py install

################################################## Symlink libmysqlclient so the bindings can find it; adjust to your MySQL install path
ln -s /application/mysql56/lib/libmysqlclient.so.18 /usr/lib64/libmysqlclient.so.18


#pip install -U setuptools
#pip install setuptools_scm

2 scrapyd安装

# Build and install scrapyd from a local source archive
unzip scrapyd-1.2.zip 
cd scrapyd-1.2
python setup.py install

# Create the scrapyd configuration directory and write its config file
mkdir -p /etc/scrapyd
cd /etc/scrapyd
vi scrapyd.conf
################################################
[scrapyd]
# Working directories for built eggs, logs and job databases
eggs_dir    = eggs
logs_dir    = logs
# Empty items_dir disables separate item feed storage
items_dir   =
jobs_to_keep = 5
dbs_dir     = dbs
# max_proc = 0: derive the process limit from CPU count * max_proc_per_cpu
max_proc    = 0
max_proc_per_cpu = 4
finished_to_keep = 100
poll_interval = 5.0
# Listen on all interfaces on port 6800 (the scrapyd web UI / JSON API)
bind_address = 0.0.0.0
http_port   = 6800
debug       = off
runner      = scrapyd.runner
application = scrapyd.app.application
launcher    = scrapyd.launcher.Launcher
webroot     = scrapyd.website.Root

[services]
# JSON web-service endpoints exposed by scrapyd
schedule.json     = scrapyd.webservice.Schedule
cancel.json       = scrapyd.webservice.Cancel
addversion.json   = scrapyd.webservice.AddVersion
listprojects.json = scrapyd.webservice.ListProjects
listversions.json = scrapyd.webservice.ListVersions
listspiders.json  = scrapyd.webservice.ListSpiders
delproject.json   = scrapyd.webservice.DeleteProject
delversion.json   = scrapyd.webservice.DeleteVersion
listjobs.json     = scrapyd.webservice.ListJobs
daemonstatus.json = scrapyd.webservice.DaemonStatus
################################################

# Register scrapyd as a SysV init service so it starts at boot
mkdir -p /var/scrapyd
cd /etc/init.d
vi scrapyd
################################################
# chkconfig:   2345 90 10
# description:  scrapyd is the daemon that schedules and runs Scrapy spiders
# (FIX: the description previously said "redis is a persistent key-value
# database" -- copy-pasted from a redis init script.)
PORT=6800
# NOTE(review): this deliberately reuses the name HOME, shadowing the login
# $HOME environment variable for this script -- confirm nothing started from
# here relies on the real home directory.
HOME="/var/scrapyd/"
BIN="/usr/local/bin/scrapyd"
 
# PID of the python process listening on $PORT, or empty if none.
# netstat's last column is "PID/program"; awk strips the "/python" suffix.
# stderr suppressed intentionally: netstat warns when run without full privileges.
pid=$(netstat -lnopt 2>/dev/null | grep ":$PORT" | awk '/python/{gsub(/\/python/,"",$7);print $7;}')
 
start() {
   # Refuse to launch a second instance if one already owns the port.
   if [ -n "$pid" ]; then
      echo "server already start,pid:$pid"
      return 0
   fi
 
   cd "$HOME"
   # Run scrapyd detached, capturing stdout+stderr in a log file.
   # A bare 'nohup $BIN &' also starts it, but confuses Jenkins deployments.
   nohup "$BIN" >>"$HOME/scrapyd.log" 2>&1 &
   echo "start at port:$PORT"
}
 
stop() {
   if [ -z "$pid" ]; then
      echo "not find program on port:$PORT"
      return 0
   fi
 
   # FIX: the original went straight to SIGKILL, despite its own comment saying
   # to try a gentle signal first. Send SIGTERM, give the daemon a short grace
   # period to shut down cleanly, and only then fall back to SIGKILL.
   kill "$pid"
   sleep 2
   if kill -0 "$pid" 2>/dev/null; then
      kill -9 "$pid"
      echo "kill program use signal 9,pid:$pid"
   else
      echo "stopped program,pid:$pid"
   fi
}
 
status() {
   # Report whether a process was found listening on $PORT.
   if [ -n "$pid" ]; then
      echo "program is running,pid:$pid"
   else
      echo "not find program on port:$PORT"
   fi
}
 
# Dispatch on the first command-line argument.
case "$1" in
   start)
      start ;;
   stop)
      stop ;;
   status)
      status ;;
   *)
      echo "Usage: {start|stop|status}" ;;
esac
 
exit 0
################################################
# Make the init script executable and register it to start at boot
chmod +x scrapyd
chkconfig scrapyd on

# Install supervisor to act as a process watchdog
pip install supervisor
mkdir -p /etc/supervisor/

3 scrapyd-client

# Install scrapyd-client (provides the scrapyd-deploy tool)
mkdir -p /application/pyplugins
cd /application/pyplugins
mkdir scrapyd-client
cd scrapyd-client
wget https://github.com/scrapy/scrapyd-client/archive/master.zip
unzip master.zip
cd scrapyd-client-master
python setup.py install

4 splash安装

# Docker on CentOS requires a 64-bit OS and a kernel >= 3.8.
# CentOS 6 ships kernel 2.6 by default, so upgrade the kernel first.
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
rpm -Uvh http://www.elrepo.org/elrepo-release-6-6.el6.elrepo.noarch.rpm
yum -y --enablerepo=elrepo-kernel install kernel-lt
# Edit the bootloader config
vi /etc/grub.conf
# Change default=1 to default=0 so the new kernel is booted
reboot
# Install docker from EPEL
rpm -Uvh http://download.fedoraproject.org/pub/epel/6/i386/epel-release-6-8.noarch.rpm
yum -y install docker-io
# Start the docker daemon
service docker start
# Enable docker at boot
chkconfig docker on


# Load the splash image from a pre-saved tarball (the image is large; it was
# exported elsewhere with: docker save scrapinghub/splash > /application/download/splash.tar)
# 'docker pull scrapinghub/splash' fetches it from the registry but takes very long
docker load < /application/splash.tar
# Run splash on port 8050, restarting automatically with the docker daemon
docker run -d -p 8050:8050 --restart=always --name=splash scrapinghub/splash
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

warrah

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值