发表于:http://www.ballooncat.com/scrapy-setup.html
最近在深入折腾scrapy,先放出环境搭建教程一枚,随后放出更多关于scrapy配置,扩展方面的教程.教程为beta版,部分细节在实际环境中可能会有出入.
前言:
1.系统centos5.5,默认安装了python2.4,需要升级到python2.7+
安装教程包括:
sqlite3 + python2.7.3 + mysql5.1.55 + sphinx2.0.6 + easy_install + python_mysql + scrapy + nginx + uwsgi + Redis install guide
安装准备工作
1. 安装开发套件
yum install gcc gcc-c++ zlib zlib-devel make cmake libxml2 libxslt-devel openssl-devel imake automake libtool python-devel ncurses-devel
开始搭建环境:
1. 安装sqlite3(必须在安装python之前安装,否则编译出来的python没有sqlite3模块)
wget -c http://www.sqlite.org/sqlite-autoconf-3071501.tar.gz
tar zxvf sqlite-autoconf-3071501.tar.gz
cd sqlite-autoconf-3071501
./configure
make
make install
cd ~
2. 安装python2.7.3
wget -c http://www.python.org/ftp/python/2.7.3/Python-2.7.3.tgz
tar zxvf Python-2.7.3.tgz
cd Python-2.7.3
./configure
make && make install
mv /usr/bin/python /usr/bin/python_old_2.4
ln -s /usr/local/bin/python2.7 /usr/bin/python
python -V (显示2.7.3版本则为安装成功)
vim /usr/bin/yum
修改#!/usr/bin/python为#!/usr/bin/python2.4
cd ~
3.安装easy_install
wget -q http://peak.telecommunity.com/dist/ez_setup.py
python ez_setup.py
4.安装mysql5.1.55
wget -c http://downloads.mysql.com/archives/mysql-5.1/mysql-5.1.55.tar.gz
tar zxvf mysql-5.1.55.tar.gz
cd mysql-5.1.55
sh BUILD/autorun.sh
./configure --prefix=/usr/local/mysql \
--with-charset=utf8 \
--with-extra-charset=all \
--enable-thread-safe-client \
--enable-assembler \
--with-readline \
--with-big-tables \
--with-named-curses-libs=/usr/lib/libncursesw.so.5
make && make install
ln -s /usr/local/mysql/bin/mysql /usr/local/bin/mysql
ln -s /usr/local/mysql/bin/mysqladmin /usr/local/bin/mysqladmin
groupadd mysql
useradd -g mysql mysql
chown -R mysql.mysql /usr/local/mysql/
cd /usr/local/mysql
mkdir var
chown -R root.mysql .
chown -R mysql /usr/local/mysql/var/
cp /root/mysql-5.1.55/support-files/mysql.server /etc/init.d/mysqld
chmod 700 /etc/init.d/mysqld
chkconfig --add mysqld
chkconfig mysqld on
cp /root/mysql-5.1.55/support-files/my-medium.cnf /etc/my.cnf
/usr/local/mysql/bin/mysql_install_db --user=mysql --datadir=/usr/local/mysql/var
/etc/init.d/mysqld start
mysqladmin -u root password 123123
mysql -u root -p -S /tmp/mysql.sock
cd ~
5.安装sphinx
wget -c http://sphinxsearch.com/files/sphinx-2.0.6-release.tar.gz
tar zxvf sphinx-2.0.6-release.tar.gz
cd sphinx-2.0.6-release
./configure --prefix=/usr/local/sphinx
make && make install
cd /usr/local/sphinx/etc/
cp sphinx.conf.dist sphinx.conf
cd ~
6.安装mysql_python
wget -O MySQL-python-1.2.3c1.tar.gz "http://downloads.sourceforge.net/project/mysql-python/mysql-python-test/1.2.3c1/MySQL-python-1.2.3c1.tar.gz?use_mirror=nchc"
tar xzvf MySQL-python-1.2.3c1.tar.gz
cd MySQL-python-1.2.3c1
ln -s /usr/local/mysql/bin/mysql_config /usr/local/bin/mysql_config
ln -s /usr/local/mysql/lib/mysql/libmysqlclient* /usr/lib
ldconfig
python setup.py install
cd ~
7.安装scrapy
wget -c http://lxml.de/files/lxml-3.0.1.tgz
tar zxvf lxml-3.0.1.tgz
cd lxml-3.0.1
python setup.py install
cd ~
wget -c http://pypi.python.org/packages/source/T/Twisted/Twisted-12.2.0.tar.bz2#md5=9a321b904d01efd695079f8484b37861
tar jxvf Twisted-12.2.0.tar.bz2
cd Twisted-12.2.0
python setup.py install
cd ~
wget http://pypi.python.org/packages/source/p/pyOpenSSL/pyOpenSSL-0.12.tar.gz
tar zxvf pyOpenSSL-0.12.tar.gz
cd pyOpenSSL-0.12
python setup.py install
easy_install scrapy
cd ~
8.安装uwsgi
wget -c http://projects.unbit.it/downloads/uwsgi-1.4.3.tar.gz
tar zxvf uwsgi-1.4.3.tar.gz
cd uwsgi-1.4.3
python setup.py install
cd ~
9. 安装Redis
wget https://redis.googlecode.com/files/redis-2.6.7.tar.gz
tar zxvf redis-2.6.7.tar.gz
cd redis-2.6.7
make && make install
easy_install redis
cd ~
10.安装nginx
wget ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.30.tar.gz
tar zxvf pcre-8.30.tar.gz
cd pcre-8.30
./configure
make && make install
cd ~
wget -O zlib-1.2.6.tar.gz http://sourceforge.net/projects/libpng/files/zlib/1.2.6/zlib-1.2.6.tar.gz/download
tar zxvf zlib-1.2.6.tar.gz
cd zlib-1.2.6
./configure
make && make install
cd ~
wget http://nginx.org/download/nginx-1.1.9.tar.gz
tar zxvf nginx-1.1.9.tar.gz
cd nginx-1.1.9
mkdir -p /var/tmp/nginx
./configure --prefix=/usr/local/nginx \
--pid-path=/var/run/nginx.pid \
--lock-path=/var/lock/nginx.lock \
--with-http_ssl_module \
--with-http_dav_module \
--with-http_flv_module \
--with-http_realip_module \
--with-http_gzip_static_module \
--with-http_stub_status_module \
--with-mail \
--with-mail_ssl_module \
--with-pcre=../pcre-8.30 \
--with-zlib=../zlib-1.2.6 \
--with-debug \
--http-client-body-temp-path=/var/tmp/nginx/client \
--http-proxy-temp-path=/var/tmp/nginx/proxy \
--http-fastcgi-temp-path=/var/tmp/nginx/fastcgi \
--http-uwsgi-temp-path=/var/tmp/nginx/uwsgi \
--http-scgi-temp-path=/var/tmp/nginx/scgi
make && make install
vim /etc/init.d/nginx
输入:
#!/bin/bash
#
#chkconfig: - 85 15
#description: Nginx is a World Wide Web server.
#processname: nginx
#
# SysV init script for the nginx binary installed under /usr/local/nginx.
#
# Usage: /etc/init.d/nginx {start|restart|reload|stop|test|show}
#
# Exit status:
#   0  the requested action succeeded
#   2  unknown or missing action
#   otherwise the exit status of the nginx command that failed

nginx=/usr/local/nginx/sbin/nginx
conf=/usr/local/nginx/conf/nginx.conf
rc=0

case "${1:-}" in
  start)
    echo -n "Starting Nginx"
    if "$nginx" -c "$conf"; then
      echo " done"
    else
      rc=$?
      echo " failed"
    fi
    ;;
  stop)
    echo -n "Stopping Nginx"
    # Send the default SIGTERM first so nginx can shut itself down;
    # errors are silenced because "not running" is fine for stop.
    killall nginx 2>/dev/null
    # Give the processes a few seconds to exit so that "restart" does
    # not try to start a new master while the old one still runs.
    for i in 1 2 3 4 5; do
      pgrep -x nginx >/dev/null || break
      sleep 1
    done
    # SIGKILL only as a last resort for processes that ignored SIGTERM.
    if pgrep -x nginx >/dev/null; then
      killall -9 nginx
    fi
    echo " done"
    ;;
  test)
    # Validate the configuration file without starting the server.
    "$nginx" -t -c "$conf" || rc=$?
    ;;
  reload)
    echo -n "Reloading Nginx"
    # Let nginx signal its own master instead of scraping ps output,
    # which fails noisily when no master process is found.
    if "$nginx" -s reload -c "$conf"; then
      echo " done"
    else
      rc=$?
      echo " failed"
    fi
    ;;
  restart)
    "$0" stop
    "$0" start || rc=$?
    ;;
  show)
    # The [n] bracket keeps the grep process itself out of the listing.
    ps aux | grep '[n]ginx'
    ;;
  *)
    echo "Usage: $0 {start|restart|reload|stop|test|show}" >&2
    rc=2
    ;;
esac

exit "$rc"
chmod +x /etc/init.d/nginx
chkconfig --add nginx
chkconfig nginx on
11. 配置sphinx:
在/usr/local/sphinx/etc下创建scrapy.conf,写入:
# Sphinx configuration for indexing scraped pages stored in MySQL.

# Data source: reads rows from the `data` table of the `python` database.
source scrapy_source
{
    type = mysql
    sql_host = localhost
    sql_user = root
    sql_pass = 123123
    sql_db = python
    sql_port = 3306
    # Issued before the main query so text is fetched as UTF-8.
    sql_query_pre = SET NAMES utf8
    # The first column (id) is the document id; the rest are indexed as text.
    sql_query = SELECT id,title,keywords,descrip,body FROM `data`
}

# Index built from scrapy_source; index files are written under `path`.
index scrapy_index
{
    source = scrapy_source
    path = /usr/local/sphinx/var/data/scrapy_index
    docinfo = extern
    mlock = 0
    morphology = none
    min_word_len = 1
    html_strip = 0
}

# Settings for the `indexer` tool.
indexer
{
    mem_limit = 128M
}

# Settings for the `searchd` daemon.
searchd
{
    listen = 9312
    read_timeout = 5
    max_children = 30
    max_matches = 1000
    seamless_rotate = 0
    preopen_indexes = 0
    unlink_old = 1
    pid_file = /usr/local/sphinx/var/log/searchd.pid
    # File name corrected from "searcd.log" to match the daemon name.
    log = /usr/local/sphinx/var/log/searchd.log
    query_log = /usr/local/sphinx/var/log/query.log
}
创建索引:
/usr/local/sphinx/bin/indexer -c /usr/local/sphinx/etc/scrapy.conf --all
重新创建索引:
/usr/local/sphinx/bin/indexer -c /usr/local/sphinx/etc/scrapy.conf --all --rotate
搜索测试:
/usr/local/sphinx/bin/search -c /usr/local/sphinx/etc/scrapy.conf centos
beta0.2:
1. fix几处开发套件遗漏问题
2. fix scrapy安装过程中pyOpenssl问题
3. fix mysql安装步骤先后顺序问题
beta0.3:
1. fix一处python mysql扩展执行时找不到mysql库文件的问题
2. fix python redis扩展安装的遗漏