安装 scrapy
$ sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 627220E7
$ echo 'deb http://archive.scrapy.org/ubuntu scrapy main' | sudo tee /etc/apt/sources.list.d/scrapy.list
$ sudo apt-get update && sudo apt-get install scrapy
安装 pip
$ sudo apt-get install python-pip python-dev build-essential
$ sudo pip install --upgrade pip
$ sudo pip install --upgrade virtualenv
升级 scrapy
sudo apt-get install libssl-dev
sudo pip install --upgrade scrapy
升级 twisted
sudo pip install --upgrade twisted
安装 redis
sudo apt-get install redis-server
sudo pip install scrapy-redis
安装 mysqldb
sudo apt-get install python-dev libmysqlclient-dev
sudo pip install MySQL-python
安装 service-identity
sudo pip install service-identity
升级 pyasn1
sudo pip install pyasn1 --upgrade
安装 unidecode
sudo pip install unidecode
安装 numpy
pip install numpy
安装 tldextract
sudo pip install tldextract
安装 simplemysql
sudo pip install simplemysql
安装 nltk
sudo pip install -U nltk
下载 nltk 所需要的分词库(all 会下载所有库,大约 12G;也可以只下载所需要的库,这里跳过)
python
>>> import nltk
>>> nltk.download('all')
安装 BeautifulSoup4
sudo pip install BeautifulSoup4
安装 dateutil
sudo pip install python-dateutil
安装 pexpect
sudo -E pip install pexpect
安装 yaml
sudo apt-get install python-yaml
或者(二选一)
sudo pip install pyyaml
安装 pyap
sudo pip install pyap
安装 demjson
sudo pip install demjson