下面的例子模拟登录知乎的个人设置页面
设置代理池 scrapy + tor
#安装tor
sudo apt-get install tor
sudo /etc/init.d/tor restart #重启服务
tor --hash-password mypassword #生成hash密码
sudo vim /etc/tor/torrc #编辑加入如下内容
ControlPort 9051
HashedControlPassword 16:xxx
#注意:选项和值必须写在同一行;16:xxx 请替换为刚才 tor --hash-password 生成的完整hash密码
保存退出
sudo /etc/init.d/tor restart #重启tor
class ZH(CrawlSpider):
    """Fetch the Zhihu profile-settings page using an existing login cookie.

    The spider does not perform a login itself: it sends the cookies of an
    already authenticated browser session with the first request and prints
    the body of the page that comes back.
    """

    name = 'zhihu'
    # Scrapy expects bare domain names here, not full URLs.
    allowed_domains = ['www.zhihu.com']

    def start_requests(self):
        """Build the initial request for the settings page with session cookies.

        Returns:
            A one-element list containing the request whose response is
            handed to ``parse``.
        """
        # Paste the raw ``Cookie`` header value ("k1=v1; k2=v2; ...") copied
        # from a logged-in browser session. It must be a plain string.
        cookie = '你的cookie字符串'

        coo = {}
        for seg in cookie.split(';'):
            # Split on the first '=' only: cookie values may contain '='.
            key, sep, value = seg.partition('=')
            key = key.strip()  # segments are usually separated by "; "
            if not sep or not key:
                # Ignore empty or malformed segments instead of crashing.
                continue
            coo[key] = value.strip()

        return [
            scrapy.Request(
                'https://www.zhihu.com/settings/profile',
                cookies=coo,
                callback=self.parse,
            )
        ]

    def parse(self, response):
        """Print the raw body of the downloaded page.

        NOTE(review): Scrapy's documentation advises against overriding
        ``parse`` on a ``CrawlSpider`` that defines rules; no rules are
        defined in this class -- confirm before adding any.
        """
        # Parenthesised form works on both Python 2 and Python 3.
        print(response.body)
补充一下:
# Encode a unicode object into a UTF-8 byte string (uses the Python 2 `unicode` type).
# NOTE(review): `str` is used here as a variable name and shadows the built-in
# `str`; presumably a placeholder for your own variable -- confirm and rename.
str = unicode.encode(str,'utf-8')