scrapy爬取微博移动版(二)

微博爬取并发高了就不让访问,这里对接一下cookies池和代理池
middlewares.py

# -*- coding: utf-8 -*-

# Define here the models for your spider middleware
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/spider-middleware.html

from scrapy import signals
import requests



import logging
import json
# Attach cookies from a cookies-pool service to every outgoing request.
class CookiesMiddleware():
    """Downloader middleware that fetches a random cookie set from a
    cookies-pool HTTP service and attaches it to each request, so that
    Weibo's per-account rate limiting is spread across many accounts.
    """

    def __init__(self, cookies_url='http://127.0.0.1:5000/weibo/random'):
        # cookies_url: pool endpoint returning a random cookie set as a
        # JSON list of {"name": ..., "value": ...} objects.
        self.logger = logging.getLogger(__name__)
        self.cookies_url = cookies_url

    @staticmethod
    def _parse_cookies(cookie_list):
        # Convert the pool's list-of-dicts payload into the {name: value}
        # mapping that Scrapy expects on request.cookies.
        return {cookie['name']: cookie['value'] for cookie in cookie_list}

    def get_random_cookies(self):
        """Return a {name: value} cookie dict from the pool, or None on failure."""
        try:
            # Bound the wait so a dead pool service cannot stall the crawl.
            response = requests.get(self.cookies_url, timeout=5)
            if response.status_code == 200:
                return self._parse_cookies(response.json())
        except Exception as exc:
            # Log the failure instead of printing; returning a falsy value
            # lets the request proceed without cookies rather than crashing.
            self.logger.warning('Failed to fetch cookies from %s: %s',
                                self.cookies_url, exc)
        return None

    def process_request(self, request, spider):
        self.logger.debug('正在获取cookies')
        cookies = self.get_random_cookies()
        if cookies:
            request.cookies = cookies
            self.logger.debug('使用cookies' + json.dumps(cookies))




# Route requests through a random proxy from a proxy-pool service.
class ProxyMiddleware():
    """Downloader middleware that fetches a random proxy from a
    proxy-pool HTTP service and sets it on request.meta['proxy'],
    spreading the crawl across many exit IPs.
    """

    def __init__(self, proxy_url='http://127.0.0.1:5002/daili'):
        # proxy_url: pool endpoint returning one "host:port" proxy as plain text.
        self.logger = logging.getLogger(__name__)
        self.proxy_url = proxy_url

    @staticmethod
    def _format_proxy(proxy):
        # Scrapy expects a full scheme-qualified URI in request.meta['proxy'].
        return 'http://{proxy}'.format(proxy=proxy)

    def get_random_proxy(self):
        """Return a 'host:port' proxy string from the pool, or None on failure."""
        try:
            # Bound the wait so a dead pool service cannot stall the crawl.
            response = requests.get(self.proxy_url, timeout=5)
            if response.status_code == 200:
                # strip() guards against a trailing newline from the pool API,
                # which would otherwise corrupt the proxy URI.
                return response.text.strip()
        except Exception as exc:
            # Log the failure instead of printing; returning a falsy value
            # lets the request go out directly rather than crashing.
            self.logger.warning('Failed to fetch proxy from %s: %s',
                                self.proxy_url, exc)
        return None

    def process_request(self, request, spider):
        proxy = self.get_random_proxy()
        if proxy:
            self.logger.debug('使用代理' + proxy)
            request.meta['proxy'] = self._format_proxy(proxy)

在 settings.py 中添加 DOWNLOADER_MIDDLEWARES 配置:

# Enable the custom downloader middlewares. The number is the middleware
# order: lower values run closer to the engine on process_request, so the
# cookies middleware (545) runs before the proxy middleware (546).
DOWNLOADER_MIDDLEWARES = {
    'weibo.middlewares.WeiboDownloaderMiddleware': 543,
    'weibo.middlewares.CookiesMiddleware': 545,  # attach pooled cookies
    'weibo.middlewares.ProxyMiddleware': 546  # route through pooled proxy
}

然后设置一下settings.py其他设置,运行爬虫即可

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值