Scrapy 模拟登录

使用Cookie模拟已登录状态

import scrapy


class LoginByCookie(scrapy.Spider):
    """Simulated login, approach 1: reuse cookies of a logged-in session.

    Requests a user page on imooc.com (慕课网) with a fixed set of cookies
    attached, so the site treats the request as coming from a session that
    is already logged in.
    """

    name = 'login_by_cookie'
    allowed_domains = ['www.imooc.com']
    # Intentionally empty: the initial request is built in start_requests().
    start_urls = []

    def start_requests(self):
        """Yield the single initial request, carrying the login cookies.

        Overrides the default ``start_requests()`` so the cookies can be
        attached to the very first request.
        """
        home_url = 'https://www.imooc.com/u/2346025'
        # NOTE(review): these look like session cookies copied from a browser.
        # They are hard-coded here, so they will presumably stop working once
        # the session expires; a real project should load them from
        # configuration instead of committing them to source control.
        login_cookie = {
            'imooc_uuid': 'c13c8cb7-442a-430e-a2c1-78d91c347b67',
            'imooc_isnew_ct': '1515076153',
            'imooc_isnew': '2',
            'loginstate': '1',
            'apsid': 'NhMDY2ZDFmODhmYWQ5ZmQ2NDI3ZDg0OTU0NWM3NTQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMjM0NjAyNQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA4MDkwMjE4MjNAcXEuY29tAAAAAAAAAAAAAAAAAAAAAGI3ZmJjOTUxMTU2YjBlOTVlOTIxYzM1ZDk0OTVmOGNhW3FQWltxUFo%3DYm',
            'PHPSESSID': 'vd48nsltdovbbifsn48pu15763',
            'IMCDNS': '0',
            'Hm_lvt_f0cfcccd7b1393990c78efdeebff3968': '1515076155,1515221269,1515746784,1516641134',
            'Hm_lpvt_f0cfcccd7b1393990c78efdeebff3968': '1516641134',
            'cvde': '5a661b6d0246d-3',
        }

        # No form data is submitted, so a plain Request (GET) is the right
        # request type; FormRequest is only needed when posting form fields.
        yield scrapy.Request(
            url=home_url,
            cookies=login_cookie,
            callback=self.parse_page,
        )

    def parse_page(self, response):
        """Print the page source and its <title> text.

        Args:
            response: the response for the cookie-authenticated request.
        """
        # response.text decodes the body with the encoding Scrapy detected
        # for the page, instead of assuming UTF-8 and risking a decode error.
        print(response.text)
        print(response.xpath('//title/text()').extract_first())
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36

使用Form表单数据实现登录

import scrapy


class LoginByFormData(scrapy.Spider):
    """Simulated login, approach 2: submit the site's login form.

    Fetches the login page, fills in the credentials and submits the form,
    then checks the resulting page for a failure message.
    """

    name = 'login_by_formdata'
    start_urls = ['http://www.example.com/users/login.php']

    def parse(self, response):
        """Submit the login form found in the login page.

        Args:
            response: the response for the login page in ``start_urls``.

        Returns:
            A FormRequest that posts the credentials; its response is
            handled by ``after_login``.
        """
        return scrapy.FormRequest.from_response(
            response,
            formdata={'username': 'john', 'password': 'secret'},
            callback=self.after_login,
        )
        # Roughly equivalent to posting straight to the login URL as shown
        # below, except that from_response() also carries over the fields
        # already present in the page's form (e.g. hidden inputs):
        # return [scrapy.FormRequest(
        #     url=login_url,
        #     formdata={'username': 'john', 'password': 'secret'},
        #     callback=self.after_login
        # )]

    def after_login(self, response):
        """Check that the login succeeded before scraping further.

        Args:
            response: the response returned after submitting the login form.
        """
        # response.body is bytes on Python 3, so testing a str against it
        # raises TypeError; compare against the decoded text instead.
        if "authentication failed" in response.text:
            self.logger.error("Login failed")
            return

        # continue scraping with authenticated session...
阅读更多

没有更多推荐了,返回首页