Scrapy 模拟登录 GitHub

最新推荐文章于 2022-11-10 12:06:52 发布

平常心19-3-21

最新推荐文章于 2022-11-10 12:06:52 发布

阅读量359

点赞数

分类专栏： python python爬虫（包含框架）

本文链接：https://blog.csdn.net/qq_41831288/article/details/89839125

版权

python 同时被 2 个专栏收录

28 篇文章 0 订阅

订阅专栏

python爬虫（包含框架）

19 篇文章 3 订阅

订阅专栏

# -*- coding: utf-8 -*-
import scrapy
import re

class GithubSpider(scrapy.Spider):
    """Log in to GitHub by replaying the login form and report the result.

    The spider fetches the login page, copies the hidden form fields it
    finds there, POSTs them together with the credentials to the session
    endpoint, and finally prints every occurrence of a marker string found
    in the page returned after the login attempt.
    """

    name = 'github'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    # Default credentials sent with the login form. Spider arguments are
    # copied onto the instance by scrapy.Spider.__init__, so these can be
    # overridden per run:
    #     scrapy crawl github -a login=<user> -a password=<secret>
    login = "zhanghao"
    password = "mima"

    # Hidden inputs copied verbatim from the login page into the POST body.
    hidden_field_names = ("authenticity_token", "utf8", "commit")

    def parse(self, response):
        """Build and yield the login POST from the fetched login page.

        Args:
            response: The response for the login page in ``start_urls``.

        Yields:
            A ``scrapy.FormRequest`` to the session endpoint whose callback
            is :meth:`after_login`.
        """
        post_data = {"login": self.login, "password": self.password}
        missing = []
        for field in self.hidden_field_names:
            value = response.xpath(
                "//input[@name='{}']/@value".format(field)
            ).extract_first()
            if value is None:
                # extract_first() returns None when the input is absent;
                # FormRequest cannot url-encode None, so leave the field out
                # instead of crashing while building the request.
                missing.append(field)
            else:
                post_data[field] = value
        if missing:
            self.logger.warning(
                "Login form fields not found on %s: %s",
                response.url,
                ", ".join(missing),
            )

        yield scrapy.FormRequest(
            "https://github.com/session",
            formdata=post_data,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Print all matches of the marker pattern in the post-login page.

        Args:
            response: The response received for the login POST.
        """
        # NOTE(review): the pattern is presumably the account name that only
        # shows up once logged in -- adjust it to the account actually used.
        print(re.findall("noobpythoner|NoobPythoner", response.body.decode()))