爬虫学习-第二十四篇

import requests
from lxml import etree
from pyquery import PyQuery as pq

# Module-level placeholders; both names start out bound to None.
cookie = headers = None

class Login(object):
    """Log in to GitHub through one ``requests`` session and print account data.

    The flow is: fetch the login page to obtain the form's authenticity
    token, POST the credentials, then print the items found on the returned
    page and the name/e-mail found on the profile settings page.
    """

    def __init__(self):
        self.login_url = 'https://github.com/login'
        self.post_url = 'https://github.com/session'
        self.logined_url = 'https://github.com/settings/profile'
        # A single session is reused for every request so that cookies set by
        # the login page are sent back with the credential POST.
        self.session = requests.Session()

    def getHeader(self, cookie):
        """Return the HTTP headers sent with every request to github.com.

        Args:
            cookie: Kept for interface compatibility; it is not used, no
                ``Cookie`` header is set here.

        Returns:
            dict: The ``Referer``, ``User-Agent`` and ``Host`` headers.
        """
        return {
            'Referer': 'https://github.com/login',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
            'Host': 'github.com',
        }

    def token(self):
        """Fetch the login page and return the authenticity token candidates.

        Returns:
            list: Matched ``value`` attributes; empty when nothing matched.
        """
        # Use the same headers as the other requests; the module-level
        # ``headers`` is None and would drop the custom User-Agent/Referer.
        response = self.session.get(
            self.login_url, headers=self.getHeader(self.session.cookies))
        selector = etree.HTML(response.text)
        candidates = (
            # Most specific first: the token inside the form posting to /session.
            '//form[@action="/session"]//input[@name="authenticity_token"]/@value',
            '//input[@name="authenticity_token"]/@value',
            # Original positional lookup, kept as a last resort.
            '//div//input[2]/@value',
        )
        for expression in candidates:
            token = selector.xpath(expression)
            if token:
                return token
        return []

    def login(self, email, password):
        """Submit the credentials and print what the resulting pages contain.

        Args:
            email: Value sent in the form's ``login`` field.
            password: Value sent in the form's ``password`` field.

        Returns:
            None in every case; results are printed rather than returned.
        """
        tokens = self.token()
        if not tokens:
            # Without a token the POST cannot succeed; stop instead of
            # failing with an IndexError on the empty list.
            print('authenticity_token not found on the login page')
            return None
        authenticity_token = tokens[0]
        post_data = {
            'commit': 'Sign in',
            'utf8': '✓',
            'authenticity_token': authenticity_token,
            'login': email,
            'password': password,
        }
        cookie = self.session.cookies
        print('获取的cookie为:', cookie)
        print('authenticity_token:', authenticity_token)
        headers = self.getHeader(cookie)
        response = self.session.post(self.post_url, data=post_data, headers=headers)
        print('登录之后:', response)
        if response.status_code != 200:
            return None
        self.dynamics(response.text)
        response = self.session.get(self.logined_url, headers=headers)
        print('logined_url:', response)
        if response.status_code == 200:
            self.profile(response.text)

    def dynamics(self, html):
        """Print the title text of each alert item inside the news container.

        Args:
            html: Markup of the page returned by the login POST.
        """
        selector = etree.HTML(html)
        dynamics = selector.xpath('//div[contains(@class, "news")]//div[contains(@class, "alert")]')
        for item in dynamics:
            dynamic = ' '.join(item.xpath('.//div[@class="title"]//text()')).strip()
            print(dynamic)

    def profile(self, html):
        """Print the profile name and the selectable e-mail addresses.

        Args:
            html: Markup of the profile settings page.
        """
        selector = etree.HTML(html)
        names = selector.xpath('//input[@id="user_profile_name"]/@value')
        # The input is absent when the page is not the profile form; print
        # None for the name instead of raising IndexError.
        name = names[0] if names else None
        email = selector.xpath('//select[@id="user_profile_email"]/option[@value!=""]/text()')
        print(name, email)


if __name__ == "__main__":
    # Script entry point. The credentials are presumably placeholders to be
    # replaced with a real account before running.
    Login().login(email='username', password='password')

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值