import requests
from lxml import etree
class Login(object):
    """Log in to github.com with a shared ``requests`` session and print feed entries.

    The flow is: fetch the login page, read the hidden form token from it,
    POST the credentials, then request the "for you" feed with the same
    (now cookie-carrying) session and print a few fields per entry.
    """

    # Seconds to wait on each HTTP call; without a timeout ``requests`` may
    # block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 10

    # XPath expressions tried in order to find the login form token. The
    # first one is the original positional lookup; the others only come into
    # play when that lookup finds nothing.
    _TOKEN_XPATHS = (
        '//*[@id="login"]/div[4]/form/input[1]/@value',
        '//form[@action="/session"]//input[@name="authenticity_token"]/@value',
        '//input[@name="authenticity_token"]/@value',
    )

    def __init__(self):
        """Set up the request headers, the endpoint URLs and the HTTP session."""
        self.headers = {
            'Referer': 'https://github.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
            'Host': 'github.com'
        }
        self.login_url = 'https://github.com/login'
        self.post_url = 'https://github.com/session'
        # One session for every request so cookies set during login are reused.
        self.session = requests.Session()

    def token(self) -> str:
        """Fetch the login page and return the form token found in it.

        Returns:
            The value of the first token input matched by ``_TOKEN_XPATHS``.

        Raises:
            IndexError: if none of the expressions matches anything in the
                returned page.
        """
        response = self.session.get(
            self.login_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        selector = etree.HTML(response.text)
        for expression in self._TOKEN_XPATHS:
            values = selector.xpath(expression)
            if values:
                return values[0]
        # Same exception type as the bare ``[0]`` lookup used to raise, but
        # with enough context to diagnose a changed page layout.
        raise IndexError(
            'authenticity_token not found in %s (HTTP %s)'
            % (self.login_url, response.status_code)
        )

    def login(self, email: str, password: str) -> None:
        """POST the credentials and, on HTTP 200, hand the page to ``dynamics``.

        Args:
            email: value sent as the ``login`` form field.
            password: value sent as the ``password`` form field.
        """
        post_data = {
            'commit': 'Sign in',
            'authenticity_token': self.token(),
            'login': email,
            'password': password
        }
        response = self.session.post(
            self.post_url,
            data=post_data,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code == 200:
            self.dynamics(response.text)
        else:
            # Report the failure instead of returning silently.
            print('Login request failed with HTTP status', response.status_code)

    def dynamics(self, html: str) -> None:
        """Print the login name found in ``html``, then fetch the feed.

        Args:
            html: body of the response to the login POST.
        """
        selector = etree.HTML(html)
        # xpath() returns a list; it is printed as-is (empty if no match).
        user_name = selector.xpath('//meta[@name="octolytics-actor-login"]/@content')
        print(user_name)
        self.new_dynamics()

    def new_dynamics(self) -> None:
        """Request the "for you" feed and print four text fields per entry.

        Original author's notes (translated): the earlier headers did not
        imitate a browser closely enough and were detected, hence the fuller
        header set below. The author follows few accounts, so the feed is a
        single page; with more data, pagination would have to be handled.
        """
        # NOTE(review): the caret-laden "sec-ch-ua*" values look like
        # leftovers from shell escaping in a copied request -- kept
        # unchanged; confirm whether the server cares.
        headers = {
            "authority": "github.com",
            "accept": "text/html",
            "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
            "cache-control": "no-cache",
            "dnt": "1",
            "pragma": "no-cache",
            "referer": "https://github.com/",
            "sec-ch-ua": "^\\^Google",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "^\\^Windows^^",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
            "sec-gpc": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.5666.197 Safari/537.36",
            "x-requested-with": "XMLHttpRequest"
        }
        response = self.session.get(
            "https://github.com/conduit/for_you_feed",
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        print(response.status_code)
        # Force UTF-8 before reading .text rather than relying on detection.
        response.encoding = "utf-8"
        entries = etree.HTML(response.text).xpath('//div[@class="p-3"]')
        for entry in entries:
            # string(...) yields the first matched node's text, or '' if
            # nothing matches, so a missing field never raises here.
            name = entry.xpath('string(./section/h5/a//text())')
            # Named event_type (not ``type``) to avoid shadowing the builtin.
            event_type = entry.xpath('string(./section/h5//span/a/text())')
            content = entry.xpath('string(./div/section[1]/text())')
            link_text = entry.xpath('string(./div/section[2]//a/text())')
            print(name, event_type, content, link_text)
if __name__ == "__main__":
    # Script entry point: the credentials are blank placeholders in this
    # file, so the login is attempted with an empty email and password.
    Login().login(email='', password='')
# 03-16  (stray date-like token, apparently left over from where this snippet was copied; not code)