python模拟登录百度贴吧_python模拟登录知乎的问题

最新推荐文章于 2021-02-21 04:34:47 发布

weixin_39844267

最新推荐文章于 2021-02-21 04:34:47 发布

阅读量88

点赞数

文章标签： python模拟登录百度贴吧

这段代码展示了一个使用Scrapy框架实现的知乎登录爬虫。它首先请求验证码，如果需要，用户将输入验证码。然后，爬虫使用提供的用户名和密码尝试登录。登录过程中涉及签名的生成和HTTP头部的设置。成功登录后，爬虫可以进一步抓取受限内容。

摘要由CSDN通过智能技术生成

该楼层疑似违规已被系统折叠 隐藏此楼 查看此楼

# Default HTTP headers attached to the requests built by this spider.
headers = {
    'Connection': 'keep-alive',
    # The Host header carries only the host name; a URL scheme such as
    # "http://" is not valid here.
    'Host': 'www.zhihu.com',
    'Referer': 'https://www.zhihu.com/signup?next=%2F',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
}

# Values sent in the sign-in form and fed into the request signature.
grant_type = 'password'
client_id = "c3cef7c66a1843f8b3a9e6a1e3160e20"
# NOTE(review): this value contains embedded spaces and may have been garbled
# when the snippet was pasted -- confirm before relying on it.
x_UDID = 'ANCuT28Zjg2PTn2VG48gf99U - sbL76I8EN4 ='
source = 'com.zhihu.web'

# Millisecond timestamp as a digit string; evaluated once, when these
# assignments run, and reused for both the form field and the signature.
timestamp = str(int(time.time() * 1000))
# Millisecond timestamp without integer truncation (float rendered as text).
timestamp2 = str(time.time() * 1000)

# Endpoint used to query, fetch and submit the captcha.
captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'

def start_requests(self):
    """Return the initial request list for the spider.

    The list holds a single request to ``self.captcha_url`` carrying
    ``self.headers``; its response is delivered to ``self.login``.
    """
    first_request = scrapy.Request(
        self.captcha_url,
        headers=self.headers,
        callback=self.login,
    )
    return [first_request]

def login(self, response):
    """Continue the sign-in flow based on whether a captcha is required.

    Reads the ``show_captcha`` flag from the JSON body of ``response``.

    * Flag truthy: yields a PUT request to ``self.captcha_url`` whose
      response is handled by ``self.captcha``.
    * Flag falsy: yields the sign-in form request directly; its response is
      handled by ``self.check_login``.
    """
    need_cap = json.loads(response.body)['show_captcha']
    if need_cap:
        print("需要验证码")
        yield scrapy.Request(url=self.captcha_url, headers=self.headers,
                             callback=self.captcha, method='PUT')
    else:
        print("不需要验证码")
        post_url = 'https://www.zhihu.com/api/v3/oauth/sign_in'
        post_data = {
            "client_id": self.client_id,
            "username": "******",
            "password": "******",
            # grant_type is part of the signed payload (see the signature
            # arguments below), so it must also be sent as a form field,
            # exactly as the post-captcha sign-in form does.
            "grant_type": self.grant_type,
            "source": self.source,
            "timestamp": self.timestamp,
            "signature": self.get_signature(self.grant_type, self.client_id, self.source, self.timestamp),
            "lang": "en",
            "ref_source": "homepage",
            "captcha": '',
        }
        yield scrapy.FormRequest(url=post_url, formdata=post_data, headers=self.headers, callback=self.check_login)

def get_signature(self, grant_type, client_id, source, timestamp):
    """Compute the sign-in request signature.

    The four string arguments are encoded and fed, in the order
    ``grant_type``, ``client_id``, ``source``, ``timestamp``, into an
    HMAC-SHA1 keyed with a fixed secret.

    Returns:
        The digest as a hexadecimal string.
    """
    signer = hmac.new(b'd1b964811afb40118a12068ff74a12f4', digestmod=sha1)
    for piece in (grant_type, client_id, source, timestamp):
        signer.update(str.encode(piece))
    return str(signer.hexdigest())

def captcha(self, response):
    """Save the captcha image, prompt for its text and submit the answer.

    Expects the JSON body of ``response`` to carry the image as base64 text
    under ``img_base64``. The decoded bytes are written to ``captcha.jpg`` in
    the current working directory, the user is prompted on stdin, and a form
    request with the typed text is yielded to ``self.captcha_url``; its
    response is handled by ``self.captcha_login``.

    If the body is not valid JSON or does not contain the image field, a
    failure message is printed and nothing is yielded.
    """
    try:
        img = json.loads(response.text)['img_base64']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not JSON. KeyError / TypeError: the JSON parsed
        # but has no 'img_base64' entry (e.g. an error payload).
        print('获取img_base64失败')
    else:
        img_data = base64.b64decode(img.encode('utf8'))
        # The with-statement closes the file; no explicit close() is needed.
        with open('captcha.jpg', 'wb') as f:
            f.write(img_data)
        captcha = input('请输入验证码：')
        post_data = {'input_text': captcha}
        yield scrapy.FormRequest(url=self.captcha_url, formdata=post_data, callback=self.captcha_login,
                                 headers=self.headers)

def captcha_login(self, response):
    """Submit the sign-in form once the captcha answer has been accepted.

    Reads the ``success`` flag from the JSON body of ``response``. When it is
    truthy, yields the sign-in form request (handled by ``self.check_login``)
    with extra request headers layered on top of ``self.headers``. When the
    body cannot be parsed, lacks the flag, or the flag is falsy, a message is
    printed and nothing is yielded.
    """
    try:
        cap_result = json.loads(response.body)['success']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not JSON. KeyError / TypeError: the JSON parsed
        # but has no 'success' entry (e.g. an error payload).
        print('关于验证码的POST请求响应失败!')
    else:
        if cap_result:
            print('验证成功')
            post_url = 'https://www.zhihu.com/api/v3/oauth/sign_in'
            post_data = {
                "client_id": self.client_id,
                "username": "******",
                "password": "******",
                "grant_type": self.grant_type,
                "source": self.source,
                "timestamp": self.timestamp,
                "signature": self.get_signature(self.grant_type, self.client_id, self.source, self.timestamp),
                "lang": "en",
                "ref_source": "homepage",
                "captcha": '',
            }
            # Work on a copy: updating self.headers in place would leak these
            # sign-in-only headers into every later request that reuses the
            # shared dict.
            headers = dict(self.headers)
            headers.update({
                'Origin': 'https://www.zhihu.com',
                'Pragma': 'no-cache',
                'Cache-Control': 'no-cache',
                # NOTE(review): hard-coded token value; presumably it has to
                # match the current session -- confirm.
                'x-xsrftoken': 'UiFIIz9fMjuytEYZ7VViRIBKZugpWsEK',
                'X-Zse-83': '3_1.1',
                'x-requested-with': 'fetch',
            })
            yield scrapy.FormRequest(url=post_url, formdata=post_data, headers=headers, callback=self.check_login)
        else:
            # Make a rejected captcha visible instead of ending silently.
            print('验证码验证失败')

def check_login(self, response):
    """Check the sign-in response and, on success, request the start URLs.

    The server's reply is parsed as JSON; the presence of a ``uid`` key is
    treated as a successful login. In that case one request per entry of
    ``self.start_urls`` is yielded (with ``dont_filter=True`` and
    ``self.headers``). Otherwise nothing is yielded.
    """
    # Decide from the data returned by the server whether login succeeded.
    payload = json.loads(response.text)
    if "uid" not in payload:
        return
    for target in self.start_urls:
        yield scrapy.Request(target, dont_filter=True, headers=self.headers)

weixin_39844267

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python模拟登录百度贴吧_python模拟登录知乎的问题

该楼层疑似违规已被系统折叠隐藏此楼查看此楼headers = {'Connection': 'keep-alive','Host': 'http://www.zhihu.com','Referer': 'https://www.zhihu.com/signup?next=%2F','User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) A...
复制链接

扫一扫