python模拟登录百度贴吧_python模拟登录知乎的问题

这段代码展示了一个基于 Scrapy 框架实现的知乎登录爬虫。它首先请求验证码接口,判断本次登录是否需要验证码;如果需要,则下载验证码图片并由用户手动输入。随后,爬虫使用提供的用户名和密码尝试登录,登录过程中涉及 HMAC-SHA1 签名的生成和 HTTP 请求头的设置。登录成功后,爬虫即可继续抓取需要登录才能访问的内容。
摘要由CSDN通过智能技术生成

该楼层疑似违规已被系统折叠 隐藏此楼查看此楼

# Request headers and OAuth constants shared by every login request.
# NOTE(review): the methods below read these through ``self.<name>``, so they
# are expected to live in the spider class body; the class header is not
# visible in this excerpt -- confirm placement/indentation when pasting back.
headers = {
    'Connection': 'keep-alive',
    # A Host header carries only ``host[:port]``. The previous value included
    # the "http://" scheme, which is not a valid Host value.
    'Host': 'www.zhihu.com',
    'Referer': 'https://www.zhihu.com/signup?next=%2F',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
}

# Inputs to the sign-in signature (see get_signature) and the sign-in form.
grant_type = 'password'
client_id = "c3cef7c66a1843f8b3a9e6a1e3160e20"

# Not read by any code visible in this excerpt.
# NOTE(review): the embedded spaces look like copy/paste damage -- confirm the
# real value before relying on it.
x_UDID = 'ANCuT28Zjg2PTn2VG48gf99U - sbL76I8EN4 ='

source = 'com.zhihu.web'

# Millisecond epoch timestamp as a digit string. It is evaluated once, when
# this statement runs, so every request built later reuses the same value.
timestamp = str(int(time.time() * 1000))

# Float-formatted variant of the same clock reading; not read by any code
# visible in this excerpt.
timestamp2 = str(time.time() * 1000)

# Endpoint used both to ask whether a captcha is required and to fetch/submit it.
captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'

def start_requests(self):
    """Seed the crawl with a single request to the captcha endpoint.

    Returns:
        A one-element list holding the request to ``self.captcha_url``;
        its response is handled by ``self.login``.
    """
    captcha_probe = scrapy.Request(
        self.captcha_url,
        headers=self.headers,
        callback=self.login,
    )
    return [captcha_probe]

def login(self, response):
    """Handle the captcha probe: fetch a captcha if required, else sign in.

    Args:
        response: Response from ``self.captcha_url``; its body is parsed as
            JSON and must contain a ``show_captcha`` entry.

    Yields:
        Either a PUT request to the captcha endpoint (handled by
        ``self.captcha``) or the sign-in form request (handled by
        ``self.check_login``).
    """
    need_cap = json.loads(response.body)['show_captcha']

    if need_cap:
        print("需要验证码")
        yield scrapy.Request(
            url=self.captcha_url,
            headers=self.headers,
            callback=self.captcha,
            method='PUT',
        )
        return

    print("不需要验证码")
    post_url = 'https://www.zhihu.com/api/v3/oauth/sign_in'
    post_data = {
        "client_id": self.client_id,
        "username": "******",
        "password": "******",
        # The signature below is computed over grant_type, and captcha_login
        # sends this field too; it was missing from this form.
        "grant_type": self.grant_type,
        "source": self.source,
        "timestamp": self.timestamp,
        "signature": self.get_signature(
            self.grant_type, self.client_id, self.source, self.timestamp
        ),
        "lang": "en",
        "ref_source": "homepage",
        "captcha": '',
    }
    yield scrapy.FormRequest(
        url=post_url,
        formdata=post_data,
        headers=self.headers,
        callback=self.check_login,
    )

def get_signature(self, grant_type, client_id, source, timestamp):
    """Return the hex HMAC-SHA1 signature sent with the sign-in form.

    The digest is keyed with a fixed secret and fed ``grant_type``,
    ``client_id``, ``source`` and ``timestamp`` in that order.

    Args:
        grant_type: Grant type string.
        client_id: Client id string.
        source: Source identifier string.
        timestamp: Timestamp; a ``str`` or anything ``str()`` renders
            the same way (e.g. an ``int`` of milliseconds).

    Returns:
        The 40-character lowercase hexadecimal digest.
    """
    hm = hmac.new(b'd1b964811afb40118a12068ff74a12f4', digestmod=sha1)
    # str() is a no-op for the string arguments callers already pass and lets
    # a numeric timestamp through instead of raising TypeError.
    for part in (grant_type, client_id, source, timestamp):
        hm.update(str(part).encode())
    return hm.hexdigest()

def captcha(self, response):
    """Save the captcha image, ask the user for its text, and submit it.

    Args:
        response: Response whose ``text`` is JSON carrying the captcha image
            as base64 under ``img_base64``.

    Yields:
        A form request posting the typed captcha to ``self.captcha_url``
        (handled by ``self.captcha_login``). Nothing is yielded when the
        image cannot be extracted from the response.
    """
    try:
        img = json.loads(response.text)['img_base64']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not JSON. KeyError/TypeError: valid JSON that
        # lacks the field or is not an object -- previously these escaped
        # the handler, so the failure message was never printed.
        print('获取img_base64失败')
        return

    img_data = base64.b64decode(img.encode('utf8'))
    # The with-block closes the file; no explicit close() is needed.
    with open('captcha.jpg', 'wb') as f:
        f.write(img_data)

    # Blocks until the user has looked at captcha.jpg and typed the text.
    captcha_text = input('请输入验证码:')
    post_data = {'input_text': captcha_text}
    yield scrapy.FormRequest(
        url=self.captcha_url,
        formdata=post_data,
        callback=self.captcha_login,
        headers=self.headers,
    )

def captcha_login(self, response):
    """Sign in after the captcha submission has been accepted.

    Args:
        response: Response to the captcha POST; its body is JSON with a
            ``success`` entry.

    Yields:
        The sign-in form request (handled by ``self.check_login``) when
        ``success`` is truthy. Nothing is yielded when the response cannot
        be parsed or the captcha was rejected.
    """
    try:
        cap_result = json.loads(response.body)['success']
    except (ValueError, KeyError, TypeError):
        # KeyError/TypeError cover valid JSON without a usable "success"
        # entry; previously only malformed JSON reached this message.
        print('关于验证码的POST请求响应失败!')
        return

    if not cap_result:
        # Captcha rejected: nothing further is attempted.
        return

    print('验证成功')
    post_url = 'https://www.zhihu.com/api/v3/oauth/sign_in'
    post_data = {
        "client_id": self.client_id,
        "username": "******",
        "password": "******",
        "grant_type": self.grant_type,
        "source": self.source,
        "timestamp": self.timestamp,
        "signature": self.get_signature(
            self.grant_type, self.client_id, self.source, self.timestamp
        ),
        "lang": "en",
        "ref_source": "homepage",
        "captcha": '',
    }

    # Work on a copy: updating self.headers in place would leak these
    # sign-in-only headers into every later request that uses self.headers.
    headers = dict(self.headers)
    headers.update({
        'Origin': 'https://www.zhihu.com',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'x-xsrftoken': 'UiFIIz9fMjuytEYZ7VViRIBKZugpWsEK',
        'X-Zse-83': '3_1.1',
        'x-requested-with': 'fetch',
    })
    yield scrapy.FormRequest(
        url=post_url,
        formdata=post_data,
        headers=headers,
        callback=self.check_login,
    )

def check_login(self, response):
    """Inspect the sign-in response and start crawling on success.

    Args:
        response: Response to the sign-in POST; its ``text`` is parsed as
            JSON.

    Yields:
        One request per entry in ``self.start_urls`` when the parsed JSON
        object contains a ``uid`` key. Nothing is yielded otherwise.
    """
    try:
        text_json = json.loads(response.text)
    except ValueError:
        # Match the sibling callbacks: report an unparsable response
        # instead of letting the exception escape the callback.
        print('登录响应解析失败')
        return

    # The presence of "uid" in the returned object is treated as success.
    if isinstance(text_json, dict) and "uid" in text_json:
        for url in self.start_urls:
            yield scrapy.Request(url, dont_filter=True, headers=self.headers)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值