# 利用 DecryptLogin 和百度文字识别进行知乎登录。但在本项目中,验证码的识别正确率较低,因此可以尝试用其他手段来处理验证码。
import base64
import io

import requests
from DecryptLogin import login
from PIL import Image
'''
百度文字识别api要求
1.图像数据,将图片转为二进制流再进行base64编码后进行urlencode(坑,其实只需要base64编码即可,url编码没什么作用)
2.请求参数需加上access_token
3.请求方法为post
4.请求Header Content-Type:application/x-www-form-urlencoded
'''
#获取百度文字识别的access_token
def get_access_token(client_id, client_secret):
    """Fetch an access token for the Baidu OCR API.

    Sends a client-credentials request to the Baidu OAuth token endpoint.

    Args:
        client_id: API Key of the Baidu application.
        client_secret: Secret Key of the Baidu application.

    Returns:
        The ``access_token`` value from the JSON reply, or ``None`` when the
        server answers with an error status, the body is not valid JSON, or
        the reply contains no ``access_token`` field.

    Raises:
        requests.exceptions.RequestException: propagated from ``requests``
            on network failures, including the request timing out.
    """
    token_url = 'https://aip.baidubce.com/oauth/2.0/token'
    query = {
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret,
    }
    # Let requests build the query string so the credentials are URL-encoded
    # instead of being concatenated into the URL verbatim.
    response = requests.get(token_url, params=query, timeout=10)
    # A requests Response is falsy for 4xx/5xx status codes; check it before
    # trying to parse the body, which may not be JSON on failure.
    if not response:
        return None
    try:
        payload = response.json()
    except ValueError:
        return None
    return payload.get('access_token')
def getCode(imagepath, api_key='yourApiKey', secret_key='yourSecret'):
    """Recognize the text in a captcha image with Baidu's accurate_basic OCR API.

    The image is re-encoded as PNG, base64-encoded and POSTed as the form
    field ``image`` with the access token passed as a query parameter.

    Args:
        imagepath: Path of the captcha image to recognize.
        api_key: Baidu application API Key used to obtain the access token.
        secret_key: Baidu application Secret Key used to obtain the access token.

    Returns:
        The first recognized line of text as ``str``, or ``None`` when no
        access token could be obtained, the OCR request fails, or the reply
        contains no recognition results.
    """
    baidu_base_url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic'
    # An access token must be obtained from the API Key / Secret Key first.
    access_token = get_access_token(api_key, secret_key)
    if not access_token:
        return None
    headers = {'content-type': 'application/x-www-form-urlencoded'}

    # Original author's note: Baidu OCR could not recognize the Zhihu captcha
    # file when it was sent directly (or merely saved as a copy), for unknown
    # reasons, so the image is cropped to its full extent and re-saved before
    # upload. The re-encoding is kept, but it now happens in memory instead of
    # through a hard-coded scratch file on disk.
    buffer = io.BytesIO()
    with Image.open(imagepath) as im:
        width, height = im.size
        region = im.crop((0, 0, width, height))
        region.save(buffer, format='PNG')

    # The API expects the raw image bytes base64-encoded.
    img = base64.b64encode(buffer.getvalue())
    response = requests.post(
        baidu_base_url,
        params={'access_token': access_token},
        data={'image': img},
        headers=headers,
        timeout=10,
    )
    # A requests Response is falsy for 4xx/5xx status codes.
    if not response:
        return None
    try:
        result = response.json()
    except ValueError:
        return None
    # Error replies carry no 'words_result'; treat them like an empty result.
    words_result = result.get('words_result')
    if not words_result:
        return None
    words = str(words_result[0]['words'])
    print(words)
    return words
if __name__ == "__main__":
    # Script entry point: log in to Zhihu through DecryptLogin, passing
    # getCode as the captcha-cracking callback.
    lg = login.Login()
    zhihu_options = dict(
        username='yourUsername',
        password='yourpassword',
        mode='pc',
        crack_captcha_func=getCode,
    )
    # lg.zhihu returns two values, unpacked here under their original names.
    infos_return, session = lg.zhihu(**zhihu_options)