通过分析百度验证码的整个逻辑,我们可以了解到其验证过程包括了请求重定向和验证码验证两个主要步骤。我们将通过Python代码实现逆向打码过程。
获取验证参数
首先,我们需要获取验证码验证过程所需的参数,包括as、ds和tk。这些参数可以通过向 https://passport.baidu.com/viewlog 接口发送GET请求获取;接口返回的是JSONP格式的响应,需要先从回调包装中提取出JSON再进行解析。
def get_verification_parameters():
    """Fetch the ``tk``, ``as`` and ``ds`` values from the viewlog endpoint.

    Sends a JSONP-style GET request through the module-level ``session``
    (using the module-level ``headers``), extracts the JSON object from the
    callback wrapper and returns the three fields found under ``data``.

    Returns:
        dict: ``{"tk": ..., "as": ..., "ds": ...}`` copied from the
        response's ``data`` object.

    Raises:
        IndexError: if the response body contains no ``{...})`` payload.
        ValueError: if the extracted payload is not valid JSON.
        KeyError: if ``data`` or one of the three fields is missing.
    """
    endpoint = "https://passport.baidu.com/viewlog"
    query = {
        "callback": "jQuery110205449684422426735_" + str(int(time.time() * 1000)),
        # NOTE(review): this ak differs from the one sent by
        # get_verification_image / verify_captcha -- confirm which is intended.
        "ak": "33c48884b7df83d4230e07cbcd0d07fd",
        "_": str(int(time.time() * 1000)),
    }
    reply = session.get(endpoint, headers=headers, params=query)
    # The body is JSONP, i.e. callback({...}); keep only the JSON object.
    payload = json.loads(re.findall(r'.*?(\{.*?})\)', reply.text)[0])
    data = payload['data']
    return {
        "tk": data['tk'],
        "as": data['as'],
        "ds": data['ds'],
    }
获取验证码图片
然后,我们需要使用上一步得到的tk请求getstyle接口,从响应中解析出验证码图片地址和backstr,并将图片下载保存到本地。
def get_verification_image(parameters):
    """Request a "spin" captcha and download its image to disk.

    Calls the getstyle endpoint with the ``tk`` from ``parameters``, extracts
    the image URL and ``backstr`` from the JSONP response, then writes the
    image bytes to ``verification_image.png`` in the current working
    directory.

    Args:
        parameters: mapping containing at least ``"tk"`` and ``"as"``.

    Returns:
        dict: keys ``image_url`` (URL-decoded), ``backstr``, ``tk`` and
        ``as``.

    Raises:
        IndexError: if the response body contains no ``{...})`` payload.
        ValueError: if the extracted payload is not valid JSON.
        KeyError: if an expected field is missing from ``parameters`` or
            from the response.
    """
    url = "https://passport.baidu.com/viewlog/getstyle"
    # Integer milliseconds, matching get_verification_parameters; the raw
    # float would put a decimal point inside the JSONP callback name.
    timestamp = str(int(time.time() * 1000))
    params = {
        "callback": "jQuery110205449684422426735_" + timestamp,
        "ak": '3de47787fd60b30420f868ffbf4dbccd',
        "tk": parameters["tk"],
        "isios": "0",
        "type": "spin",
        "_": timestamp,
    }
    response = session.get(url, headers=headers, params=params)
    # The body is JSONP, i.e. callback({...}); keep only the JSON object.
    ret_data = re.findall(r'.*?(\{.*?})\)', response.text)[0]
    ret_data = json.loads(ret_data)
    verification_image = {
        "image_url": unquote(ret_data['data']['ext']['img']),
        "backstr": ret_data['data']['backstr'],
        "tk": parameters["tk"],
        "as": parameters["as"],
    }
    # Fixed: the dict key is "image_url"; the previous "img_url" lookup
    # raised KeyError on every call.
    # NOTE(review): verify=False disables TLS certificate verification for
    # the image download -- confirm this is really required.
    image_response = session.get(verification_image['image_url'], verify=False)
    with open('verification_image.png', 'wb') as f:
        f.write(image_response.content)
    return verification_image
打码
我们将保存到本地的验证码图片进行Base64编码后提交给打码平台进行识别,并把识别结果作为角度(angle)保存下来。
def solve_captcha(verification_image):
    """Send the saved captcha image to the solver and record its answer.

    Reads ``verification_image.png`` from the current working directory,
    Base64-encodes the bytes and passes them to ``dama_api`` (defined
    elsewhere). The value it returns is stored under the ``"angle"`` key.

    Args:
        verification_image: dict to annotate; it is modified in place.

    Returns:
        The same ``verification_image`` dict, now with an ``"angle"`` entry.
    """
    with open('verification_image.png', 'rb') as image_file:
        encoded = base64.b64encode(image_file.read())
    # Call the captcha-solving API; the encoded image is passed as bytes.
    verification_image['angle'] = dama_api(encoded)
    return verification_image
验证
最后,我们用识别出的角度、as和backstr生成fs参数,并连同as、tk一起提交给百度服务器进行验证。
def verify_captcha(verification_image):
    """Submit the solved captcha to the viewlog endpoint.

    Builds the ``fs`` parameter with ``build_fs`` (defined elsewhere) from
    the integer angle, ``as`` and ``backstr``, sends it with ``as`` and
    ``tk`` in a JSONP-style GET request, and returns the decoded response.

    Args:
        verification_image: dict with ``"angle"``, ``"as"``, ``"backstr"``
            and ``"tk"`` entries.

    Returns:
        dict: the JSON object parsed from the JSONP response.

    Raises:
        ValueError: if ``angle`` cannot be converted to ``int`` or the
            extracted payload is not valid JSON.
        IndexError: if the response body contains no ``{...})`` payload.
        KeyError: if a required entry is missing from ``verification_image``.
    """
    url = "https://passport.baidu.com/viewlog"
    fs = build_fs(
        int(verification_image['angle']),
        verification_image['as'],
        verification_image['backstr'],
    )
    # Integer milliseconds, matching get_verification_parameters; the raw
    # float would put a decimal point inside the JSONP callback name.
    timestamp = str(int(time.time() * 1000))
    params = {
        "callback": "jQuery110204100787474351779_" + timestamp,
        "ak": "3de47787fd60b30420f868ffbf4dbccd",
        "as": verification_image['as'],
        "fs": fs,
        "tk": verification_image['tk'],
        "cv": "submit",
        "_": timestamp,
    }
    response = session.get(url, headers=headers, params=params)
    # The body is JSONP, i.e. callback({...}); keep only the JSON object.
    ret_data = re.findall(r'.*?(\{.*?})\)', response.text)[0]
    ret_data = json.loads(ret_data)
    return ret_data
主函数
将上述步骤整合到主函数中。
def main():
    """Run the full flow and print whether verification succeeded.

    Fetches the parameters, downloads the captcha image, has it solved,
    submits the answer, and prints a pass message when the response's
    ``data.op`` equals 1, otherwise a failure message.
    """
    captcha = get_verification_image(get_verification_parameters())
    captcha = solve_captcha(captcha)
    outcome = verify_captcha(captcha)
    passed = outcome['data']['op'] == 1
    print("验证通过" if passed else "验证失败")


if __name__ == "__main__":
    main()
更多内容联系q1436423940