验证码识别
- 学习使用云打码识别图形验证码。
- 学会把验证码识别技术应用到爬虫中。
使用云打码平台
import os

from yundama import YDMHttp

# Minimal Yundama (云打码) walkthrough: log in, check the balance, then ask
# the service to recognize one captcha image stored on disk.
#
# The account credentials are read from the environment so that a real
# password is never stored in these notes. Set YDM_USERNAME / YDM_PASSWORD
# before running; a missing variable raises KeyError immediately.
ydm = YDMHttp(
    username=os.environ["YDM_USERNAME"],
    password=os.environ["YDM_PASSWORD"],
)

uid = ydm.login()
if not uid:
    print("登录失败")
    # Every later call needs a logged-in account, so stop here.
    raise SystemExit(1)

balance = ydm.balance()
if balance < 10:
    # Warning only: the script still attempts the recognition below.
    print("余额不足")

# decode() returns a pair; only the second element (the recognized text) is
# used. NOTE(review): codetype '3304' presumably selects the captcha
# category on the platform - confirm against the platform's type table.
_, result = ydm.decode(filename='getimage.jpg', codetype='3304')
print(result)
自动登录v2ex
# 1.因为用户名、密码和验证码的name都是随机的,所以我们要先获取网页源代码,然后取出其中的name值
# 2.获取到了name和once的值后,再通过调用https://www.v2ex.com/signin接口,把数据通过post请求发送过去
# 3.利用云打码平台去自动识别验证码。
import os

import requests
from lxml import etree
from yundama import YDMHttp
# Browser-like headers sent with every request made by this script.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36',
    'referer': 'https://www.v2ex.com/',
}
url = "https://www.v2ex.com/signin"

# A Session keeps the cookies it receives and sends them back on later
# requests, so the form page, the captcha image and the login POST all
# share the same cookie jar.
sess = requests.Session()
resp = sess.get(url, headers=headers)
# Fail fast on an HTTP error instead of parsing an error page as the form.
resp.raise_for_status()
html = resp.text
parse = etree.HTML(html)

# The form fields are read by position: username, password, captcha, once.
# Per the author's notes the field names are randomized, so they have to be
# read from the page rather than hard-coded.
inputs = parse.xpath("//form[@action='/signin']//input")
if len(inputs) < 4:
    # Without this check a changed or missing form surfaces as a bare
    # IndexError with no hint about the cause.
    raise SystemExit("sign-in form not found or has fewer than 4 inputs")
user_input = inputs[0]      # username
password_input = inputs[1]  # password
captcha_input = inputs[2]   # captcha
once_input = inputs[3]      # once token

user_name = user_input.get('name')
password_name = password_input.get('name')
captcha_name = captcha_input.get('name')
once_value = once_input.get('value')

# Download the captcha image that belongs to this once token.
captcha_url = 'https://www.v2ex.com/_captcha?once=' + once_value
print(captcha_url)
img_resp = sess.get(captcha_url, headers=headers)
# Do not save an error response body as if it were the captcha image.
img_resp.raise_for_status()
with open('captcha.png', 'wb') as f:
    f.write(img_resp.content)
# Hand the downloaded captcha to the Yundama (云打码) recognition service.
# Credentials are read from the environment (YDM_USERNAME / YDM_PASSWORD)
# instead of being written into the script; a missing variable raises
# KeyError immediately.
ydm = YDMHttp(
    username=os.environ["YDM_USERNAME"],
    password=os.environ["YDM_PASSWORD"],
)

uid = ydm.login()
if not uid:
    print("登录失败")
    # Recognition needs a logged-in account, so stop here.
    raise SystemExit(1)

balance = ydm.balance()
print(balance)
if balance < 10:
    # Warning only: the script still attempts the recognition below.
    print("余额不足")

# Decode the file this script wrote earlier ('captcha.png'); the previous
# filename 'getimage.jpg' pointed at a different image. decode() returns a
# pair and only the second element (the recognized text) is used.
# NOTE(review): codetype '3304' presumably selects the captcha category on
# the platform - confirm against the platform's type table.
_, result = ydm.decode(filename='captcha.png', codetype='3304')
print(result)
# Assemble the sign-in form. The first three keys are the field names read
# from the page; the values are the account name, the password and the
# recognized captcha text. "once" carries the token read from the form and
# "next" is the path sent along with the login.
form_fields = [
    (user_name, 'wuyuli'),
    (password_name, '123456'),
    (captcha_name, result),
    ("once", once_value),
    ("next", '/'),
]
data = dict(form_fields)

# Perform the login through the same session used for the earlier requests.
sess.post(url, headers=headers, data=data)