图形验证码、极验验证码
(1)手动输入
(2)光学识别 OCR
毕竟是机器识别,识别率一般只有 80% 左右,而且仅限于简单的图形验证码
命令行识别:tesseract 图片名字 lala(识别结果写入 lala.txt),识别率约 0%
代码识别(pytesseract):识别率约 60%
pip install pytesseract
pip install pillow
(3)打码平台
云打码
import requests
from bs4 import BeautifulSoup
import urllib.request
import pytesseract
from PIL import Image
from PIL import ImageEnhance
import time
def shibie(imagepath, threshold=140):
    """Recognise the text of a simple image captcha with Tesseract OCR.

    The image is desaturated, brightened, contrast-boosted and sharpened,
    then converted to greyscale and binarised. The binarised image is what
    gets passed to ``pytesseract``.

    Args:
        imagepath: Path (or file object) of the captcha image, as accepted
            by ``PIL.Image.open``.
        threshold: Grey level (0-255) used for binarisation. Pixels darker
            than this become black, all others white. Defaults to 140.

    Returns:
        The recognised text with all whitespace removed, so it can be
        submitted directly as a captcha answer.
    """
    # Open through a context manager so the file handle is released even if
    # decoding fails; convert() returns an independent in-memory image.
    with Image.open(imagepath) as raw:
        img = raw.convert('RGB')

    # Enhancement chain; each step takes the previous step's image.
    img = ImageEnhance.Color(img).enhance(0)        # drop colour information
    img = ImageEnhance.Brightness(img).enhance(2)
    img = ImageEnhance.Contrast(img).enhance(8)
    img = ImageEnhance.Sharpness(img).enhance(20)

    # Convert to greyscale.
    img = img.convert('L')

    # Binarise: lookup table mapping each grey level to 0 (black) or 1 (white).
    table = [0 if level < threshold else 1 for level in range(256)]
    out = img.point(table, '1')

    # Run OCR on the binarised image (previously the binarised result was
    # computed but the un-binarised greyscale image was recognised instead).
    text = pytesseract.image_to_string(out)

    # OCR output may contain stray spaces or trailing newline/form-feed
    # characters; a captcha answer never contains whitespace.
    return ''.join(text.split())
# A session is required so that the login page, the captcha image and the
# login POST all share the same cookies.
s = requests.Session()

# Request constants, hoisted out of the retry loop because they never change.
url = 'https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx'
post_url = 'https://so.gushiwen.org/user/login.aspx?from=http%3a%2f%2fso.gushiwen.org%2fuser%2fcollect.aspx'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36',
}
# Seconds to wait for each HTTP response; without a timeout a stalled
# connection would block the script forever.
request_timeout = 10

# Attempt counter (1-based), used only for the progress messages.
i = 1
while True:
    # Fetch the login page to obtain a fresh captcha and hidden form fields.
    r_get = s.get(url=url, headers=headers, timeout=request_timeout)
    soup = BeautifulSoup(r_get.text, 'lxml')

    # Extract the src attribute of the captcha image.
    image_src = 'https://so.gushiwen.org' + soup.select('#imgCode')[0]['src']

    # Download the captcha through the same session rather than urllib, so
    # the request carries the session cookies (presumably the server ties
    # the captcha to the session -- confirm against the site's behaviour).
    r_image = s.get(url=image_src, headers=headers, timeout=request_timeout)
    with open('code.png', 'wb') as fp:
        fp.write(r_image.content)

    # Read the hidden form fields that must be sent back with the POST.
    views = soup.select('#__VIEWSTATE')[0]['value']
    viewg = soup.select('#__VIEWSTATEGENERATOR')[0]['value']

    # Recognise the captcha by OCR; strip surrounding whitespace so stray
    # newlines from the OCR engine do not corrupt the form field.
    code = shibie('code.png').strip()

    # Send the login POST request.
    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration or the environment instead.
    formdata = {
        '__VIEWSTATE': views,
        '__VIEWSTATEGENERATOR': viewg,
        'from': 'http://so.gushiwen.org/user/collect.aspx',
        'email': '1090509990@qq.com',
        'pwd': '123456',
        'code': code,
        'denglu': '登录',
    }
    r_post = s.post(url=post_url, headers=headers, data=formdata,
                    timeout=request_timeout)

    # The logged-in page contains the "退出登录" (log out) link.
    if '退出登录' in r_post.text:
        print('恭喜你第--%s--次登录成功' % i)
        break

    print('不要灰心,这是你第--%s--次失败' % i)
    i += 1
    # Pause between attempts before retrying with a new captcha.
    time.sleep(2)