话不多说,大人上码ლ(′◉❥◉`ლ)!!!
# -*- coding: utf-8 -*-
import requests
import pytesseract
from PIL import Image
class checkcode():
    """Fetch a captcha image over HTTP and read its text with Tesseract OCR.

    Each round downloads the image served at ``start_url``, writes the raw
    bytes to ``code_path`` and passes that file to
    ``pytesseract.image_to_string``.
    """

    def __init__(self, code_path='./code.png', timeout=10):  # initialise parameters
        """Set up the request target and local storage.

        Args:
            code_path: File the downloaded captcha is written to and read
                back from. Defaults to ``./code.png``.
            timeout: Seconds to wait for the HTTP request before giving up.
        """
        self.start_url = 'http://jxjy.dwjtaq.com/random.xhtml'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/62.0.3202.62 Safari/537.36'
        }
        self.code_path = code_path
        self.timeout = timeout

    def parse(self):  # request the data
        """Download the captcha and return the raw response body as bytes.

        Raises:
            requests.RequestException: On connection problems, timeouts or
                a non-2xx HTTP status.
        """
        # Without a timeout requests may block forever on a stalled server.
        ret = requests.get(self.start_url, headers=self.headers,
                           timeout=self.timeout)
        # Fail here rather than saving an HTML error page as an "image".
        ret.raise_for_status()
        return ret.content

    def save_code(self, content):  # save the captcha
        """Write the captcha bytes ``content`` to ``code_path``."""
        with open(self.code_path, 'wb') as f:
            f.write(content)

    def check_code(self):
        """Run OCR on the saved captcha image and return the recognised text."""
        # The context manager closes the underlying file after OCR, so
        # repeated rounds do not accumulate open file handles.
        with Image.open(self.code_path) as image:
            text = pytesseract.image_to_string(image)
        return text

    def run(self, rounds=10):
        """Fetch, save and recognise a captcha ``rounds`` times.

        Prints a ``#`` separator followed by the recognised text for each
        round.
        """
        for _ in range(rounds):
            ret = self.parse()
            self.save_code(ret)
            text = self.check_code()
            print("#")
            print(text)
# Script entry point: build the recogniser and run its download/OCR loop.
if __name__ == '__main__':
    start = checkcode()
    start.run()
这个验证码非常规则,所以识别率100%(吹牛,我自己都不信)
以上代码的思路:直接请求验证码,将响应内容保存为图片,再对图片进行识别
步骤:
安装两个模块 pytesseract、PIL(PIL 通过 Pillow 安装:pip install pytesseract pillow),并确保系统已安装 Tesseract OCR 引擎
import pytesseract
from PIL import Image
关键语句:
image = Image.open('./code.png') # 载入图片
pytesseract.image_to_string(image) #图片转化为文本输出
以上案例只能参考学习,无深度仅供入门