本文使用 PIL + pytesseract 识别简单的验证码,目标是识别图片中的红色字体,思路如下:
话不多说,直接上代码
"""
# author: ****
# date : 2018.7.10
# desc : 图片验证码识别
"""
import requests
from PIL import Image
import pytesseract
import os
# Directory prefix (with trailing slash) under which captcha images are stored.
fp = "image/"
# File extension shared by the default captcha image names below.
img_type = ".png"
# Default file name used when saving a downloaded captcha image.
default_name = "%s%s" % ("verifying_code", img_type)
# Default image name handed to img_to_str(); same base name with a "test_" prefix.
default_out_name = "test_" + default_name
def downloads_pic(pic_name=default_name, timeout=10):
    """Download one captcha image and save it inside the image directory.

    The image is fetched from the hard-coded captcha URL and streamed to
    ``fp + pic_name`` in 1 KiB chunks.

    :param pic_name: file name to save the image as (appended to ``fp``)
    :param timeout: seconds to wait on the server before giving up
    :return: None
    :raises requests.RequestException: on connection failure, timeout,
        or a non-success HTTP status
    """
    url = 'http://www.bhi.com.cn/Public/Isvalid.ashx'
    # Use the shared directory prefix so this stays in sync with img_to_str().
    target = fp + pic_name
    target_dir = os.path.dirname(target)
    if target_dir and not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    res = requests.get(url, stream=True, timeout=timeout)
    try:
        # Fail loudly instead of saving an HTTP error page as the "image".
        res.raise_for_status()
        # The with-block flushes and closes the file; no manual flush/close needed.
        with open(target, 'wb') as f:
            for chunk in res.iter_content(chunk_size=1024):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    finally:
        # A streamed response holds its connection until explicitly closed.
        res.close()
def img_to_str(image_name=default_out_name):
"""
二值化的图片识别,返回字符串
:param image_name: 图片名称
:return: 验证码字符串
"""
bname = fp + 'b' + image_name
pytesseract.pytesseract.tesseract_cmd = 'D:\\Program Files\\Tesseract-OCR\\tesseract.exe'
tessdata_dir_config = '--tessdata-dir "D:\\Program Files\\Tesseract-OCR\\tessdata"'
text = pytesseract.image_to_string(Image.open(bname), config=tessdata_dir_config)
r_text &