# Practice captcha recognition using Tesseract OCR.
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import pytesseract
import urllib
import urllib.request
from PIL import Image
# URL the captcha image is downloaded from.
url = 'https://so.gushiwen.org/RandCode.ashx'
# Save the response to the working directory, then open the saved file so it
# can be previewed here and processed further down.
urllib.request.urlretrieve(url, './captcha.jpg')
image = Image.open('./captcha.jpg')
# Display the raw, unprocessed captcha for visual inspection.
image.show()
# TODO: preprocess this image before recognition.
def deal_captcha(image, threshold=100):
    """Convert a captcha image to pure black and white.

    The image is converted to 8-bit grayscale (mode ``'L'``) and every pixel
    is then forced to one of the two extremes, so no intermediate gray
    shades remain.

    Args:
        image: Source image. It must provide ``convert('L')`` whose result
            exposes ``load()`` and ``size`` (a PIL ``Image`` in this script).
        threshold: Grayscale cut-off on the 0 (pure black) to 255 (pure
            white) scale. Pixels strictly brighter than this become 255;
            all others, including pixels equal to the cut-off, become 0.
            Defaults to 100.

    Returns:
        The grayscale image returned by ``image.convert('L')``, with its
        pixels thresholded in place.
    """
    # Work on the grayscale version of the image.
    gray = image.convert('L')
    # Pixel-access object, indexed as pixels[x, y].
    pixels = gray.load()
    width, height = gray.size
    for x in range(width):
        for y in range(height):
            # Snap each pixel to pure white or pure black.
            pixels[x, y] = 255 if pixels[x, y] > threshold else 0
    return gray
# Binarize the captcha so OCR runs on a pure black/white image.
image = deal_captcha(image)
# Bound to `captcha_text` rather than `str` so the builtin `str` type is not
# shadowed for the rest of the module.
captcha_text = pytesseract.image_to_string(image)
print(captcha_text)