#!/usr/bin/env python
# -*- coding: utf-8 -*-
from PIL import Image,ImageEnhance
import pytesseract
import sys,time
import StringIO
# Python 2 idiom: reload the sys module, then set the interpreter-wide default
# string encoding to UTF-8. The two statements must stay in this order.
# NOTE(review): presumably the reload is needed because setdefaultencoding is
# not reachable on the already-imported sys module -- confirm.
reload(sys)
sys.setdefaultencoding('utf-8')
# Correction table applied to the OCR output in get_code(): each key is a
# string to look for and its value is the replacement. Fill it in according
# to the kind of captcha being recognized.
rep={'':'' };
def _apply_corrections(text, table):
    """Return ``text`` after applying every ``old -> new`` entry of ``table``.

    Replacements are applied cumulatively, so each entry operates on the
    result of the previous ones. An empty table returns ``text`` unchanged.
    """
    for wrong, right in table.items():
        text = text.replace(wrong, right)
    return text


def get_code():
    """Crop, clean up and OCR the captcha image, returning the recognized text.

    Steps:
      1. Open the source image and crop it to the captcha region.
      2. Paint the one-pixel outer border white (removes the black frame).
      3. Binarize: a pixel whose R, G or B channel is below the threshold
         becomes black, every other pixel becomes white.
      4. Save the processed image, display a preview, and run tesseract.
      5. Apply the module-level ``rep`` correction table to the OCR output.

    Returns:
        The OCR text with all ``rep`` corrections applied.
    """
    white = (255, 255, 255, 255)
    black = (0, 0, 0, 255)
    threshold = 95  # same cut-off for the R, G and B channels

    img = Image.open("验证码图片路径")  # load the image with PIL
    # Captcha region passed to Image.crop. An earlier value,
    # (1109, 340, 1171, 361), was assigned and immediately overwritten in the
    # original code, so only the effective box is kept.
    box = (841, 340, 901, 361)
    img = img.crop(box)
    img = img.convert('RGBA')  # convert to RGBA
    pix = img.load()  # pixel access object
    width, height = img.size

    # Whiten the top and bottom border rows.
    for x in range(width):
        pix[x, 0] = pix[x, height - 1] = white
    # Whiten the left and right border columns.
    for y in range(height):
        pix[0, y] = pix[width - 1, y] = white

    # Binarize the whole image against the threshold.
    for y in range(height):
        for x in range(width):
            if min(pix[x, y][:3]) < threshold:
                pix[x, y] = black
            else:
                pix[x, y] = white

    # Original note: saved to a local file because of a tesseract limitation.
    # The OCR call below nevertheless receives the in-memory image.
    img.save("新生成图片保存路径")
    # Preview for visual inspection. Note that this placeholder path is a
    # different literal from the one used by save() above.
    im = Image.open("新生成图片路径")
    im.show()

    text = pytesseract.image_to_string(img)
    # Apply every correction cumulatively. The original rebuilt the result
    # from the raw OCR text on each iteration, so only the last entry of
    # ``rep`` took effect, and an empty ``rep`` raised NameError.
    return _apply_corrections(text, rep)
# Script entry point: run the recognizer only when this file is executed
# directly (not when imported) and print the recognized captcha text.
if __name__ == '__main__':
    code = get_code()
    # Parenthesized single-argument form prints identically on Python 2 and
    # is also valid on Python 3.
    print(code)