import io
import sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf-8')
from PIL import Image
import pytesseract
# Load the source picture and convert it to 8-bit grayscale ('L' mode),
# showing and saving the intermediate result for inspection.
im = Image.open('new_num.jpg')
gray = im.convert('L')
gray.show()
gray.save('new_num_gray.jpg')

# Binarization lookup table: gray levels below the threshold map to 0,
# every other level maps to 1.
threshold = 150
table = [0 if level < threshold else 1 for level in range(256)]

# Apply the table to get a 1-bit image, then show and save it.
out = gray.point(table, '1')
out.show()
out.save('new_num_thresholded.jpg')

# Re-open the saved thresholded image and print the OCR result.
th = Image.open('new_num_thresholded.jpg')
print(pytesseract.image_to_string(th))
看处理前后的3张图
但是识别的结果不是很准确,只识别出了‘2’。
然后又把 --psm 参数从 3 到 13 全部试了一遍,准确率只提高了一点点。
import io
import sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf-8')
from PIL import Image
import pytesseract
# Load the source picture and convert it to 8-bit grayscale ('L' mode),
# showing and saving the intermediate result for inspection.
im = Image.open('new_num.jpg')
gray = im.convert('L')
gray.show()
gray.save('new_num_gray.jpg')

# Binarization lookup table: gray levels below the threshold map to 0,
# every other level maps to 1.
threshold = 150
table = [0 if level < threshold else 1 for level in range(256)]

# Apply the table to get a 1-bit image, then show and save it.
out = gray.point(table, '1')
out.show()
# NOTE(review): JPEG is a lossy format, so the image read back below may
# not be strictly two-level any more; a lossless format such as PNG may
# give cleaner OCR input -- confirm before changing the file name.
out.save('new_num_thresholded.jpg')

th = Image.open('new_num_thresholded.jpg')

# Run OCR once per page segmentation mode (--psm 3 through 13), limiting
# recognition to digits, and print each result for comparison.
for psm in range(3, 14):
    # The space before '--oem' is required: without it the psm value and
    # the next flag are fused into a single malformed argument
    # (e.g. '--psm 3--oem 3').
    config = '--psm {} --oem 3 -c tessedit_char_whitelist=0123456789'.format(psm)
    print(pytesseract.image_to_string(th, config=config))