任务:将以下三张图片中左上角的数字导入表格中
001.jpg
002.jpg
003.jpg
1. 为了更好地识别,首先要把需要的区域(图片左上角 200×200 像素)提取出来,提取结果如下:
2. 对提取出的区域进行 OCR 识别,并将识别结果导入表格
具体代码如下:
from PIL import Image
import pytesseract
import os
import xlsxwriter
path = "D:/yuantu/"  # directory the source images are read from
savepath = "D:/picture/"  # directory the cropped regions are saved to
OUTPUT_XLSX = 'D:/outcome.xlsx'  # workbook that receives the OCR results
CROP_BOX = (0, 0, 200, 200)  # (left, upper, right, lower) of the top-left region


def image_sort_key(filename):
    """Return a sort key that orders numbered image files by numeric value.

    Names whose stem is purely numeric ("001.jpg", "0010.jpg") sort by that
    number, so "0010.jpg" comes after "009.jpg" regardless of zero padding.
    Any other name sorts after all numbered names, alphabetically.
    """
    stem = os.path.splitext(filename)[0]
    if stem.isdecimal():
        return (0, int(stem), filename)
    return (1, 0, filename)


def select_source_images(filenames):
    """Return the ``.jpg`` entries of *filenames* in processing order.

    Non-image directory entries are dropped so they can neither inflate the
    image count nor be handed to the image loader.
    """
    jpgs = [name for name in filenames if name.lower().endswith('.jpg')]
    return sorted(jpgs, key=image_sort_key)


def cropimg(jpg_name, savejpg_name, box=CROP_BOX):
    """Crop *box* out of one image, save the crop, and return its OCR text.

    Args:
        jpg_name: Path of the source image.
        savejpg_name: Path the cropped region is written to.
        box: ``(left, upper, right, lower)`` pixel box passed to ``crop``.

    Returns:
        The recognised text with surrounding whitespace removed.
    """
    # Context managers release the file handles even if cropping/OCR fails.
    with Image.open(jpg_name) as img:
        img.crop(box).save(savejpg_name)
    # OCR is run on the saved crop, i.e. on the same file that is kept on disk.
    with Image.open(savejpg_name) as cropped:
        text = pytesseract.image_to_string(cropped, lang="eng")  # text recognition
    # Strip surrounding whitespace so the spreadsheet cell holds only the value.
    return text.strip()


def main():
    """OCR the top-left region of every source image into a spreadsheet.

    Each image produces one row: column 0 holds the 1-based image number,
    column 1 holds the recognised text.
    """
    os.makedirs(savepath, exist_ok=True)
    # The workbook is written when the ``with`` block exits.
    with xlsxwriter.Workbook(OUTPUT_XLSX) as workbook:
        worksheet = workbook.add_worksheet()
        for row, name in enumerate(select_source_images(os.listdir(path))):
            text = cropimg(path + name, savepath + str(row) + '.jpg')
            worksheet.write(row, 0, row + 1)
            worksheet.write(row, 1, text)


if __name__ == "__main__":
    main()