python识别图片文字_Python识别图片中的文字

import glob
import os


def photo_compression(original_imgage, tmp_image_path, max_pixels=4000000):
    """Save a down-scaled copy of an image into a temporary directory.

    The width and height are halved repeatedly until their product no longer
    exceeds ``max_pixels``; the (possibly unchanged) size is then used to
    resample the image with bilinear filtering. The copy keeps the original
    file name, so the output format is chosen from the original extension.

    Note: if ``tmp_image_path`` is the directory that already contains
    ``original_imgage``, the original file is overwritten by the copy.

    :param original_imgage: path of the source image.
    :param tmp_image_path: existing directory that receives the copy.
    :param max_pixels: upper bound for ``width * height`` of the copy. The
        default of 4000000 yields files of roughly 200+ KB according to the
        original author.
    :return: path of the saved copy.
    """
    from PIL import Image

    save_path = os.path.join(tmp_image_path, os.path.basename(original_imgage))

    # Use the image as a context manager so the underlying file handle is
    # released even if resizing or saving fails.
    with Image.open(original_imgage) as img:
        width, height = img.size
        # Stop once both sides are 1 px so an extreme aspect ratio (or a tiny
        # max_pixels) can neither produce a zero-sized image nor loop forever.
        while width * height > max_pixels and (width > 1 or height > 1):
            width = max(1, width // 2)
            height = max(1, height // 2)
        e_img = img.resize((width, height), Image.BILINEAR)
        e_img.save(save_path)

    return save_path

def ocr(original_image):
    """Run Baidu OCR text recognition on a single image file.

    The image is sent to the general recognition endpoint first; if that call
    raises, the high-accuracy endpoint is tried instead. According to the
    original author the service accepts JPG, JPEG, PNG and BMP images.

    :param original_image: path of the image to recognise.
    :return: the response object returned by the Baidu client.
    """
    from aip import AipOcr

    filename = os.path.basename(original_image)

    # Fill in your own Baidu AI account credentials. Reference:
    # https://m.toutiaocdn.com/i6704242394566492684/
    APP_ID = '******'
    API_KEY = '*******'
    SECRET_KEY = '*********'

    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    # Read the bytes up front so the file is closed before the network call.
    with open(original_image, 'rb') as picfile_read:
        img = picfile_read.read()

    print('正在识别图片:{0}......'.format(filename))
    try:
        # General text recognition (50000 free calls per day per the author).
        result = client.basicGeneral(img)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still abort the program.
        # High-accuracy recognition (500 free calls per day per the author).
        # NOTE(review): the SDK may report failures through an 'error_code'
        # entry in the returned dict instead of raising - confirm whether the
        # fallback should also trigger on that.
        result = client.basicAccurate(img)
    return result

def run_ocr(original_image, tmp_image_path, result_file_path='identify_results.txt'):
    """Recognise text in every image matching a pattern and store the results.

    Any previous result file is deleted, the temporary directory is created
    if needed, every matching image is copied (down-scaled) into it, and each
    copy is then sent through OCR. The recognised lines are appended to the
    result file and the temporary copy is deleted afterwards.

    :param original_image: glob pattern selecting the source images.
    :param tmp_image_path: directory for the temporary down-scaled copies.
    :param result_file_path: text file that receives the recognised text.
    :return: None
    """
    # Drop results from an earlier run. Attempting the removal directly
    # avoids the check-then-delete race of os.path.exists + os.remove.
    try:
        os.remove(result_file_path)
    except FileNotFoundError:
        pass

    # makedirs also creates missing parent directories and tolerates an
    # already existing directory.
    os.makedirs(tmp_image_path, exist_ok=True)

    # glob patterns are shell-style wildcards (*, ?, [seq]), not regular
    # expressions.
    tmp_file_path = [
        photo_compression(picfile, tmp_image_path)
        for picfile in glob.glob(original_image)
    ]

    for picfile in tmp_file_path:
        try:
            result = ocr(picfile)
            words_result = result.get('words_result')
            if words_result is None:
                # No 'words_result' entry in the response: report it instead
                # of failing with a TypeError while iterating over None.
                print('识别失败:{0} {1}'.format(os.path.basename(picfile), result))
                words_result = []
            lines = [text.get('words', '') + '\n' for text in words_result]

            with open(result_file_path, 'a', encoding='utf-8') as fo:
                fo.write("=" * 100 + '\n')
                fo.write("【识别图片】:{0} \n".format(os.path.basename(picfile)))
                fo.write("【文本内容】: \n")
                fo.writelines(lines)
        finally:
            # The temporary copy is no longer needed, whether or not the
            # recognition succeeded.
            os.remove(picfile)

if __name__ == '__main__':
    # Build the paths with os.path.join instead of a hard-coded backslash so
    # the script also works on platforms whose separator is not '\\'.
    tmp_image_path = os.path.join(os.getcwd(), 'tmp')
    # Process every PNG file in the current working directory.
    original_image = os.path.join(os.getcwd(), '*.png')
    run_ocr(original_image, tmp_image_path)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值