import os
import pytesseract
from PIL import Image
import chardet
import os
import urllib.request
import uuid
import pymssql
import threading
import time
import decimal
# Bare attribute read; it has no effect on program state.
# NOTE(review): presumably kept so that `decimal` is pulled in explicitly
# (e.g. for a DB driver or a packaging tool) — confirm before removing.
decimal.__version__
# Directory that downimg() saves downloaded images into (note the trailing
# slash: the saved file name is appended directly to this string).
file_path = 'd:/img/'
def get_date(id):
    """Fetch the rows to process for ``id`` from the database.

    The database query itself is not present in this file (the original
    body was only a placeholder comment), so this is a stub that keeps the
    function importable and gives callers something they can iterate.

    Args:
        id: Identifier passed by the caller; unused until the query is
            filled in.

    Returns:
        A list of rows. Empty when the query is not implemented or when it
        fails (the error is printed rather than raised).
    """
    rows = []
    try:
        # TODO: fetch the data set from the database and assign it to `rows`.
        pass
    except Exception as ex:
        print(ex)
    # Always return a list so the caller's `for row in ...` never sees None.
    return rows
def downimg(image_url, dest_dir=None):
    """Download ``image_url`` to a local file and return the saved path.

    The file is stored under a freshly generated ``uuid1`` name with a
    ``.jpg`` suffix, whatever the remote content actually is.

    Args:
        image_url: URL of the image to fetch.
        dest_dir: Directory to save into. When omitted, the module-level
            ``file_path`` is used. The directory is created if missing.

    Returns:
        The path of the downloaded file, or ``''`` when the directory could
        not be created or the download failed. Errors are printed (prefixed
        with ``1`` for I/O errors, ``2`` for anything else), not raised.
    """
    try:
        target_dir = file_path if dest_dir is None else dest_dir
        # exist_ok avoids the check-then-create race of
        # `if not exists: makedirs` when several downloads start together.
        os.makedirs(target_dir, exist_ok=True)
        # join() yields the same path as plain concatenation when the
        # directory ends with a slash, and also works when it does not.
        filename = os.path.join(target_dir, '{}{}'.format(uuid.uuid1(), '.jpg'))
        urllib.request.urlretrieve(image_url, filename=filename)
        return filename
    except IOError as e:
        print(1, e)
        return ''
    except Exception as e:
        print(2, e)
        return ''
def _first_listed_phrase(text, lines):
    """Return the first phrase from ``lines`` found in ``text``, else None."""
    for line in lines:
        phrase = line.replace(" ", "").replace('\n', '').replace('\r', '')
        # An empty phrase is a substring of every string, so a blank line in
        # the list would otherwise flag every image.
        if phrase and phrase in text:
            return phrase
    return None


def mycensor(url, id):
    """Download an image, OCR it, and print its URL if it contains a listed phrase.

    The image at ``url`` is fetched with ``downimg``, run through Tesseract
    with the ``chi_sim`` language, and the recognized text is printed. Each
    line of ``123.txt`` (read from the current working directory, with
    spaces and line breaks stripped) is then searched for in that text; on
    the first hit the URL is printed.

    Args:
        url: URL of the image to check.
        id: Unused; kept so existing callers keep working.

    Returns:
        None.
    """
    filepath = downimg(url)
    if not filepath:
        # downimg already reported the failure; there is nothing to OCR.
        return
    BASE_DIR = os.path.dirname(__file__)
    zh_img = os.path.join(BASE_DIR, filepath)
    # Close the image handle as soon as OCR is done.
    with Image.open(zh_img) as img:
        zh = pytesseract.image_to_string(img, lang="chi_sim")
    print(zh)
    with open("123.txt", "r", encoding='utf-8') as f:
        data = f.readlines()
    if _first_listed_phrase(zh, data) is not None:
        print(url)
    # NOTE: the downloaded image is left on disk; cleanup was disabled in
    # the original code.
# Script driver: fetch the rows for id 1 once and check each row's image.
# (The original wrapped this in a `while` loop that always ran exactly one
# iteration and shadowed the builtin `id`.)
results = get_date(1)
print(str(results))
# get_date may return None when the lookup fails; treat that as "no rows".
for row in results or []:
    # Index 2 of each row is passed on as the image URL.
    mycensor(str(row[2]), 1)
# Python: recognizing text in images (OCR).
# (Web-page residue: "latest recommended article published 2023-12-11 10:24:04".)