import json
import requests
import cv2 as cv
import os
import re
import random
import glob as gb
import time
'''
(1) json.dumps() serializes a Python object (for example a dict or a list) into a JSON-formatted string.
(2) json.loads() parses a JSON-formatted string back into the corresponding Python object (for example a dict).
'''
def post_requests(url, data, file_path):
    """Upload a file with an HTTP POST and return the decoded JSON reply.

    Args:
        url: Address the request is sent to.
        data: Extra form fields, passed through to ``requests.post``.
        file_path: Path of the file to upload (converted with ``str()``);
            it is sent under the form field name ``file``.

    Returns:
        The response body parsed by ``json.loads``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Open the upload in a context manager so the handle is closed even when
    # the request raises; the original left the file open.
    with open(str(file_path), 'rb') as upload:
        response = requests.post(url, files={'file': upload}, data=data)
    return json.loads(response.text)
def get_images(root):
    """Collect the image files located directly inside *root*.

    Only files whose extension is ``jpg``, ``png``, ``jpeg`` or ``JPG`` are
    matched; sub-directories are not searched.

    Args:
        root: Directory to search.

    Returns:
        A ``(files, name)`` tuple: ``files`` holds the matched paths as
        returned by ``glob`` and ``name`` holds the corresponding base names,
        in the same order.
    """
    files = []
    for ext in ['jpg', 'png', 'jpeg', 'JPG']:
        files.extend(gb.glob(os.path.join(root, '*.{}'.format(ext))))
    # Where pattern matching ignores case, the 'jpg' and 'JPG' patterns can
    # return the same file twice; keep the first occurrence only.
    files = list(dict.fromkeys(files))
    # os.path.basename understands the platform's separator, unlike a manual
    # split on '/', which leaves backslash-separated paths untouched.
    name = [os.path.basename(path) for path in files]
    return files, name
def img_path_parse(data_path):
    """Return the paths of all entries in *data_path*, in random order.

    Every name reported by ``os.listdir`` is included (no filtering by
    extension or entry type) and joined onto ``data_path``.

    Args:
        data_path: Directory whose entries are listed.

    Returns:
        A list of joined paths, shuffled in place with ``random.shuffle``.
    """
    img_path = [
        os.path.join(data_path, image_name)
        for image_name in os.listdir(data_path)
    ]
    random.shuffle(img_path)
    return img_path
def readFilename(path, allfile):
    """Recursively gather the paths of all non-directory entries under *path*.

    Args:
        path: Directory to walk.
        allfile: List that found paths are appended to; it is modified in
            place.

    Returns:
        The same ``allfile`` list that was passed in.
    """
    for filename in os.listdir(path):
        filepath = os.path.join(path, filename)
        if os.path.isdir(filepath):
            # Descend into the sub-directory, sharing the same accumulator.
            readFilename(filepath, allfile)
        else:
            allfile.append(filepath)
    return allfile
def imglist(path):
    """List the files in each sub-directory of *path* with their labels.

    The label of a file is the integer that follows the last ``%`` in its
    name once the extension has been removed, e.g. ``"scan%3.jpg"`` -> ``3``.

    Args:
        path: Directory whose immediate sub-directories are scanned. Entries
            of ``path`` that are not directories are ignored.

    Returns:
        A ``(img, label)`` tuple of equal-length lists: the file names found
        in the sub-directories and their integer labels. (The original
        computed both lists but discarded them.)

    Raises:
        ValueError: If a file name does not end in an integer label.
    """
    img = []
    for entry in os.listdir(path):
        sub_dir = os.path.join(path, entry)
        if os.path.isdir(sub_dir):
            img.extend(os.listdir(sub_dir))

    label = []
    for file_name in img:
        # splitext copes with names that contain no dot, where indexing
        # split('.')[-2] would raise IndexError.
        stem = os.path.splitext(file_name)[0]
        label.append(int(stem.split('%')[-1]))
    return img, label
def _format_block_line(block):
    """Format one OCR block as ``<coordinate digits>,<text>``.

    Only the first entry of ``block["lines"]`` is used. Its vertices are
    rendered with ``repr``, every non-digit character is treated as a
    separator, and the remaining digit runs are joined with commas ahead of
    the recognised text.
    NOTE(review): stripping non-digits also drops minus signs and splits
    decimal values; this assumes non-negative integer coordinates -- confirm
    against the OCR service's response format.
    """
    first_line = block["lines"][0]
    vertices = first_line["position"]["vertices"]
    rendered = ", ".join(repr(vertex) for vertex in vertices)
    digit_runs = re.sub(r"\D", ' ', rendered).split()
    return ','.join(digit_runs) + ',' + first_line["text"]


if __name__ == '__main__':
    # Send every file in the input folder to the OCR endpoint and append one
    # "coordinates,text" line per returned block to a text file named after
    # the image.
    data_path = "E:\\trade\\贸易合同价款页"
    for image_file in img_path_parse(data_path):
        ocr_result = post_requests("http://172.16.:9009/icr/......", {}, image_file)
        # Derive the output name from the file name alone, so dots or
        # separators elsewhere in the path cannot truncate it.
        stem = os.path.splitext(os.path.basename(image_file))[0]
        # Mode 'a' appends: running the script again adds to existing files.
        with open('E:/trade/gts/贸易合同价款页/{}.txt'.format(stem), mode='a', encoding='utf-8') as f:
            for block in ocr_result["document"]["blocks"]:
                f.write(_format_block_line(block) + '\n')
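# Call an HTTP OCR interface, extract fields from the returned JSON, and write them to a txt file.
# (Source-page note: latest recommended article published 2022-11-04 21:29:28.)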