# 我们想识别图像中对我们有用的评论,因此需要设定一个阈值,只保留对我们有用的信息。
import easyocr
import json
# Create the EasyOCR reader object, configured with the 'en' language code.
reader = easyocr.Reader(['en'])
# Run OCR on the review screenshot. The code below indexes each result entry
# as entry[0] (bounding-box points) and entry[1] (recognized text).
result_list = reader.readtext('review2.png')
def use_result_list_to_recentangle(result_list, x_threshold=560):
    """Keep only the OCR detections that lie to the right of a pixel threshold.

    Each entry of ``result_list`` is indexed as ``entry[0]`` (a sequence of
    box points, each an ``[x, y]`` pair) and ``entry[1]`` (the recognized
    text). An entry is kept when the x coordinate of its second box point,
    truncated to an int, is strictly greater than ``x_threshold``.
    NOTE(review): in EasyOCR's box layout the second point is presumably the
    top-right corner -- confirm against the library documentation.

    Args:
        result_list: Iterable of OCR results as described above.
        x_threshold: Exclusive lower bound for the x coordinate. Defaults to
            560, the value that was previously hard-coded.

    Returns:
        A ``(pos_list, text_list)`` tuple of parallel lists holding the box
        points and the text of every kept entry, in input order.
    """
    pos_list = []
    text_list = []
    for sample in result_list:
        box, text = sample[0], sample[1]
        # int() truncates float coordinates before comparing, as before.
        if int(box[1][0]) > x_threshold:
            pos_list.append(box)
            text_list.append(text)
    return pos_list, text_list
def get_important_information(pos_list, text_list):
    """Group a flat list of OCR text lines into one dict per review.

    The text lines are scanned in order with a small state machine:

    * A line containing ``'Reviewed in the'`` is stored as ``'place'`` and
      the line just before it as ``'title'``.
    * A line containing ``'Color'`` is stored as ``'color'``.
    * A line containing ``'Purchase'`` is stored as ``'Verify_purchase'``
      and switches the scanner into content mode; every following line is
      appended to the review content.
    * A line equal to ``'Helpful'`` closes the current review: the collected
      content is stored as ``'content'``, the review is emitted, and the
      scanner state is reset.

    A trailing review that never reaches a ``'Helpful'`` line is not emitted.

    Args:
        pos_list: Box positions matching ``text_list``. Unused; kept so the
            signature stays compatible with existing callers.
        text_list: Recognized text lines in reading order.

    Returns:
        A list of dicts, one per completed review, in input order.
    """
    all_samples = []
    current = {}
    content_parts = []
    in_content = False

    for index, text in enumerate(text_list):
        if 'Reviewed in the' in text:
            # Guard index 0: text_list[-1] would silently pick the LAST line
            # as the title instead of the (non-existent) previous one.
            current['title'] = text_list[index - 1] if index > 0 else ''
            current['place'] = text
        if 'Color' in text:
            current['color'] = text

        if text == 'Helpful':
            # Check the terminator before appending, so the 'Helpful' label
            # itself does not leak into the review content.
            in_content = False
            current['content'] = ''.join(content_parts)
            all_samples.append(current)
            current = {}
            content_parts = []
            continue

        if in_content:
            content_parts.append(text)
        if 'Purchase' in text:
            current['Verify_purchase'] = text
            # Content starts on the line AFTER the purchase marker, which is
            # why this flag is set only after the append check above.
            in_content = True

    return all_samples
# Filter the raw OCR detections by position, then group the remaining text
# lines into per-review records.
pos_list, text_list = use_result_list_to_recentangle(result_list)
all_samples = get_important_information(pos_list, text_list)

# Write the extracted reviews to disk as JSON. The context manager makes
# sure the file is flushed and closed even if serialization fails.
json_path = '/cloud/cloud_disk/users/huh/nlp/vision-reptile/vision_reptile/data/result.json'
with open(json_path, "w") as out_file:
    json.dump(all_samples, out_file, indent=6)