该代码用于统计 OCR 识别的错误数:以单个字符为统计单位,而不是以整张图片为统计单位。
目前只支持英文文本;实现基于 difflib,在某些情况下结果并不准确,请使用者用自己的用例自行测试。
import difflib
def count_error(text1: str, text2: str) -> int:
    """Count the character-level differences between two texts.

    The texts are aligned character by character with
    ``difflib.SequenceMatcher`` and every non-matching region contributes
    to the total:

    * deleted run (present only in ``text1``)  -> its length
    * inserted run (present only in ``text2``) -> its length
    * replaced run -> the longer of the two sides, so a substitution is
      counted once and any surplus characters count as extra errors

    Line endings are normalised through ``str.splitlines`` before the
    comparison, so ``"\\r\\n"`` versus ``"\\n"`` and a trailing newline are
    not reported as errors.

    Args:
        text1: First text (e.g. the reference transcription).
        text2: Second text (e.g. the OCR output).

    Returns:
        The number of differing characters; ``0`` when the texts are equal
        after line-ending normalisation.
    """
    # Re-joining the split lines gives both inputs the same "\n" separators,
    # keeping the comparison independent of the line-ending convention.
    reference = '\n'.join(text1.splitlines())
    candidate = '\n'.join(text2.splitlines())

    # autojunk is disabled: for sequences of 200+ items the heuristic marks
    # frequent characters as junk, which degrades character-level alignment.
    matcher = difflib.SequenceMatcher(None, reference, candidate, autojunk=False)

    # For 'delete' / 'insert' one side is empty, so max() yields the length
    # of the non-empty side; for 'replace' it yields the longer side.
    return sum(
        max(a_end - a_start, b_end - b_start)
        for tag, a_start, a_end, b_start, b_end in matcher.get_opcodes()
        if tag != 'equal'
    )
# Demo: compare two sample strings and print the number of character errors.
text1, text2 = "abcdefg", "abcdedg"
print(count_error(text1, text2))