# -*- coding: utf-8 -*-
import re
# Default locations of the gold-standard corpus (segmented and POS-tagged) and
# of the segmenter output that is scored against it.
correctfilename = "../data/199801_seg&pos.txt"
segfilename = "../data/seg_BMM.txt"


def _strip_brackets(word):
    """Return *word* without a leading '[' and/or a trailing ']' marker."""
    if word.startswith('['):
        word = word[1:]
    if word.endswith(']'):
        word = word[:-1]
    return word


def _spans(words):
    """Return the (start, end) character offsets of consecutive *words*."""
    spans = []
    start = 0
    for word in words:
        end = start + len(word)
        spans.append((start, end))
        start = end
    return spans


def evaluate(correct_path=correctfilename, seg_path=segfilename,
             encoding='gbk'):
    """Score a segmentation result against the gold corpus.

    The two files are read in lockstep, one line of each at a time. A gold
    line is a whitespace-separated sequence of ``word/tag`` tokens; only the
    text before the first ``/`` is used, and a leading ``[`` or trailing ``]``
    on it is ignored when measuring the word. A segmented line is a sequence
    of words separated (and normally terminated) by ``/``. Gold lines that are
    blank are skipped together with the segmented line at the same position.

    A gold word counts as correct when the segmented line contains a word
    with exactly the same start and end character offsets.

    Args:
        correct_path: Path of the gold-standard file.
        seg_path: Path of the segmenter output file.
        encoding: Text encoding used to read both files.

    Returns:
        A tuple ``(correct, correct_sum, seg_sum, allwrong)``: the number of
        correctly segmented words, the number of gold words, the number of
        words produced by the segmenter, and a list of ``{line_number: word}``
        dicts (1-based line numbers) for every gold word that was missed.

    Precision, recall and F1 are printed, one per line. Each is reported as
    0.0 when its denominator is zero instead of raising ZeroDivisionError.
    """
    correct_sum = 0
    seg_sum = 0
    correct = 0
    allwrong = []
    with open(correct_path, 'r', encoding=encoding) as gold_file, \
            open(seg_path, 'r', encoding=encoding) as seg_file:
        paired_lines = zip(gold_file, seg_file)
        for line_no, (gold_line, seg_line) in enumerate(paired_lines, start=1):
            if gold_line.startswith('\n'):
                continue
            # split() ignores leading/trailing whitespace and the newline, so
            # the last token survives even without trailing padding, and
            # partition() keeps a token whole when it carries no '/tag'.
            gold_words = [tok.partition('/')[0] for tok in gold_line.split()]
            # Drop empty pieces instead of blindly discarding the last one, so
            # a line lacking the final '/' or newline keeps its last word and
            # seg_sum counts exactly the words whose spans are compared.
            seg_words = [w for w in seg_line.rstrip('\n').split('/') if w]
            correct_sum += len(gold_words)
            seg_sum += len(seg_words)

            seg_spans = set(_spans(seg_words))
            gold_spans = _spans(_strip_brackets(w) for w in gold_words)
            for word, span in zip(gold_words, gold_spans):
                if span in seg_spans:
                    correct += 1
                else:
                    allwrong.append({line_no: word})

    precision = correct / seg_sum if seg_sum else 0.0
    recall = correct / correct_sum if correct_sum else 0.0
    if precision + recall:
        f_score = 2 * precision * recall / (precision + recall)
    else:
        f_score = 0.0
    print(precision)
    print(recall)
    print(f_score)
    return correct, correct_sum, seg_sum, allwrong
# Script entry point: when the file is executed directly, score the default
# segmenter output against the default gold corpus.
if __name__ == "__main__":
    evaluate()
# 3.3 Evaluate
# 最新推荐文章于 2024-04-30 19:24:04 发布  (scraped blog-page footer: "latest recommended article published 2024-04-30 19:24:04")