def eval_sighan2015_by_model(sighan_path):
    """Compute sentence-level correction metrics from a prediction file.

    Each non-blank line of the file must hold exactly three fields separated
    by a single space: the source sentence, the model's predicted correction,
    and the reference (gold) correction.

    A sample is "positive" when the source differs from the reference (the
    source contains an error) and "negative" when they are equal. A
    prediction is counted as correct only if it equals the reference exactly.

    Args:
        sighan_path: Path to the UTF-8 encoded prediction file.

    Returns:
        A tuple ``(acc, precision, recall, f1)`` of floats. All four values
        are 0.0 when the file contains no samples.

    Raises:
        ValueError: If a non-blank line does not split into exactly three
            space-separated fields.
    """
    true_pos = 0   # erroneous source, prediction equals the reference
    true_neg = 0   # clean source, prediction left it untouched
    false_neg = 0  # erroneous source, prediction does not equal the reference
    change_num = 0  # sentences the model modified (right or wrong)
    total_num = 0
    start_time = time.time()
    with open(sighan_path, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            fields = line.split(" ")
            if len(fields) != 3:
                raise ValueError(
                    f"{sighan_path}:{line_no}: expected 3 space-separated "
                    f"fields (src, prediction, target), got {len(fields)}"
                )
            src, tgt_pred, tgt = fields
            total_num += 1
            if src != tgt_pred:
                change_num += 1
            if src == tgt:
                # Negative sample: only an untouched prediction is correct.
                # A wrong change here is a false positive; it is accounted
                # for through change_num rather than a separate counter.
                if tgt == tgt_pred:
                    true_neg += 1
            elif tgt == tgt_pred:
                # Positive sample that was fixed exactly.
                true_pos += 1
            else:
                # Positive sample that was missed or fixed incorrectly.
                false_neg += 1
    spend_time = time.time() - start_time

    # Guard against an empty file, which would otherwise divide by zero.
    acc = (true_pos + true_neg) / total_num if total_num else 0.0
    # Precision is measured against every sentence the model changed, not
    # just TP + FP. Wrong edits on erroneous sentences therefore also lower
    # it, which makes this figure noticeably stricter than TP / (TP + FP).
    # true_pos > 0 implies change_num > 0, so the division is safe.
    precision = true_pos / change_num if true_pos > 0 else 0.0
    recall = true_pos / (true_pos + false_neg) if true_pos > 0 else 0.0
    if precision + recall != 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0
    print(
        f'Sentence Level: acc:{acc:.4f}, precision:{precision:.4f}, recall:{recall:.4f}, f1:{f1:.4f}, cost time:{spend_time:.2f} s')
    return acc, precision, recall, f1
中文错别字纠正评价代码
最新推荐文章于 2023-07-04 14:18:01 发布