字错误率(cer)
适用于中文等不以空格为分隔的语言
import evaluate
# Character error rate (CER) demo: load the "cer" metric and print the value
# it computes for two prediction/reference sentence pairs.
metric = evaluate.load("cer")
print(
    metric.compute(
        references=["我吃了么a", '明天我要去打篮球'],
        predictions=['你吃了吗', '今天我要去打篮球'],
    )
)
词错误率(wer)
import evaluate
# Word error rate (WER) demo: load the "wer" metric and print the value it
# computes for two space-separated prediction/reference pairs.
metric = evaluate.load("wer")
print(
    metric.compute(
        references=['a b c', '1 2 3'],
        predictions=['a b c', 'd e f'],
    )
)
获取最优f1
def find_best_f1_and_threshold(scores, labels):
    """Find the decision threshold that maximizes F1 for the positive class.

    Samples are ranked by descending score and every cut between two
    *distinct* score values is evaluated, including the cut that keeps all
    samples. The returned metrics are the ones obtained by predicting
    positive whenever ``score >= threshold``.

    :param scores: positive-class probability (higher = more likely positive),
        one value per sample
    :param labels: ground-truth labels; a value equal to 1 marks a positive
    :return: tuple ``(best_f1, best_precision, best_recall, threshold)``;
        all four are 0 when no cut produces a true positive (for example on
        empty input or when there are no positive labels)
    """
    # Function-scope import so the block does not depend on a module-level
    # ``np`` alias being defined elsewhere in the file.
    import numpy as np

    assert len(scores) == len(labels)
    scores = np.asarray(scores)
    labels = np.asarray(labels)

    # Highest score first: the first i + 1 rows are the samples that a
    # threshold placed just below rows[i] would mark as positive.
    rows = sorted(zip(scores, labels), key=lambda x: x[0], reverse=True)
    last = len(rows) - 1

    best_f1 = best_precision = best_recall = 0
    threshold = 0
    ncorrect = 0
    total_num_duplicates = sum(labels)

    for i, (score, label) in enumerate(rows):
        if label == 1:
            ncorrect += 1

        # A threshold cannot separate two samples with the same score, so a
        # cut is only valid after the last row of a group of tied scores.
        if i < last and rows[i + 1][0] == score:
            continue
        # Without any true positive, precision and recall are both 0 and F1
        # is undefined (0 / 0); such a cut can never be the best one.
        if ncorrect == 0:
            continue

        nextract = i + 1
        precision = ncorrect / nextract
        recall = ncorrect / total_num_duplicates
        f1 = 2 * precision * recall / (precision + recall)
        if f1 > best_f1:
            best_f1 = f1
            best_precision = precision
            best_recall = recall
            if i < last:
                # Midpoint between this score and the next lower one.
                threshold = (score + rows[i + 1][0]) / 2
            else:
                # Every sample is extracted: the lowest score itself is the
                # threshold under the ``score >= threshold`` rule.
                threshold = score

    return best_f1, best_precision, best_recall, threshold
寻找最优accuracy
from sklearn.metrics import accuracy_score
def find_best_accuracy_and_threshold(scores, labels):
    """Find the observed score that gives the best accuracy as a threshold.

    Each value in ``scores`` is tried as a threshold; a sample is predicted
    positive (1) when ``score >= threshold`` and negative (0) otherwise.
    When several thresholds reach the same best accuracy, the one that
    appears first in ``scores`` is returned.

    Only observed scores are tried, so the "predict everything negative"
    split is never evaluated.

    Runs in O(n log n): the samples are sorted once and each candidate is
    scored from prefix counts instead of re-labelling every sample.

    :param scores: positive-class probability, one value per sample
    :param labels: ground-truth labels, expected to be 0 or 1
    :return: tuple ``(best_accuracy, best_th)``; ``(0, 0)`` when no threshold
        yields an accuracy above 0 (for example on empty input)
    :raises ValueError: if ``scores`` and ``labels`` differ in length
    """
    # Function-scope imports keep the block self-contained.
    from bisect import bisect_left
    from itertools import accumulate

    scores = list(scores)
    labels = list(labels)
    if len(scores) != len(labels):
        raise ValueError(
            "scores and labels must have the same length, got "
            f"{len(scores)} and {len(labels)}"
        )

    total = len(scores)
    # Sample indices ordered by ascending score.
    order = sorted(range(total), key=scores.__getitem__)
    sorted_scores = [scores[i] for i in order]
    # zeros_below[k] / ones_below[k]: how many of the k lowest-scored samples
    # carry label 0 / label 1.
    zeros_below = [0, *accumulate(int(labels[i] == 0) for i in order)]
    ones_below = [0, *accumulate(int(labels[i] == 1) for i in order)]
    total_ones = ones_below[-1]

    best_accuracy = 0
    best_th = 0
    # Iterate in the caller's order so ties resolve to the first candidate.
    for th in scores:
        # Samples strictly below ``th`` are predicted 0, the rest 1.
        cut = bisect_left(sorted_scores, th)
        correct = zeros_below[cut] + (total_ones - ones_below[cut])
        accuracy = correct / total
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_th = th
    return best_accuracy, best_th