# Sample predicted label sequence: one label per token, separated by spaces.
# Both "0" (digit zero) and "O" (letter) appear as the outside label; the
# evaluation functions in this file rewrite "0" to "O" after splitting.
pre = "0 0 B_SONG I_SONG I_SONG 0 B_SONG I_SONG I_SONG 0 0 B_SINGER I_SINGER I_SINGER 0 O O O B_ALBUM I_ALBUM I_ALBUM O O B_TAG I_TAG I_TAG O"
# Sample gold label sequence that `pre` is scored against in the __main__ block.
true = "0 0 B_SONG I_SONG I_SONG 0 0 0 0 0 0 B_SINGER I_SINGER I_SINGER 0 O O O B_ALBUM I_ALBUM I_ALBUM O O B_TAG I_TAG I_TAG O"
# Each tuple describes one entity type as its (begin label, inside label) pair.
tags = [("B_SONG","I_SONG"),("B_SINGER","I_SINGER"),("B_ALBUM","I_ALBUM"),("B_TAG","I_TAG")]
def _find_tag(labels,B_label="B_SONG",I_label="I_SONG"):
result = []
if isinstance(labels,str):
labels = labels.strip().split()
labels = ["O" if label =="0" else label for label in labels]
# print(labels)
for num in range(len(labels)):
if labels[num] == B_label:
#起始位置
song_pos0 = num
if labels[num] == I_label and labels[num-1] == B_label:
#实体长度
lenth = 2
for num2 in range(num,len(labels)):
if labels[num2] == I_label and labels[num2-1] == I_label:
lenth += 1
if labels[num2] == "O":
result.append((song_pos0,lenth))
break
# result = [(起始位置,实体长度),(起始位置,实体长度)...]
return result
def find_all_tag(labels):
    """Collect the entity spans of every entity type listed in ``tags``.

    :param labels: label string or list, passed unchanged to ``_find_tag``
    :return: dict mapping each entity type name (the part of the begin label
        after the first underscore, e.g. ``"SONG"``) to that type's list of
        ``(start_index, length)`` tuples
    """
    spans_by_type = {}
    for pair in tags:
        begin_label, inside_label = pair[0], pair[1]
        spans = _find_tag(labels, B_label=begin_label, I_label=inside_label)
        type_name = begin_label.split("_")[1]
        spans_by_type[type_name] = spans
    return spans_by_type
def precision(pre_labels, true_labels):
    """Compute entity-level precision of a predicted label sequence.

    A predicted entity counts as correct only when the gold sequence contains
    an entity of the same type with the same start index and the same length,
    so a prediction that covers only part of a longer gold entity is wrong.

    :param pre_labels: predicted labels, as a whitespace-separated string or
        a list of labels
    :param true_labels: gold labels, in the same format
    :return: correct predicted entities divided by all predicted entities;
        ``0.0`` when no entity was predicted
    """
    # Split string input into a list of labels.
    if isinstance(pre_labels, str):
        pre_labels = pre_labels.strip().split()
    if isinstance(true_labels, str):
        true_labels = true_labels.strip().split()
    # Treat the digit "0" as the outside label "O".
    pre_labels = ["O" if label == "0" else label for label in pre_labels]
    true_labels = ["O" if label == "0" else label for label in true_labels]

    pre_result = find_all_tag(pre_labels)
    true_result = find_all_tag(true_labels)

    predicted = 0
    correct = 0
    # Walk every entity type, then every predicted span of that type.
    for name, spans in pre_result.items():
        gold_spans = set(true_result.get(name, ()))
        for span in spans:
            predicted += 1
            if span in gold_spans:
                correct += 1

    # No predictions: report 0.0 instead of dividing by zero.
    if predicted == 0:
        return 0.0
    return correct / predicted
def recall(pre_labels, true_labels):
    """Compute entity-level recall of a predicted label sequence.

    A gold entity counts as found only when the prediction contains an entity
    of the same type with the same start index and the same length, so a
    prediction that runs past the end of the gold entity does not count.

    :param pre_labels: predicted labels, as a whitespace-separated string or
        a list of labels
    :param true_labels: gold labels, in the same format
    :return: gold entities that were predicted exactly divided by all gold
        entities; ``0.0`` when the gold sequence has no entity
    """
    # Split string input into a list of labels.
    if isinstance(pre_labels, str):
        pre_labels = pre_labels.strip().split()
    if isinstance(true_labels, str):
        true_labels = true_labels.strip().split()
    # Treat the digit "0" as the outside label "O".
    pre_labels = ["O" if label == "0" else label for label in pre_labels]
    true_labels = ["O" if label == "0" else label for label in true_labels]

    pre_result = find_all_tag(pre_labels)
    true_result = find_all_tag(true_labels)

    expected = 0
    found = 0
    # Walk every entity type, then every gold span of that type.
    for name, spans in true_result.items():
        predicted_spans = set(pre_result.get(name, ()))
        for span in spans:
            expected += 1
            if span in predicted_spans:
                found += 1

    # No gold entities: report 0.0 instead of dividing by zero.
    if expected == 0:
        return 0.0
    return found / expected
def f1_score(precision, recall):
    """Return the F1 score, the harmonic mean of precision and recall.

    :param precision: precision value
    :param recall: recall value
    :return: ``2 * precision * recall / (precision + recall)``, or ``0.0``
        when both inputs are zero (the harmonic mean is undefined there)
    """
    denominator = precision + recall
    if denominator == 0:
        return 0.0
    return (2 * precision * recall) / denominator
if __name__ == '__main__':
    # Store the scores under their own names: assigning to `precision` or
    # `recall` here would overwrite the module-level functions.
    precision_value = precision(pre, true)
    recall_value = recall(pre, true)
    f1 = f1_score(precision_value, recall_value)
    print(precision_value)
    print(recall_value)
    print(f1)
# NER entity-level evaluation: precision, recall and F1
# (Web-page residue: "latest recommended article published on 2023-06-07 16:49:55")