# seqeval.metrics: worked example of computing accuracy, precision, recall and F1 for entity recognition (NER).


# from .help import flatten_lists
from seqeval.metrics import accuracy_score
from seqeval.metrics import classification_report
from seqeval.metrics import f1_score
from seqeval.metrics import precision_score
from seqeval.metrics import recall_score


def flatten_lists(lists):
    """Flatten a list of lists by one level.

    Args:
        lists: An iterable whose items are either lists (their elements are
            spliced into the result) or any other object (appended as-is).

    Returns:
        A new flat list. Only one level of nesting is removed; lists nested
        deeper than that are kept as elements.
    """
    flattened = []
    for item in lists:
        # isinstance instead of an exact type comparison, so that list
        # subclasses are spliced as well rather than appended as one element.
        if isinstance(item, list):
            flattened.extend(item)
        else:
            flattened.append(item)
    return flattened


# 在计算准确率时,我抛弃了标签级准确率,选择了对ner任务更为合理的实体级准确率
def _find_tag(labels, B_label="B-defect",I_label="M-defect", E_label="E-defect", S_label="S-defect"):
    result = []
    lenth = 0
    for num in range(len(labels)):
        if labels[num] == B_label:
            song_pos0 = num
        if labels[num] == B_label and labels[num+1] == E_label:
            lenth = 2
            result.append((song_pos0,lenth))

        if labels[num] == I_label and labels[num-1] == B_label:
            lenth = 2
            for num2 in range(num,len(labels)):
                if labels[num2] == I_label and labels[num2-1] == I_label:
                    lenth += 1
                if labels[num2] == E_label:
                    lenth += 1
                    result.append((song_pos0,lenth))
                    break
        if labels[num] == S_label:
            lenth = 1
            song_pos0 = num
            result.append((song_pos0,lenth))
            
    return result

# Labels of the bridge dataset after converting the annotation scheme from
# BIO to BMES. Each tuple is (begin, middle, end, single) for one entity type.
# Every entity type is listed exactly once: find_all_tag keys its result by
# entity name, so a repeated tuple would only repeat the same scan.
tags = [
    ("B-defect", "M-defect", "E-defect", "S-defect"),
    ("B-comp", "M-comp", "E-comp", "S-comp"),
    ("B-action", "M-action", "E-action", "S-action"),
    ("B-Inspec", "M-Inspec", "E-Inspec", "S-Inspec"),
]

 
def find_all_tag(labels):
    """Locate the entities of every type listed in ``tags``.

    Args:
        labels: Flat list of label strings.

    Returns:
        Dict mapping each entity type name (the second "-"-separated field of
        the begin label, e.g. "defect" for "B-defect") to the value returned
        by ``_find_tag`` for that type's four labels.
    """
    return {
        begin.split("-")[1]: _find_tag(
            labels, B_label=begin, I_label=middle, E_label=end, S_label=single
        )
        for begin, middle, end, single in tags
    }

def precision(pre_labels,true_labels):
    """Compute entity-level precision: of the m predicted entities, n are correct -> n / m.

    Both inputs are flattened by one level with ``flatten_lists`` before the
    entities are located, so the positions used below index into the
    flattened sequences. A predicted entity counts as correct when the
    predicted labels over its span equal the true labels over the same span.

    Args:
        pre_labels: Predicted labels, a list of label strings or a list of
            per-sentence label lists.
        true_labels: Gold labels, laid out like ``pre_labels``.

    Returns:
        Tuple ``(total_precision, result_dic)``. ``total_precision`` is the
        precision over all predicted entities, or 0.0 when no entity was
        predicted. ``result_dic`` maps each entity type that has at least one
        predicted entity to its precision.
    """
    print("评价实体级查准率(精确率)的输入数据为:")
    print(pre_labels)
    print(true_labels)
    pre_labels = flatten_lists(pre_labels)  # 2-D list -> 1-D list
    true_labels = flatten_lists(true_labels)

    # Start index and length of every predicted entity, grouped by type.
    pre_result = find_all_tag(pre_labels)

    result_dic = {}
    correct_total = 0
    predicted_total = 0
    for name, spans in pre_result.items():
        if not spans:
            # Types without any predicted entity get no entry in result_dic.
            continue
        correct = sum(
            pre_labels[start:start + length] == true_labels[start:start + length]
            for start, length in spans
        )
        correct_total += correct
        predicted_total += len(spans)
        result_dic[name] = correct / len(spans)

    # Guard the division: with no predicted entity at all there is nothing to
    # divide by, so report 0.0 instead of raising ZeroDivisionError.
    total_precision = correct_total / predicted_total if predicted_total else 0.0
    print("total_precision:",total_precision)
    print("result_dic:", result_dic)
    return total_precision, result_dic

#使用seqeval计算精确率、召回率和F1值的代码
# https://zhuanlan.zhihu.com/p/495414141
'''示例数据
    y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
    y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
'''
def getMetrics(pre_labels, true_labels):
    """Score predictions with seqeval and print a report.

    Note the argument order: this function takes the predictions first, while
    every seqeval call below receives ``(true_labels, pre_labels)``.

    Args:
        pre_labels: Predicted labels, one list of label strings per sentence.
        true_labels: Gold labels, laid out like ``pre_labels``.

    Returns:
        Tuple ``(precision, recall, f1)`` as returned by seqeval. The accuracy
        score is printed but not returned.
    """
    scores = tuple(
        metric(true_labels, pre_labels)
        for metric in (precision_score, recall_score, f1_score)
    )
    label_accuracy = accuracy_score(true_labels, pre_labels)
    print(classification_report(true_labels, pre_labels))
    print("precision,recall,f1:", *scores)
    print("accuracy_sco:", label_accuracy)
    return scores




if __name__ == "__main__":
    print("")
    y_true = [
        ['B-comp', 'E-comp', 'O', 'O', 'S-action', 'S-action',
         'B-defect', 'M-defect', 'E-defect', 'S-action'],
        ['B-comp', 'E-comp', 'O', 'S-action', 'B-defect', 'E-defect', 'O'],
    ]
    y_pred = [
        ['O', 'O', 'O', 'O', 'S-action', 'O',
         'B-defect', 'M-defect', 'E-defect', 'S-action'],
        ['B-comp', 'E-comp', 'S-action', 'O', 'O', 'B-defect', 'O'],
    ]
    # Walk-through of the seqeval numbers for this example:
    # - Gold data: 2 comp entities, 2 defect entities, 4 action entities,
    #   8 entities in total. That count is the "support" column: the number
    #   of samples of the row's class in the test data. With a small support
    #   the precision/recall figures for that class are unstable; a larger
    #   support gives a more reliable evaluation.
    # - action: 3 predicted, 2 correct -> precision 2/3 = 0.6667,
    #   recall 2/4 = 0.5, f1 = 0.5714.
    # - comp: 1 predicted and it is correct -> precision 1.0; 2 exist and
    #   1 was found -> recall 0.5, f1 = 0.67.
    # - defect: 2 predicted (a run of B/M/E labels or an isolated label also
    #   counts as a predicted entity), 1 correct -> precision 0.5,
    #   recall 0.5, f1 = 0.5.
    # - The overall accuracy reported by seqeval.metrics is label-level, not
    #   entity-level: matching labels (10) / total labels (17) = 0.5882.
    # - Overall: 6 entities predicted, 4 correct -> precision 4/6 = 0.6667;
    #   8 gold entities, 4 found -> recall 0.5; f1 = 0.5714.
    getMetrics(y_pred, y_true)
    # Output recorded for the call above:
    #
    #               precision    recall  f1-score   support
    #
    #       action       0.67      0.50      0.57         4
    #         comp       1.00      0.50      0.67         2
    #       defect       0.50      0.50      0.50         2
    #
    #    micro avg       0.67      0.50      0.57         8
    #    macro avg       0.72      0.50      0.58         8
    # weighted avg       0.71      0.50      0.58         8
    #
    # precision,recall,f1: 0.6666666666666666 0.5 0.5714285714285715
    # accuracy_sco: 0.5882352941176471
    print("===============")
    precision(y_pred, y_true)
    # Output recorded for the call above:
    #   total_precision: 0.8   (the dangling 'B-defect' is not counted as a defect entity)
    #   result_dic: {'defect': 1.0, 'comp': 1.0, 'action': 0.6666666666666666}


# (Web-page residue from the source blog post - comment box and payment-widget text - is not part of the example.)