论文辅助笔记：LLM-Mob metric测量

最新推荐文章于 2024-06-12 20:49:18 发布

UQI-LIUWJ

最新推荐文章于 2024-06-12 20:49:18 发布

阅读量272

点赞数 7

分类专栏：论文笔记 python库整理；文章标签：笔记 numpy

本文链接：https://blog.csdn.net/qq_40206371/article/details/138188543

版权

python库整理同时被 2 个专栏收录

325 篇文章 48 订阅

订阅专栏

论文笔记

285 篇文章 73 订阅

订阅专栏

本文介绍了如何使用Python计算和评估模型的性能，包括Top1、Top5和Top10预测的准确性（Acc@k）、加权F1-score以及归一化折扣累积增益（NDCG）。方法涉及读取CSV文件，处理数据，以及使用sklearn库进行计算。

摘要由CSDN通过智能技术生成

0 导入库

import os
import pandas as pd
from sklearn.metrics import f1_score
import ast
import numpy as np

1 基本的metric计算方式

1.1 get_acc1_f1

def get_acc1_f1(df):
    """Return top-1 accuracy and weighted F1 for single-valued predictions.

    Reads the ``prediction`` and ``ground_truth`` columns of ``df``.

    Returns:
        A tuple ``(acc1, f1)``: the fraction of rows whose prediction equals
        the ground truth, and the F1 score averaged over labels with each
        label weighted by its support (number of true instances).
    """
    truth = df['ground_truth']
    guess = df['prediction']
    n_hits = (guess == truth).sum()
    accuracy = n_hits / len(df)
    weighted_f1 = f1_score(truth, guess, average='weighted')
    return accuracy, weighted_f1

1.2 get_is_correct

def get_is_correct(row):
    """Flag whether the ground truth appears among the row's predictions.

    Reads ``row['prediction']`` (a container of candidates) and
    ``row['ground_truth']``, stores the boolean result under
    ``row['is_correct']`` and returns the same ``row`` object.
    """
    candidates = row['prediction']
    row['is_correct'] = row['ground_truth'] in candidates
    return row

1.3 get_is_correct10

def get_is_correct10(row):
    """Flag whether the ground truth is within the top-10, top-5 and top-1.

    Reads ``row['top10']`` and ``row['top5']`` (containers of candidates),
    ``row['top1']`` (a single candidate) and ``row['ground_truth']``. Writes
    the booleans ``is_correct10``, ``is_correct5`` and ``is_correct1`` into
    ``row`` and returns the same ``row`` object.
    """
    # The two list-valued columns are checked by membership.
    for source, flag in (('top10', 'is_correct10'), ('top5', 'is_correct5')):
        candidates = row[source]
        row[flag] = row['ground_truth'] in candidates

    # The top-1 column holds a single value, so it is checked by equality.
    best_guess = row['top1']
    row['is_correct1'] = bool(best_guess == row['ground_truth'])
    return row

1.4 first_nonzero

def first_nonzero(arr, axis, invalid_val=-1):
    """Return the index of the first non-zero element along ``axis``.

    Positions along ``axis`` that contain no non-zero element yield
    ``invalid_val`` instead of an index.
    """
    nonzero = arr != 0
    has_any = nonzero.any(axis=axis)
    # argmax on a boolean array gives the position of the first True.
    first_idx = nonzero.argmax(axis=axis)
    return np.where(has_any, first_idx, invalid_val)

1.5 get_ndcg

# Compute the normalized discounted cumulative gain (NDCG), a ranking-quality
# metric commonly used for recommender systems and information retrieval.
def get_ndcg(prediction, targets, k=10):
    """Return the mean NDCG@k of ranked predictions against single targets.

    Every sample has exactly one relevant item, so its score is
    ``1 / log2(rank + 1)``, where ``rank`` is the 1-based position of the
    target within the first ``k`` predictions. Samples whose target is not
    among the first ``k`` predictions contribute 0.

    Args:
        prediction: sequence of N ranked candidate lists, best first. Lists
            may be shorter or longer than ``k``; they are padded or truncated
            on a copy, so the caller's lists are left untouched.
        targets: the N ground-truth ids (a NumPy array or anything
            ``np.asarray`` accepts).
        k: cut-off rank.

    Returns:
        The sum of the per-sample scores divided by N, or ``0.0`` when there
        are no samples or ``k`` is not positive.
    """
    n_sample = len(prediction)
    if n_sample == 0 or k <= 0:
        return 0.0

    # Build a rectangular (N, k) array from copies of the input lists.
    # Slicing truncates long lists; short ones are padded on the right.
    pad_value = -5
    top_k = [list(xi[:k]) for xi in prediction]
    lengths = np.array([len(row) for row in top_k])
    padded = np.array([row + [pad_value] * (k - len(row)) for row in top_k])

    # Padding slots must never count as a hit, even if a target happens to
    # equal the padding value, so mask them out explicitly.
    is_real = np.arange(k) < lengths.reshape(-1, 1)

    # Reshape and broadcast the targets to (N, k) for element-wise comparison.
    targets = np.broadcast_to(np.asarray(targets).reshape(-1, 1), padded.shape)
    matches = (padded == targets) & is_real

    # argmax on a boolean row is the index of its first True; keep only rows
    # that actually contain a match, then convert 0-based index to rank.
    found = matches.any(axis=1)
    ranks = matches.argmax(axis=1)[found] + 1

    # Discounted gain of each hit.
    ndcg = 1 / np.log2(ranks + 1)

    # Average over all samples (misses count as 0).
    return np.sum(ndcg) / n_sample

2 Top10预测指标衡量

2.1 文件列表获取

# Directory containing the CSV result files evaluated in this section.
output_dir = 'results/geolife/top10_wot'
# os.listdir returns entries in arbitrary order; sort them so the files are
# always loaded in the same order from run to run.
file_list = sorted(file for file in os.listdir(output_dir) if file.endswith('.csv'))
file_list

# Full path of every result file.
file_path_list = [os.path.join(output_dir, file) for file in file_list]
file_path_list

# Preview the first result file.
iter_df = pd.read_csv(file_path_list[0])
iter_df

2.2 创建结果DataFrame

# Empty frame that fixes the leading column order of the combined results.
df = pd.DataFrame({
    'user_id': None,
    'ground_truth': None,
    'prediction': None,
    'reason': None
}, index=[])
df

# Collect every frame first and concatenate once: calling pd.concat inside the
# loop re-copies all previously loaded rows on each iteration.
frames = [df]
for file_path in file_path_list:
    iter_df = pd.read_csv(file_path)
    # Skip the top-k columns when the directory name ends in '1'
    # (presumably the top-1 results directory -- confirm against the data).
    if output_dir[-1] != '1':
        # The prediction column is stored as text; parse it back into Python
        # literals (a list of candidates or a single value).
        pred_series = iter_df['prediction'].apply(ast.literal_eval)  # A pandas series
        # If the parsed prediction is a list (a top-k prediction), keep its
        # first k elements; if it is a single value (only the most likely
        # location), repeat that value k times.
        iter_df['top10'] = pred_series.apply(lambda x: x[:10] if isinstance(x, list) else [x] * 10)
        iter_df['top5'] = pred_series.apply(lambda x: x[:5] if isinstance(x, list) else [x] * 5)
        iter_df['top1'] = pred_series.apply(lambda x: x[0] if isinstance(x, list) else x)
    frames.append(iter_df)
df = pd.concat(frames, ignore_index=True)
df

2.3 调用get_is_correct10

# Run get_is_correct10 on every row (axis=1) and keep the returned rows as
# the new frame.
df = df.apply(func=get_is_correct10, axis=1)
df

2.4 结果计算

# Acc@k: fraction of rows whose is_correct<k> flag is set.
acc1 = (df['is_correct1']).sum() / len(df)
acc5 = (df['is_correct5']).sum() / len(df)
acc10 = (df['is_correct10']).sum() / len(df)
# Weighted F1 of the top-1 prediction against the ground truth.
f1 = f1_score(df['ground_truth'], df['top1'], average='weighted')
# NDCG@10 over the top-10 candidate lists.
preds = df['top10'].tolist()
targets = np.array(df['ground_truth'].tolist())
ndcg = get_ndcg(prediction=preds, targets=targets, k=10)

print("Acc@1: ", acc1)
print("Acc@5: ", acc5)
print("Acc@10: ", acc10)
print("Weighted F1: ", f1)
print("NDCG@10: ", ndcg)
# The string below appears to be the output recorded from the author's run.
'''
Acc@1:  0.3295750216825672
Acc@5:  0.8291413703382481
Acc@10:  0.8736629083550159
Weighted F1:  0.21629743615527502
NDCG@10:  0.6276420364672752
'''

3 Top1

3.1 读取文件+创建df

# Directory containing the CSV result files evaluated in this section.
output_dir = 'results/geolife/top1'
# os.listdir returns entries in arbitrary order; sort them so the files are
# always loaded in the same order from run to run.
file_list = sorted(file for file in os.listdir(output_dir) if file.endswith('.csv'))

# Full path of every result file.
file_path_list = [os.path.join(output_dir, file) for file in file_list]

# Empty frame that fixes the leading column order of the combined results.
df = pd.DataFrame({
    'user_id': None,
    'ground_truth': None,
    'prediction': None,
    'reason': None
}, index=[])



# Preview the first result file.
pd.read_csv(file_path_list[0])

3.2 读取prediction 结果


# Collect every frame first and concatenate once: calling pd.concat inside the
# loop re-copies all previously loaded rows on each iteration.
frames = [df]
for file_path in file_path_list:
    iter_df = pd.read_csv(file_path)
    frames.append(iter_df)
df = pd.concat(frames, ignore_index=True)

# Convert both columns to int so predictions and ground truth compare by value.
df['prediction'] = df['prediction'].apply(int)
df['ground_truth'] = df['ground_truth'].apply(int)
df

3.3 计算metric

# Compute the two metrics returned by get_acc1_f1 for the combined frame.
acc1, f1 = get_acc1_f1(df)
print("Acc@1: ", acc1)
print("F1: ", f1)
# The string below appears to be the output recorded from the author's run.
'''
Acc@1:  0.4512864989881469
F1:  0.403742729579556
'''

UQI-LIUWJ

关注

7
点赞
踩
5

收藏

觉得还不错? 一键收藏
打赏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录