排序问题评价指标

最新推荐文章于 2023-03-04 22:58:54 发布

weixin_30634661

最新推荐文章于 2023-03-04 22:58:54 发布

阅读量173

点赞数

原文链接：http://www.cnblogs.com/kayy/p/10565605.html

版权

# -*- coding: utf-8 -*-

import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
import pandas as pd

def roc_draw(y_true, predict_score, predict_label=None,
             save_path='../output/picture/lr_auc.png'):
    """Plot the ROC curve for a set of scored samples and save it to disk.

    The figure contains the ROC curve, the diagonal reference line from
    (0, 0) to (1, 1), and a title showing the AUC value.

    Args:
        y_true: ground-truth binary labels (0 or 1).
        predict_score: continuous score per sample (float score or rank
            value); used both for the curve and for the AUC in the title.
        predict_label: kept only so existing callers keep working; it is no
            longer used. AUC must be computed from the continuous scores,
            otherwise the number in the title does not describe the curve
            that is drawn.
        save_path: where the image is written. Defaults to the location the
            function has always used.
    """
    fpr, tpr, _ = roc_curve(y_true, predict_score)
    # Same scores as the curve above, so the title is the area under the
    # plotted curve rather than the AUC of thresholded labels.
    auc_value = roc_auc_score(y_true, predict_score)

    fig = plt.figure()
    try:
        plt.plot(fpr, tpr)
        plt.plot([0, 1], [0, 1])
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.0])
        plt.title('AUC: {0}'.format(auc_value))
        plt.savefig(save_path)
    finally:
        # Release the figure even if saving fails; pyplot otherwise keeps
        # every figure alive across repeated calls.
        plt.close(fig)
def cal_topn_accuracy(test_df, preds=None, pos_type_min=3, inplace=False,
                      n_list=None):
    """Compute the share of positive rows among the top-n ranked rows.

    For every cut-off ``n`` the rows with ``rank <= n`` are pooled over all
    sessions, and the fraction of them whose ``deal_type`` is at least
    ``pos_type_min`` is reported (rounded to 4 decimals).

    Args:
        test_df: DataFrame with a ``deal_type`` column. It must also have
            ``rank`` when ``preds`` is None, or ``session`` when ``preds``
            is given.
        preds: optional scores, one per row of ``test_df`` in row order.
            When given, rows are re-ranked inside each session by
            descending score (ties broken by order of appearance) and that
            ranking replaces ``rank``.
        pos_type_min: smallest ``deal_type`` value counted as positive.
        inplace: only relevant when ``preds`` is given. If True, the
            ``pred_score`` and ``rank`` columns are written into ``test_df``
            itself; if False, a private copy is used and ``test_df`` is
            left untouched.
        n_list: cut-offs to evaluate; defaults to
            ``[1, 5, 10, 15, 20, 25, 30, 40, 50, 70, 100]``.

    Returns:
        dict mapping ``'top_<n>'`` to the fraction for that cut-off. The
        value is NaN when no row has ``rank <= n``. The dict is also
        printed, as before.
    """
    if n_list is None:
        n_list = [1, 5, 10, 15, 20, 25, 30, 40, 50, 70, 100]

    ranked_df = test_df
    if preds is not None:
        if not inplace:
            # 'rank' is recomputed below, so only these columns are needed.
            ranked_df = test_df[['session', 'deal_type']].copy()

        # Attach scores by position, not by index label: test_df may carry
        # any index (e.g. a slice of a larger frame), and label alignment
        # would mis-pair rows and introduce NaNs. A length mismatch raises.
        scores = pd.DataFrame(preds, columns=['pred_score'])['pred_score']
        ranked_df['pred_score'] = scores.values
        ranked_df['rank'] = ranked_df['pred_score'].groupby(
            ranked_df['session']).rank(ascending=False, method='first')

    accuracy = {}
    for n in n_list:
        df_topn = ranked_df[ranked_df['rank'] <= n]
        total_cnt = df_topn['deal_type'].count()
        is_positive = df_topn['deal_type'] >= pos_type_min
        pos_cnt = df_topn[is_positive]['deal_type'].count()
        key = 'top_' + str(n)
        if total_cnt == 0:
            # Nothing ranked within the cut-off: the ratio is undefined.
            accuracy[key] = float('nan')
        else:
            accuracy[key] = round((1.0 * pos_cnt) / total_cnt, 4)

    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(accuracy)
    return accuracy

转载于:https://www.cnblogs.com/kayy/p/10565605.html

weixin_30634661

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
排序问题评价指标

# -*- coding: utf-8 -*- import matplotlib.pyplot as plt; from sklearn.metrics import roc_curve; from sklearn.metrics import roc_auc_score; import pandas as pd; ''' y_true: 0 or 1; predict_score: float, rank ''' def...
复制链接

扫一扫