import bisect

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import precision_recall_curve
# Create a separate Metrics.py file for these helpers.
def calc_auc(y_true, y_pred):
    """Return the ROC AUC of the scores ``y_pred`` against the labels ``y_true``.

    ``y_pred`` holds continuous scores (any value in [0, 1]) rather than hard
    0/1 labels. The computation is delegated to
    ``sklearn.metrics.roc_auc_score``.
    """
    auc = metrics.roc_auc_score(y_true, y_pred)
    return auc
def calc_ks(y_true, y_pred):
    """Return the KS statistic: the largest gap between TPR and FPR.

    ``y_pred`` holds continuous scores (any value in [0, 1]). The ROC curve
    is computed with ``sklearn.metrics.roc_curve`` and the maximum of
    ``tpr - fpr`` over all of its points is returned.
    """
    false_pos_rate, true_pos_rate, _ = metrics.roc_curve(y_true, y_pred)
    gaps = true_pos_rate - false_pos_rate
    return max(gaps)
def calc_acc(y_true, y_pred):
    """Return the accuracy of hard predictions ``y_pred`` against ``y_true``.

    ``y_pred`` must already be binarised to {0, 1} (apply a threshold to the
    scores before calling). The computation is delegated to
    ``sklearn.metrics.accuracy_score``.
    """
    accuracy = metrics.accuracy_score(y_true, y_pred)
    return accuracy
def calc_f1(y_true, y_pred):
    """Return ``(recall, precision, f1)`` for hard 0/1 predictions.

    ``y_pred`` must already be binarised to {0, 1} (apply a threshold to the
    scores before calling).

    NOTE(review): this module defines ``calc_f1`` again further down with an
    extra ``k`` parameter; at import time that later definition replaces
    this one.
    """
    recall = metrics.recall_score(y_true, y_pred)
    precision = metrics.precision_score(y_true, y_pred)
    f1 = metrics.f1_score(y_true, y_pred)
    return recall, precision, f1
# Precision at the operating point where recall is greater than 0.9
def precision_at_r9(y_true, y_pred, min_recall=0.9):
    """Return the strictest operating point whose recall exceeds ``min_recall``.

    The precision-recall curve from ``sklearn.metrics.precision_recall_curve``
    lists recall in non-increasing order as the threshold grows, so a
    sorted-ascending search such as ``bisect`` cannot be applied to it. The
    qualifying points are located explicitly instead.

    Args:
        y_true: Ground-truth binary labels.
        y_pred: Continuous scores; a larger score means "more positive".
        min_recall: Recall level that must be strictly exceeded
            (defaults to 0.9).

    Returns:
        A ``(recall, precision, threshold)`` tuple for the highest threshold
        whose recall is still greater than ``min_recall``. If no point
        qualifies, the lowest-threshold point (the one with the highest
        recall) is returned.
    """
    precision, recall, thresholds = precision_recall_curve(y_true, y_pred)
    # The final curve point (recall 0, precision 1) has no matching
    # threshold, so it is excluded from the search.
    qualifying = np.flatnonzero(recall[:-1] > min_recall)
    # Recall never increases along the curve, so the last qualifying index
    # corresponds to the highest threshold that still meets the target.
    idx = qualifying[-1] if qualifying.size else 0
    return recall[idx], precision[idx], thresholds[idx]
# Recall at the operating point where precision is greater than 0.9
def recall_at_p9(y_true, y_pred, min_precision=0.9):
    """Return the highest-recall operating point whose precision exceeds ``min_precision``.

    Precision is not monotonic along the curve returned by
    ``sklearn.metrics.precision_recall_curve``, so a binary search such as
    ``bisect`` gives unreliable results on it. The first qualifying point is
    located with a linear scan instead.

    Args:
        y_true: Ground-truth binary labels.
        y_pred: Continuous scores; a larger score means "more positive".
        min_precision: Precision level that must be strictly exceeded
            (defaults to 0.9).

    Returns:
        A ``(recall, precision, threshold)`` tuple for the lowest threshold
        whose precision is greater than ``min_precision``. If no real
        threshold qualifies, the last real point of the curve (the highest
        threshold) is returned.
    """
    precision, recall, thresholds = precision_recall_curve(y_true, y_pred)
    # The final curve point (recall 0, precision 1) has no matching
    # threshold, so it is excluded from the search.
    qualifying = np.flatnonzero(precision[:-1] > min_precision)
    # Recall never increases along the curve, so the first qualifying index
    # is the qualifying point with the highest recall.
    idx = qualifying[0] if qualifying.size else len(thresholds) - 1
    return recall[idx], precision[idx], thresholds[idx]
# NDCG@K
def get_dcg(y_pred, y_true, k):
    """Return DCG@k of the labels ``y_true`` when ranked by ``y_pred``.

    ``y_pred`` and ``y_true`` must be aligned element by element, and a
    larger ``y_pred`` must mean "closer to label 1" (more relevant).
    Each of the top ``k`` items contributes ``(2 ** label - 1) / log2(rank + 1)``
    with ranks counted from 1.
    """
    ranked = pd.DataFrame({"y_pred": y_pred, "y_true": y_true}).sort_values(
        by="y_pred", ascending=False
    )
    # Labels of the k highest-scored items, best score first.
    top_labels = ranked["y_true"].iloc[0:k]
    gains = 2 ** top_labels - 1
    # Ranks start at 1, so the item at rank i is discounted by log2(i + 1).
    ranks = np.arange(1, top_labels.count() + 1)
    discounts = np.log2(ranks + 1)
    return np.sum(gains / discounts)
def calc_ndcg(y_true, y_pred, k):
    """Return NDCG@k: DCG of the predicted ranking divided by the ideal DCG.

    The ideal DCG is obtained by ranking the labels by themselves, which
    puts the most relevant items first.

    Args:
        y_true: Ground-truth relevance labels.
        y_pred: Scores aligned with ``y_true``; larger means more relevant.
        k: Number of top-ranked items to evaluate.

    Returns:
        ``dcg / idcg``, or ``0.0`` when the ideal DCG is zero (no relevant
        item among the labels) so that the result is never NaN.
    """
    idcg = get_dcg(y_true, y_true, k)
    # With no relevant labels both DCG values are 0; avoid computing 0 / 0.
    if idcg == 0:
        return 0.0
    return get_dcg(y_pred, y_true, k) / idcg
# Recall@K, Precision@K
def calc_f1(y_true, y_pred, k=None):
    """Return ``(recall, precision, f1)`` over the ``k`` highest-scored samples.

    Samples are sorted by ``y_pred`` in descending order, the first ``k`` are
    kept, their scores are rounded to hard 0/1 predictions, and the three
    metrics are computed on that subset only.

    Args:
        y_true: Ground-truth binary labels.
        y_pred: Scores aligned with ``y_true``; larger means closer to
            label 1.
        k: Number of top-ranked samples to evaluate. ``None`` (the default)
            keeps every sample, so two-argument callers continue to work.

    Returns:
        A ``(recall, precision, f1)`` tuple computed with ``sklearn.metrics``.
    """
    ranked = pd.DataFrame({"y_pred": y_pred, "y_true": y_true})
    ranked = ranked.sort_values(by="y_pred", ascending=False)
    top = ranked.iloc[0:k, :]  # a slice ending at None keeps all rows
    labels = np.array(top["y_true"])
    # np.round rounds halves to the nearest even value, so a score of
    # exactly 0.5 becomes 0.
    hard_preds = np.round(np.array(top["y_pred"]))
    # Use the imported ``metrics`` module: the bare names recall_score,
    # precision_score and f1_score are not imported in this file.
    return (
        metrics.recall_score(labels, hard_preds),
        metrics.precision_score(labels, hard_preds),
        metrics.f1_score(labels, hard_preds),
    )