[sklearn] sklearn.metrics

1. sklearn.metrics.make_scorer

1.1 Parameter explanation

'''
score_func
    a scoring function such as sklearn.metrics.accuracy_score,
    or a loss function such as sklearn.metrics.mean_squared_error
greater_is_better
    True (default): higher values are better; use when score_func is a scoring function
    False: use when score_func is a loss where lower means a better fit;
           the returned scorer flips the sign of the result so that higher is still better
needs_proba
    True: score_func expects class probabilities (predict_proba)
    False (default): score_func expects predicted labels
needs_threshold
    True: score_func expects a continuous decision value
          (decision_function or predict_proba); binary classification only
    False (default)
'''
sklearn.metrics.make_scorer(score_func, greater_is_better=True, needs_proba=False, needs_threshold=False)
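
A minimal sketch of the sign flip described above (the toy data and model are made up for illustration):

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import make_scorer, mean_squared_error

X = np.arange(10, dtype=float).reshape(-1, 1)
y = 2 * X.ravel() + 1
model = LinearRegression().fit(X, y)

# mean_squared_error is a loss, so pass greater_is_better=False;
# the returned scorer negates the value so "higher is better" still holds
mse_scorer = make_scorer(mean_squared_error, greater_is_better=False)
print(mse_scorer(model, X, y))  # <= 0: the loss comes back with its sign flipped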

1.2 Use in randomized/grid search: sklearn.model_selection.RandomizedSearchCV/GridSearchCV(scoring=make_scorer(*))

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV, KFold, RandomizedSearchCV

def smape(y_true, y_pred):
    # symmetric mean absolute percentage error; rows where both
    # values are zero contribute 0 instead of dividing by zero
    smap = np.zeros(len(y_true))
    num = np.abs(y_true - y_pred)
    dem = (np.abs(y_true) + np.abs(y_pred)) / 2
    pos_ind = (y_true != 0) | (y_pred != 0)
    smap[pos_ind] = num[pos_ind] / dem[pos_ind]
    return 100 * np.mean(smap)

rfc = RandomForestRegressor()
# example search space (the values here are illustrative)
forest_params = [{'n_estimators': [100, 300, 500],
                  'max_features': ['sqrt', 'log2'],
                  'max_depth': [5, 10, None],
                  'min_samples_split': [2, 5],
                  'min_samples_leaf': [1, 2],
                  'bootstrap': [True, False]}]
cv = KFold(n_splits=10, shuffle=True, random_state=42)
# pass the scorer via the scoring parameter; SMAPE is a loss,
# so greater_is_better=False makes the search minimize it
clf = RandomizedSearchCV(rfc, forest_params, cv=cv,
                         scoring=make_scorer(smape, greater_is_better=False),
                         verbose=0)
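
Fitting then proceeds as usual. Because the scorer flips the sign of the loss, negate best_score_ to read the SMAPE back (X_train and y_train below are assumed, hypothetical training arrays):

clf.fit(X_train, y_train)   # X_train, y_train: hypothetical training data
print(clf.best_params_)
print(-clf.best_score_)     # flip the sign back to get the best SMAPE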

1.3 Use in cross-validation: sklearn.model_selection.cross_validate(scoring=make_scorer(*))

from sklearn.metrics import make_scorer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate

liner_model = LinearRegression()
# reuse the smape function from 1.2; lower is better, so flip the sign
my_scorer = make_scorer(smape, greater_is_better=False)
scoring = {
    'customize_score': my_scorer
}
# shuffle=True is required whenever random_state is set
kfold = KFold(n_splits=10, shuffle=True, random_state=0)
# pass the scorer dict via the scoring parameter;
# x_train_std and y_train are assumed to be prepared training data
cv_cross = cross_validate(liner_model, x_train_std, y_train, cv=kfold, scoring=scoring)

print(cv_cross['test_customize_score'].mean())  # mean across folds (negative: sign-flipped loss)
print(cv_cross['test_customize_score'].std())   # standard deviation across folds

2. sklearn.metrics.f1_score/recall_score/precision_score

# model is a trained keras model; model, X_test and batch_size
# are assumed to come from the preceding training code
# .predict() returns the predicted class probabilities
pre = model.predict(X_test, batch_size=batch_size)
pre, y_test  # notebook-style display: predictions, then one-hot labels
'''
array([[0.18804531, 0.3357192 , 0.47623548],
        [0.30215347, 0.36785322, 0.32999334],
        [0.18804531, 0.3357192 , 0.47623548],
        [0.18804531, 0.3357192 , 0.47623548],
        [0.18804531, 0.3357192 , 0.47623548],
        [0.30215347, 0.36785322, 0.32999334],
        [0.30215347, 0.36785322, 0.32999334],
        [0.18804531, 0.3357192 , 0.47623548],
        [0.30215347, 0.36785322, 0.32999334]])
array([[1., 0., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        [1., 0., 0.]])
'''
# convert each row of probabilities to a 0/1 prediction:
# every value equal to the row maximum becomes 1, the rest 0
for idx1 in range(len(pre)):
    max_val = max(pre[idx1])
    for idx2 in range(len(pre[idx1])):
        if max_val == pre[idx1][idx2]:
            pre[idx1][idx2] = 1
        else:
            pre[idx1][idx2] = 0
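
The loop above can also be written as one vectorized step (a sketch; note that np.argmax keeps only the first maximum per row, whereas the loop marks every tied maximum):

import numpy as np

# one-hot encode the index of each row's maximum
pre = np.eye(pre.shape[1])[np.argmax(pre, axis=1)]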

from sklearn.metrics import f1_score, recall_score, precision_score

f1 = f1_score(y_true=y_test, y_pred=pre, average='weighted')
recall = recall_score(y_true=y_test, y_pred=pre, average='weighted')
precision = precision_score(y_true=y_test, y_pred=pre, average='weighted')
print(" f1: %f — precision: %f — recall: %f" % (f1, precision, recall))

3. sklearn.metrics.confusion_matrix

from sklearn.metrics import confusion_matrix


y_true = [2, 0, 2, 2, 0, 1]
y_pred = [0, 0, 2, 2, 0, 2]
'''
Returns the confusion matrix:
entry [i, j] is the number of samples whose true label is i
and whose predicted label is j
normalize
    None (default): each entry is a raw count
    'pred': normalize over columns (predicted label)
    'true': normalize over rows (true label)
    'all': normalize by the total number of samples
'''
confusion_matrix(y_true, y_pred, normalize=None)
'''
array([[2, 0, 0],
       [0, 0, 1],
       [1, 0, 2]])
'''
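
For example, normalize='true' divides each row of the same matrix by that class's true sample count:

confusion_matrix(y_true, y_pred, normalize='true')
'''
array([[1.        , 0.        , 0.        ],
       [0.        , 0.        , 1.        ],
       [0.33333333, 0.        , 0.66666667]])
'''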
  • Binary classification: true negatives (tn), false positives (fp), false negatives (fn) and true positives (tp)
y_true = [0, 0, 0, 1, 1, 1, 1, 1]
y_pred = [0, 1, 0, 1, 0, 1, 0, 1]
'''
For binary classification, ravel() flattens the matrix into the
counts of true negatives (tn), false positives (fp),
false negatives (fn) and true positives (tp)
'''
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
print(tn, fp, fn, tp) # 2 1 2 3
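
The usual binary metrics follow directly from these four counts, which gives a quick sanity check against section 2:

precision = tp / (tp + fp)                           # 3 / 4 = 0.75
recall = tp / (tp + fn)                              # 3 / 5 = 0.6
f1 = 2 * precision * recall / (precision + recall)   # 0.666...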

4. sklearn.metrics.cohen_kappa_score

4.1 How the Kappa coefficient works

'''
The Kappa coefficient measures inter-rater agreement and can also be
used to assess classification accuracy.
It expresses the proportional reduction in error of the classifier
relative to a completely random classification, and is computed
from the confusion matrix.

K = (Po - Pe) / (1 - Pe)
Po is the observed accuracy: the number of correctly classified
samples in each class, summed, divided by the total sample count.
Pe = (a1*b1 + a2*b2 + ... + ac*bc) / n^2
where a1, a2, ..., ac are the true sample counts per class,
b1, b2, ..., bc are the predicted sample counts per class,
and n is the total number of samples.

Kappa ranges from -1 to 1, but usually falls between 0 and 1:
0.00 ~ 0.20  slight agreement
0.21 ~ 0.40  fair agreement
0.41 ~ 0.60  moderate agreement
0.61 ~ 0.80  substantial agreement
0.81 ~ 1.00  almost perfect agreement
'''
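
A small worked check of the formula against sklearn, reusing the y_true/y_pred from section 3:

import numpy as np
from sklearn.metrics import cohen_kappa_score, confusion_matrix

y_true = [2, 0, 2, 2, 0, 1]
y_pred = [0, 0, 2, 2, 0, 2]

cm = confusion_matrix(y_true, y_pred)
n = cm.sum()
po = np.trace(cm) / n                          # observed accuracy Po = 4/6
pe = (cm.sum(axis=1) @ cm.sum(axis=0)) / n**2  # chance agreement Pe = 15/36
print((po - pe) / (1 - pe))                    # 0.42857...
print(cohen_kappa_score(y_true, y_pred))       # matches the manual value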

4.2 API usage

from sklearn.metrics import cohen_kappa_score

'''
weights
    'linear': linear weighting
    'quadratic': quadratic weighting
    None (default): unweighted
Weighted kappa is intended for ordinal labels: 'linear'/'quadratic'
penalize a disagreement by the (squared) distance between categories,
so confusing class 0 with class 2 costs more than confusing 0 with 1.
'''
y_true = [2, 0, 2, 2, 0, 1]
y_pred = [0, 0, 2, 2, 0, 2]
cohen_kappa_score(y_true, y_pred, weights='quadratic')
'''
0.5454545454545454
'''