使用 Python 3 + sklearn 实现三分类的概率校准

最新推荐文章于 2024-07-01 00:45:00 发布

李牧錞

最新推荐文章于 2024-07-01 00:45:00 发布

阅读量459

点赞数

文章标签： sklearn python3

本文链接：https://blog.csdn.net/weixin_33741534/article/details/112038753

版权

本文展示了如何使用Python3和sklearn库中的CalibratedClassifierCV类对随机森林分类器进行概率校准，以提高三分类任务的预测概率准确性。通过绘制预测概率变化，展示了校准前后的影响，并解释了sigmoid校准方法。

摘要由CSDN通过智能技术生成

"""
Probability calibration for 3-class classification.

A random forest is trained on blob data with three classes and evaluated on a
held-out test set (the last 200 of 1000 samples) in two ways:

* uncalibrated, fitted on the 800 train+validation samples;
* fitted on the 600 training samples, then sigmoid-calibrated on the 200
  validation samples with ``CalibratedClassifierCV``.

The first figure draws, for every test sample, an arrow from the uncalibrated
to the calibrated probability vector inside the 2-simplex (class-1 probability
on the x axis, class-2 probability on the y axis).  The second figure shows
how the learned calibrator moves a regular grid of points on the simplex.
"""
print(__doc__)

# Author: Jan Hendrik Metzen
# License: BSD Style.

import matplotlib.pyplot as plt
import numpy as np

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss

# The forests below are created without random_state, so they draw from
# numpy's global RNG; seeding it makes the run reproducible.
np.random.seed(0)

# Levels at which the faint helper grid lines of the simplex are drawn.
_GRID_LEVELS = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]


def _annotate_point(text, xy, xytext):
    """Label the simplex point ``xy`` with ``text`` placed at ``xytext``."""
    plt.annotate(
        text,
        xy=xy,
        xytext=xytext,
        xycoords="data",
        arrowprops=dict(facecolor="black", shrink=0.05),
        horizontalalignment="center",
        verticalalignment="center",
    )


def _finish_simplex_axes(title):
    """Add the helper grid, title, axis labels and limits to the current axes."""
    plt.grid(False)
    for x in _GRID_LEVELS:
        plt.plot([0, x], [x, 0], "k", alpha=0.2)
        plt.plot([0, 0 + (1 - x) / 2], [x, x + (1 - x) / 2], "k", alpha=0.2)
        plt.plot([x, x + (1 - x) / 2], [0, 0 + (1 - x) / 2], "k", alpha=0.2)

    plt.title(title)
    plt.xlabel("Probability class 1")
    plt.ylabel("Probability class 2")
    plt.xlim(-0.05, 1.05)
    plt.ylim(-0.05, 1.05)


# Generate data
X, y = make_blobs(n_samples=1000, random_state=42, cluster_std=5.0)
X_train, y_train = X[:600], y[:600]
X_valid, y_valid = X[600:800], y[600:800]
X_train_valid, y_train_valid = X[:800], y[:800]
X_test, y_test = X[800:], y[800:]

# Train an uncalibrated random forest on the whole train+validation data
# and evaluate it on the test data
clf = RandomForestClassifier(n_estimators=25)
clf.fit(X_train_valid, y_train_valid)
clf_probs = clf.predict_proba(X_test)
score = log_loss(y_test, clf_probs)

# Train a random forest on the training data, calibrate it on the validation
# data and evaluate it on the test data
clf = RandomForestClassifier(n_estimators=25)
clf.fit(X_train, y_train)
clf_probs = clf.predict_proba(X_test)
# NOTE: cv="prefit" is deprecated since scikit-learn 1.6 in favour of wrapping
# the fitted model in sklearn.frozen.FrozenEstimator; it is kept here so the
# script still runs on older releases.
sig_clf = CalibratedClassifierCV(clf, method="sigmoid", cv="prefit")
sig_clf.fit(X_valid, y_valid)
sig_clf_probs = sig_clf.predict_proba(X_test)
sig_score = log_loss(y_test, sig_clf_probs)

# Plot the change of the predicted probabilities as arrows
plt.figure()
colors = ["r", "g", "b"]
for before, after, label in zip(clf_probs, sig_clf_probs, y_test):
    plt.arrow(
        before[0],
        before[1],
        after[0] - before[0],
        after[1] - before[1],
        color=colors[label],
        head_width=1e-2,
    )

# Plot the perfect predictions (the three corners of the simplex)
plt.plot([1.0], [0.0], "ro", ms=20, label="Class 1")
plt.plot([0.0], [1.0], "go", ms=20, label="Class 2")
plt.plot([0.0], [0.0], "bo", ms=20, label="Class 3")

# Plot the boundary of the unit simplex
plt.plot([0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], "k", label="Simplex")

# Annotate the points on the simplex
_annotate_point(
    r"($\frac{1}{3}$, $\frac{1}{3}$, $\frac{1}{3}$)",
    (1.0 / 3, 1.0 / 3),
    (1.0 / 3, .23),
)
plt.plot([1.0 / 3], [1.0 / 3], "ko", ms=5)
_annotate_point(r"($\frac{1}{2}$, $0$, $\frac{1}{2}$)", (.5, .0), (.5, .1))
_annotate_point(r"($0$, $\frac{1}{2}$, $\frac{1}{2}$)", (.0, .5), (.1, .5))
_annotate_point(r"($\frac{1}{2}$, $\frac{1}{2}$, $0$)", (.5, .5), (.6, .6))
_annotate_point(r"($0$, $0$, $1$)", (0, 0), (.1, .1))
_annotate_point(r"($1$, $0$, $0$)", (1, 0), (1, .1))
_annotate_point(r"($0$, $1$, $0$)", (0, 1), (.1, 1))

# Add the grid, title, labels and limits
_finish_simplex_axes("Change of predicted probabilities after sigmoid calibration")
plt.legend(loc="best")

print("Log-loss of")
print(" * uncalibrated classifier trained on 800 datapoints: %.3f "
      % score)
print(" * classifier trained on 600 datapoints and calibrated on "
      "200 datapoint: %.3f" % sig_score)

# Illustrate the calibrator
plt.figure()
# Generate a grid over the 2-simplex: keep only points whose third
# coordinate (1 - p0 - p1) is non-negative
p1d = np.linspace(0, 1, 20)
p0, p1 = np.meshgrid(p1d, p1d)
p2 = 1 - p0 - p1
p = np.c_[p0.ravel(), p1.ravel(), p2.ravel()]
p = p[p[:, 2] >= 0]

calibrated_classifier = sig_clf.calibrated_classifiers_[0]
# The per-class calibrators were exposed as `calibrators_` up to
# scikit-learn 0.23; the attribute was renamed to `calibrators` in 0.24 and
# the old name removed in 1.1, so support both.
calibrators = getattr(calibrated_classifier, "calibrators", None)
if calibrators is None:
    calibrators = calibrated_classifier.calibrators_

# Apply each class's calibrator to that class's probability column, then
# renormalise every row so the calibrated values sum to one again
prediction = np.vstack(
    [calibrator.predict(this_p) for calibrator, this_p in zip(calibrators, p.T)]
).T
prediction /= prediction.sum(axis=1)[:, None]

# Plot the modifications made by the calibrator
for start, end in zip(p, prediction):
    plt.arrow(
        start[0],
        start[1],
        end[0] - start[0],
        end[1] - start[1],
        head_width=1e-2,
        color=colors[np.argmax(start)],
    )

# Plot the boundary of the unit simplex
plt.plot([0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], "k", label="Simplex")

_finish_simplex_axes("Illustration of sigmoid calibrator")

plt.show()

李牧錞

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
sklearn python3_使用python+sklearn实现三分类的概率校准

print(__doc__)# 作者: Jan Hendrik Metzen # 许可证: BSD Style.import matplotlib.pyplot as pltimport numpy as npfrom sklearn.datasets import make_blobsfrom sklearn.ensemble import RandomForestClassifierfrom ...
复制链接

扫一扫