机器学习模型评估与选择之P-R曲线与ROC曲线的Python实现


#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
 @Time    : 2018/5/29 10:40
 @Author  : xwill
 @File    : 分类.py
 @Software: PyCharm
"""
import csv
from sklearn import metrics
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
# Load the three columns used by this script from jisuan.csv in one pass:
#   column  <- '预测值'        (compared against thresholds in pr())
#   column1 <- '判决结果'      (compared with column2 in f())
#   column2 <- 'Ground-truth'  (compared with column1 in f())
# csv.DictReader yields every field as text; f() converts the lists to
# numbers in place later on.
# newline='' is the mode the csv module documentation asks for when a file
# object is handed to a reader.
with open('jisuan.csv', 'rt', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    rows = list(reader)
column = [row['预测值'] for row in rows]
column1 = [row['判决结果'] for row in rows]
column2 = [row['Ground-truth'] for row in rows]
def f(column1, column2):
    """Tally the confusion-matrix cells for two parallel 0/1 label lists.

    Each entry of ``column1`` is compared with the entry of ``column2`` at
    the same index after both are passed through ``int()``. Pairs whose
    values are not 0 or 1 are not counted in any cell.

    Side effects (in place, one index at a time):
        * ``column1[i]`` and ``column2[i]`` are replaced by their ``int()``
          value.
        * the module-level list ``column`` has ``column[i]`` replaced by its
          ``float()`` value, so it must hold at least ``len(column1)`` items.

    Args:
        column1: labels treated as the prediction (1 = positive).
        column2: labels treated as the ground truth (1 = positive).

    Returns:
        The tuple ``(TP, TN, FP, FN)``. Note the order: TN comes second.
    """
    # Keys are (predicted, actual) pairs; anything else is ignored.
    cells = {(1, 1): 0, (0, 0): 0, (1, 0): 0, (0, 1): 0}
    for idx in range(len(column1)):
        predicted = int(column1[idx])
        actual = int(column2[idx])
        pair = (predicted, actual)
        if pair in cells:
            cells[pair] += 1
        # Normalise the stored values so later code works with numbers.
        column[idx] = float(column[idx])
        column1[idx] = predicted
        column2[idx] = actual
    return cells[(1, 1)], cells[(0, 0)], cells[(1, 0)], cells[(0, 1)]
# Count the confusion-matrix cells. f() also converts column1/column2 (and
# the module-level column) to numbers in place.
TP, TN, FP, FN = f(column1, column2)
print(TP, TN, FP, FN)
print('算出来的准确率指标:', (TP + TN)/(TP + TN + FP + FN))  # accuracy
print('python自带的:', accuracy_score(column2, column1))
# Precision is undefined when nothing was predicted positive. Report 0.0 in
# that case instead of raising ZeroDivisionError; this is the value
# scikit-learn returns (with a warning) under its default zero_division.
P = TP / (TP + FP) if (TP + FP) else 0.0
print('算出来的查准率指标:', P)  # precision
print('python自带的:', metrics.precision_score(column2, column1))
# Recall is undefined when there are no actual positives; same convention.
R = TP / (TP + FN) if (TP + FN) else 0.0
print('算出来的查全率指标:', R)  # recall
print('python自带的:', metrics.recall_score(column2, column1))
# F-beta for beta = 1, 0.5 and 2, each printed next to scikit-learn's value.
# fbeta_score with beta=1 is the F1 score.
for B, _fb_label in (
    (1, '算出来的准确率与召回率的调和平均指标=1:'),
    (0.5, '算出来的准确率与召回率的调和平均指标=0.5:'),
    (2, '算出来的准确率与召回率的调和平均指标=2:'),
):
    _fb_denominator = (B**2 * P) + R
    # The denominator is zero only when precision and recall are both zero.
    FB = ((1 + B**2) * P * R) / _fb_denominator if _fb_denominator else 0.0
    print(_fb_label, FB)
    print('python自带的:', metrics.fbeta_score(column2, column1, beta=B))

# 画图

def pr(column, column1, column2):
    """Sweep decision thresholds and collect P-R and ROC curve points.

    For every threshold t in 0.00, 0.01, ..., 1.00 a sample is predicted
    positive unless its score is below t. The resulting predictions are
    compared with the ground truth to obtain one point per threshold.

    Args:
        column: per-sample scores; each value is passed through ``float()``.
        column1: accepted for backward compatibility and not used. Earlier
            versions overwrote this list with the thresholded predictions;
            the caller's list is now left untouched.
        column2: ground-truth labels (1 = positive, 0 = negative); each value
            is passed through ``int()``. Other label values are not counted.

    Returns:
        ``(py, rx, fpx, tpy)``: four lists with 101 entries each, holding
        precision, recall, false-positive rate FP / (FP + TN) and
        true-positive rate TP / (TP + FN) for each threshold.

        Undefined ratios use fixed conventions instead of raising: precision
        is 1.0 when nothing is predicted positive, recall and true-positive
        rate are 1.0 when there are no actual positives, and false-positive
        rate is 0.0 when there are no actual negatives.

    Raises:
        IndexError: if ``column`` has fewer entries than ``column2``.
    """
    truths = [int(label) for label in column2]
    if len(column) < len(truths):
        raise IndexError('column has fewer entries than column2')
    scores = [float(column[j]) for j in range(len(truths))]

    # Class sizes do not depend on the threshold, so count them once.
    positives = sum(1 for truth in truths if truth == 1)
    negatives = sum(1 for truth in truths if truth == 0)

    py, rx, fpx, tpy = [], [], [], []
    for i in range(101):
        threshold = i / 100
        TP = FP = 0
        for score, truth in zip(scores, truths):
            # "not below the threshold" means predicted positive.
            if not score < threshold:
                if truth == 1:
                    TP += 1
                elif truth == 0:
                    FP += 1
        # P-R curve point.
        P1 = TP / (TP + FP) if (TP + FP) else 1.0
        R1 = TP / positives if positives else 1.0
        py.append(P1)
        rx.append(R1)
        # ROC curve point: FPR is taken over actual negatives, TPR over
        # actual positives (TPR is the same quantity as recall).
        fpx.append(FP / negatives if negatives else 0.0)
        tpy.append(R1)
    return py, rx, fpx, tpy
# Compute the curve points for all thresholds, then draw both curves.
py, rx, fpx, tpy = pr(column, column1, column2)
print(fpx)
print(tpy)
# Top subplot: P-R curve. Recall goes on the x axis and precision on the
# y axis so the data matches the axis labels set below.
plt.subplot(211)
plt.plot(rx, py)
plt.xlabel("Recall")
plt.ylabel('Precision')
plt.title("P-R曲线")
# Bottom subplot: ROC curve, fpx on the x axis and tpy on the y axis.
plt.subplot(212)
plt.plot(fpx, tpy)
plt.xlabel("fpx")
plt.ylabel('tpy')
plt.title("ROC曲线")
plt.show()
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值