# AUC(Area Under Curve)

## ROC与AUC常用来评价一个二值分类器(binary classifier)的优劣

### 4.计算代码

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Number of synthetic samples to generate.
parameter = 30

# Draw the labels first and the scores second, so the global NumPy random
# stream is consumed in the same order on every run with a fixed seed.
true_labels = np.random.randint(0, 2, size=parameter)
score_grid = np.arange(0.1, 1, 0.1)
scores = np.random.choice(score_grid, parameter)

# One row per sample: a predicted probability and the 0/1 ground-truth label.
data = pd.DataFrame(
    {'probability': scores, 'The true label': true_labels},
    index=range(0, parameter),
)
data  # randomly generated data (notebook display)

# 2x2 confusion matrix at a fixed 0.5 decision threshold.
# Rows are the true label (0, 1); columns are the predicted label (0, 1).
is_negative = data['The true label'] == 0
is_positive = data['The true label'] == 1
predicted_negative = data['probability'] < 0.5
predicted_positive = data['probability'] >= 0.5

# Combine the masks with `&` rather than chaining data[mask_a][mask_b]:
# chaining applies a full-length mask to an already filtered frame, which
# makes pandas reindex the mask and emit a UserWarning.
cm = np.zeros((2, 2), dtype=int)
cm[0, 0] = (is_negative & predicted_negative).sum()  # TN
cm[0, 1] = (is_negative & predicted_positive).sum()  # FP
cm[1, 0] = (is_positive & predicted_negative).sum()  # FN
cm[1, 1] = (is_positive & predicted_positive).sum()  # TP

import itertools  # standard library

# Render the confusion matrix `cm` as a heat map with the count in each cell.
classes = [0, 1]
plt.figure()
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion matrix')

# Put one tick per class on both axes.
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=0)
plt.yticks(tick_marks, classes)

# Cells whose count exceeds half of the largest count are labelled in white,
# all others in black, so the number contrasts with the cell colour.
thresh = cm.max() / 2
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    # plt.text takes (x, y), i.e. (column, row).
    plt.text(j, i, cm[i, j], horizontalalignment='center',
             color='white' if cm[i, j] > thresh else 'black')
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')

# Sort by predicted score, highest first, so that walking down the rows
# lowers the decision threshold step by step.
data.sort_values('probability', inplace=True, ascending=False)
data

# Build one (TPR, FPR) point per row, using that row's score as the threshold:
# every sample scoring at least the threshold is predicted positive.
# NOTE(review): the loop here previously used TP and FP without defining them;
# the rates below are a reconstruction - confirm they match the intended
# definition.
is_positive = data['The true label'] == 1
is_negative = data['The true label'] == 0
positive_total = is_positive.sum()
negative_total = is_negative.sum()
if positive_total == 0 or negative_total == 0:
    # Both rates divide by a class count, so each class must be present.
    raise ValueError('ROC points need at least one positive and one negative sample')

roc_rows = []
for threshold in data['probability']:
    predicted_positive = data['probability'] >= threshold
    true_positive_rate = (predicted_positive & is_positive).sum() / positive_total
    false_positive_rate = (predicted_positive & is_negative).sum() / negative_total
    roc_rows.append((true_positive_rate, false_positive_rate))

# Columns keep the names 'TP' and 'FP' but hold rates in [0, 1], stored as floats.
TPRandFPR = pd.DataFrame(roc_rows, columns=('TP', 'FP'), index=range(len(data)))
TPRandFPR

from sklearn.metrics import auc

# The threshold steps down through the sorted scores (0.9, 0.9, 0.9, 0.8, ...),
# so the points are expected to run from low to high false-positive rate.
# Cast to float because the frame may hold object-dtype columns, and prepend
# the (0, 0) origin: without it the trapezoid area to the left of the first
# point is left out of the AUC.
fpr = np.concatenate(([0.0], TPRandFPR['FP'].astype(float)))
tpr = np.concatenate(([0.0], TPRandFPR['TP'].astype(float)))

AUC = auc(fpr, tpr)
plt.scatter(x=fpr, y=tpr, label='(FPR,TPR)', color='b')
plt.plot(fpr, tpr, 'k', label='AUC=%0.2f' % AUC)
plt.legend(loc='lower right')
plt.show()