# Model evaluation methods
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the transactions table from the CSV file in the working directory.
data = pd.read_csv("creditcard.csv")
print(data.head())

# Count the rows per value of the 'Class' column, ordered by class label.
# Series.value_counts() replaces the top-level pd.value_counts(), which is
# deprecated in recent pandas releases; the resulting counts are identical.
count_classes = data['Class'].value_counts().sort_index()

# Bar chart of the per-class counts.
count_classes.plot(kind='bar')
plt.title("Fraud class histogram")
plt.xlabel("Class")
plt.ylabel("Frequency")
plt.show()
from sklearn.preprocessing import StandardScaler
# StandardScaler expects a 2-D input. Selecting the column with a list
# (data[['Amount']]) yields a single-column frame whose .values is already
# an (n_rows, 1) ndarray, so no explicit reshape is required.
data['normAmount'] = StandardScaler().fit_transform(data[['Amount']].values)

# Remove the 'Time' column and the raw 'Amount' column (the scaled copy
# 'normAmount' stays in the frame).
data = data.drop(columns=['Time', 'Amount'])
print(data.head())
# The two classes have different sample counts, so an undersampling strategy
# is used: reduce class 0 until it is as small as class 1.

# Feature matrix: every column except 'Class'.
X = data.drop(columns=['Class'])
# Labels: kept as a single-column DataFrame rather than a Series.
y = data[['Class']]

# Number of data points in the minority class (rows where Class == 1).
number_records_fraud = int((data['Class'] == 1).sum())