import numpy as np
import pandas as pd
# Load the transaction table; its 'Class' column is counted and plotted below.
data = pd.read_csv('creditcard.csv')

# Number of rows per class label, ordered by label value. Series.value_counts
# is used because the top-level pd.value_counts helper is deprecated.
count_classes = data['Class'].value_counts().sort_index()

# Series.plot returns the matplotlib Axes it drew on. Labelling through that
# Axes avoids relying on a `plt` name, which is not imported in the visible
# part of this file.
ax = count_classes.plot(kind='bar')
ax.set_title('Fraud class histogram')
ax.set_xlabel('Class')
ax.set_ylabel('Frequency')
from sklearn.preprocessing import StandardScaler
# Standardize the 'Amount' column, whose values vary widely.
# StandardScaler.fit_transform expects a 2-D input, so the column's underlying
# array is reshaped to (n_rows, 1); pandas Series has no reshape method in
# current releases. The scaled result is flattened back to 1-D so it can be
# stored as an ordinary column.
data['normAmount'] = (
    StandardScaler()
    .fit_transform(data['Amount'].values.reshape(-1, 1))
    .ravel()
)
# Drop the raw 'Time' and 'Amount' columns; 'normAmount' replaces 'Amount'.
data = data.drop(['Time', 'Amount'], axis=1)
# Preview of the first rows (only displayed when run interactively).
data.head()
# Split the table into features (x) and label (y).
# .loc is used because the .ix indexer does not exist in pandas >= 1.0.
# Both selections keep every row and return DataFrames; y is a one-column
# frame holding only 'Class'.
is_label_column = data.columns == 'Class'
x = data.loc[:, ~is_label_column]
y = data.loc[:, is_label_column]
# Class == 1 -> fraudulent transaction, Class == 0 -> non-fraudulent transaction.
# Number of rows with Class == 1, obtained by summing the boolean mask.
number_records_fraud = int((data['Class'] == 1).sum())
fraud_indices=np.array(data[data.Cla