python 数据分析与挖掘实战




第六章

对数据进行拉格朗日插值:

import pandas as pd

from scipy.interpolate  import lagrange

# Load the spreadsheet whose gaps are filled in by the interpolation loop.
# NOTE(review): the original call was cut off after the path (trailing comma,
# no closing parenthesis). header=None (treat every sheet row as data) is an
# assumption -- confirm against the actual layout of missing_data.xls.
miss = pd.read_excel('/home/yao/data/chapter6/demo/data/missing_data.xls',
                     header=None)
def p(s, n, k=5):
    """Estimate the entry of ``s`` at label ``n`` by Lagrange interpolation.

    The ``k`` labels before ``n`` and the ``k`` labels after it (``n`` itself
    excluded) are used as interpolation nodes. Nodes whose value is null, or
    whose label does not exist in ``s``, are ignored.

    Args:
        s: pandas Series with an integer index.
        n: integer label of the entry to estimate.
        k: number of neighbouring labels taken on each side of ``n``.

    Returns:
        The interpolating polynomial evaluated at ``n``, or NaN when the
        window contains no usable node.
    """
    neighbours = list(range(n - k, n)) + list(range(n + 1, n + 1 + k))
    # reindex() yields NaN for labels that are not in the index (a window
    # reaching past either end of the series); plain s[[...]] raises KeyError
    # for them on current pandas.
    window = s.reindex(neighbours)
    window = window[window.notnull()]
    if window.empty:
        # Nothing to interpolate from: leave the value missing rather than
        # returning whatever an empty node set evaluates to.
        return float('nan')
    return lagrange(window.index, list(window))(n)

# Replace every null cell of every column with its Lagrange estimate.
for i in miss.columns:
    # The mask is taken once per column: filling a cell never changes whether
    # a later cell is null, so re-evaluating isnull() per row is wasted work.
    column_nulls = miss[i].isnull()
    for j in column_nulls.index[column_nulls]:
        # .loc writes into the frame itself; the chained form miss[i][j] = ...
        # may assign to a temporary copy and leave `miss` unchanged.
        miss.loc[j, i] = p(miss[i], j)


使用随机函数将数据随机打乱,并划分为训练数据和测试数据

# Load the modelling table; row 0 of the sheet supplies the column names.
model = pd.read_excel(
    '/home/yao/data/chapter6/model.xls',
    header=0,
)

from random import shuffle
import numpy as np

# DataFrame.as_matrix() no longer exists in current pandas; .values returns
# the same ndarray and is also available on old releases.
model = model.values

# random.shuffle swaps elements with `x[i], x[j] = x[j], x[i]`; on a 2-D
# ndarray the two sides are views of the same buffer, so rows get duplicated
# and lost. numpy's shuffle permutes the rows in place correctly.
np.random.shuffle(model)

# First 80% of the shuffled rows for training, the remainder for testing.
split_at = int(len(model) * 0.8)
train = model[:split_at, :]
test = model[split_at:, :]


#创建cm_plot模块

  def cm_plot(y,yp):
   from sklearn.metrics import confusion_matrix
   cm = confusion_matrix(y,yp)
   import matplotlib.pyplot as plt
   plt.matshow(cm,cmap=plt.cm.Greens)
   plt.colorbar()
   
   for x in range(len(cm)):
      for y in range(len(cm)):
         plt.annotate(cm[x,y],xy=(x,y),horizontalalignment='center',verticalalignment='center')

   plt.ylabel('True label')
   plt.xlabel('Predicted label')
   return plt


#构建LM神经网络模型

from keras.models import Sequential
from keras.layers.core import Dense ,Activation
# Network: 3 inputs -> 10 units with ReLU -> 1 unit with sigmoid.
net = Sequential([
    Dense(input_dim=3, output_dim=10),
    Activation('relu'),
    Dense(input_dim=10, output_dim=1),
    Activation('sigmoid'),
])
net.compile(loss='binary_crossentropy', optimizer='adam')

# Columns 0-2 of the training array are the inputs, column 3 is the target.
net.fit(train[:, :3], train[:, 3], nb_epoch=100, batch_size=1)

# Confusion matrix of the network on the training rows (predicted classes).
predict_result = net.predict_classes(train[:, :3]).reshape(len(train))

from cm_plot import *
cm_plot(train[:, 3], predict_result).show()

# Raw network outputs on the held-out rows, used as scores for the ROC curve.
predict_result = net.predict(test[:, :3]).reshape(len(test))

# roc_curve was called here before any import of it, and the original line
# ended in a stray ">>> " interpreter prompt (a syntax error).
from sklearn.metrics import roc_curve
fpr, tpr, thresholds = roc_curve(test[:, 3], predict_result, pos_label=1)

import matplotlib.pyplot as plt
plt.plot(fpr, tpr, linewidth=2, label='Roc of lm')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.xlim(0, 1.05)
plt.ylim(0, 1.05)
# Without a legend the label passed to plot() is never displayed.
plt.legend(loc=4)
plt.show()


#创建 决策树CART模型

from sklearn.tree import DecisionTreeClassifier

# Decision tree with default settings, fitted on the training rows:
# columns 0-2 are the inputs, column 3 is the class label.
tree = DecisionTreeClassifier().fit(train[:, :3], train[:, 3])

from sklearn.externals import joblib

#joblib.dump(tree,treefile)

from cm_plot import *

# Confusion matrix of the tree on the training rows.
# (`trian` in the original was a typo for `train` and raised NameError.)
cm_plot(train[:, 3], tree.predict(train[:, :3])).show()

from sklearn.metrics import roc_curve

# Column 1 of predict_proba is used as the score of the positive class (1).
fpr, tpr, thresholds = roc_curve(
    test[:, 3], tree.predict_proba(test[:, :3])[:, 1], pos_label=1)

plt.plot(fpr, tpr, linewidth=2, label='Roc of cart', color='green')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.xlim(0, 1.05)
plt.ylim(0, 1.05)
# Without a legend the label passed to plot() is never displayed.
plt.legend(loc=4)
plt.show()



第七章: 航空公司客户价值分析

#数据分析

import pandas as pd 
# Load the raw records.
air = pd.read_csv('/home/yao/data/chapter7/air_data.csv')

# Transposed summary statistics; a bare expression, so it is only displayed
# when run interactively.
air.describe().transpose()

# Summary over all columns (include='all') with an empty percentile list,
# one row per original column.
explore = air.describe(percentiles=[], include='all').transpose()

# Null count per column = total number of rows minus the non-null count.
explore['null'] = len(air) - explore['count']

explore.to_csv('/home/yao/data/chapter7/explore.csv')

#数据预处理

# Keep only the rows where both SUM_YR_1 and SUM_YR_2 are present.
# `&` is the element-wise AND for boolean masks (the original multiplied them).
data = air[air['SUM_YR_1'].notnull() & air['SUM_YR_2'].notnull()]

index1 = data['SUM_YR_1'] != 0
index2 = data['SUM_YR_2'] != 0
# Each comparison must be parenthesised: `&` binds tighter than `==`, so the
# original `a == 0 & b == 0` was parsed as the chained comparison
# `a == (0 & b) == 0` instead of "a is 0 and b is 0".
index3 = (data['SEG_KM_SUM'] == 0) & (data['avg_discount'] == 0)

# A row is kept when any of the three conditions holds.
data2 = data[index1 | index2 | index3]

#数据标准化(z-score)

# Read the table to be standardised.
zs = pd.read_excel('/home/yao/data/chapter7/zscoredata.xls')

# Z-score every column: subtract the column mean, then divide by the column
# standard deviation.
zsn = zs.sub(zs.mean(axis=0)).div(zs.std(axis=0))

#K-means聚类算法实现

from  sklearn.cluster import KMeans 

# Fit a five-cluster K-means model on the standardised table.
kmodel = KMeans(n_clusters=5).fit(zsn)

# Bare expressions: only displayed when run interactively.
kmodel.cluster_centers_  # cluster centres
kmodel.labels_  # cluster assigned to each sample

#画图

import numpy as np
import matplotlib.pyplot as plt
# Radar chart of the cluster centres: one polygon per cluster, one axis per
# column of the standardised table.
labels = zsn.columns  # the original `zsn.column` was a typo

# The original fused two statements into `k = 5plot_data = ...`.
plot_data = kmodel.cluster_centers_
# Number of radar axes; derived from the centres instead of the hard-coded 5
# so that it always matches the width of plot_data.
k = plot_data.shape[1]

# One colour per cluster; this list supports at most five clusters.
color = ['b', 'g', 'r', 'c', 'y']

angles = np.linspace(0, 2*np.pi, k, endpoint=False)
# Close each polygon by repeating its first point. The angles must be closed
# as well, otherwise x and y passed to plot() differ in length.
plot_data = np.concatenate((plot_data, plot_data[:, [0]]), axis=1)
closed_angles = np.concatenate((angles, [angles[0]]))

fig = plt.figure()
# Created once; the original repeated this call on two consecutive lines.
ax = fig.add_subplot(111, polar=True)

for i, centre in enumerate(plot_data):
    ax.plot(closed_angles, centre, 'o-', color=color[i],
            label=u'客户群'+str(i), linewidth=2)

ax.set_rgrids(np.arange(0.01, 3.5, 0.5), np.arange(-1, 2.5, 0.5),
              fontproperties="SimHei")

# The un-closed angles are used here so there is exactly one tick per label.
ax.set_thetagrids(angles * 180/np.pi, labels, fontproperties='SimHei')

# Without a legend the per-cluster labels passed to plot() are never shown.
ax.legend(loc=4, prop={'family': 'SimHei'})

plt.show()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值