# 读取“客户价值.xlsx”，使用 Axes3D 绘制反映 RFM 数据列关系的 3D 散点图。
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
# ---------------------------------------------------------------------------
# Step 1: build the per-customer RFM table from the raw sales records.
# ---------------------------------------------------------------------------
df = pd.read_excel('某超市的销售数据.xlsx')
df = df.dropna()  # drop rows with missing values

# .copy() makes df1 an independent frame, so adding the 'R' column below does
# not raise pandas' SettingWithCopyWarning.
df1 = df[['顾客编号', '销售金额', '销售日期', '数据采集时间']].copy()

# R: number of days between the data-collection time and each sale.
df1['R'] = (
    pd.to_datetime(df1['数据采集时间']) - pd.to_datetime(df1['销售日期'])
).values / np.timedelta64(1, 'D')
df1 = df1[['顾客编号', '销售金额', '销售日期', 'R']]

# Per customer: the smallest R and the mean sales amount.
df2 = df1.groupby('顾客编号').agg({'R': 'min', '销售金额': 'mean'})
# F: number of sales rows recorded for each customer.
df2['F'] = df1.groupby(['顾客编号'])['顾客编号'].size()

datafile = r'data.xlsx'
df2.to_excel(datafile, index=False)

# ---------------------------------------------------------------------------
# Step 2: standardise the three features (z-score per column).
# ---------------------------------------------------------------------------
# NOTE: the transformed table is written as .xlsx. pandas 2.0 removed the xlwt
# engine, so writing a legacy .xls file fails on current pandas versions.
transformfile = r'tfdata.xlsx'

# data.xlsx is written without the customer-id index, so the matching
# in-memory table is df2 with a fresh 0..n-1 index; no read-back is needed.
data = df2.reset_index(drop=True)[['R', 'F', '销售金额']]
data = (data - data.mean(axis=0)) / data.std(axis=0)
data.columns = ['R', 'F', 'M']
data.to_excel(transformfile, index=False)
inputfile = transformfile

# ---------------------------------------------------------------------------
# Step 3: cluster the customers with k-means.
# ---------------------------------------------------------------------------
iteration = 500  # passed to KMeans as max_iter
kmodel = KMeans(n_clusters=4, max_iter=iteration)
kmodel.fit(data)  # fit the model

# Cluster summary: one row per cluster holding its centre and member count.
# Kept under its own name so it is not overwritten by the labelled table.
rl = pd.Series(kmodel.labels_).value_counts()
r2 = pd.DataFrame(kmodel.cluster_centers_)
cluster_summary = pd.concat([r2, rl], axis=1)
cluster_summary.columns = list(data.columns) + [u'聚类数量']

# Labelled table: the standardised R/F/M values plus each row's cluster id.
r3 = pd.Series(kmodel.labels_, index=data.index)
r = pd.concat([data, r3], axis=1)
r.columns = list(data.columns) + [u'聚类类别']
r.to_excel('客户价值1.xlsx')
# Draw a 3D scatter plot of the standardised R / F / M columns held in `r`.
#
# The Axes3D import is kept even though the name is not referenced directly:
# older matplotlib releases rely on this import to register the '3d'
# projection requested below.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
import matplotlib.pyplot as plt
import matplotlib

# Use the SimHei font for the Chinese axis labels, and draw the minus sign as
# an ASCII hyphen instead of the Unicode minus character.
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['axes.unicode_minus'] = False

fig = plt.figure()
# Create the 3D axes through the figure so they are actually attached to it;
# a bare Axes3D(fig) is no longer added to the figure on current matplotlib,
# which leaves the window empty.
ax = fig.add_subplot(111, projection='3d')
ax.scatter(r['R'], r['F'], r['M'])
ax.set_xlabel('消费间隔')
ax.set_ylabel('消费频率')
ax.set_zlabel('消费金额')
# The original referenced plt.show without calling it, so nothing was shown.
plt.show()