目录
第一题
一、加入相应的库
import pandas as pd
import warnings
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn import preprocessing
from pyecharts import options as opts
from pyecharts.charts import Radar
二、去除空值然后进行描述性统计
# Suppress all warnings emitted from here on.
warnings.filterwarnings('ignore')

# Read the spreadsheet and drop every row that contains at least one
# missing value, in a single chained expression.
data = pd.read_excel('文件名字.xlsx').dropna(axis=0, how='any')

# Summary statistics of the cleaned data.
descriptive_statistics = data.describe()
print(descriptive_statistics)
三、找出 CH(Calinski-Harabasz)指数最大时对应的 k 值
# Keep only the features used for clustering. `.copy()` gives an independent
# frame, so the 'group' column added in the next section is not written into
# a slice of the original DataFrame.
data = data[['PURCHASES', 'INSTALLMENTS_PURCHASES', 'PURCHASES_FREQUENCY',
             'PURCHASES_TRX', 'CREDIT_LIMIT']].copy()

# Standardize the selected features before clustering.
norm = preprocessing.scale(data)

# Older scikit-learn releases expose the score under the misspelled name
# `calinski_harabaz_score`; newer ones only provide `calinski_harabasz_score`.
# Resolve whichever exists once, outside the loop.
ch_score = getattr(metrics, 'calinski_harabasz_score', None)
if ch_score is None:
    ch_score = metrics.calinski_harabaz_score

# Evaluate the Calinski-Harabasz (CH) score for k = 2..10.
chs = []
ks = []
for k in range(2, 11):
    y_pred = KMeans(n_clusters=k, random_state=10).fit_predict(norm)
    ch = ch_score(norm, y_pred)
    ks.append(k)
    chs.append(ch)

# Show the k with the highest CH score and plot the score against k.
result = pd.DataFrame({'k': ks, 'ch': chs})
print(result[result['ch'] == result['ch'].max()])
plt.plot(ks, chs)
plt.show()
四、按 k=3 进行聚类,并用雷达图展示各客户群体的特征均值
# Fit the final K-Means model with k=3 and label every row with its cluster.
model = KMeans(n_clusters=3, random_state=10).fit(norm)
data['group'] = model.labels_
centers = model.cluster_centers_

# Per-cluster feature means, computed on `data` (not on the scaled `norm`).
data_mean = data.groupby('group').mean()

# Per-cluster descriptive statistics. Printed explicitly: as a bare
# expression in the middle of the code the result would simply be discarded.
print(data.groupby('group').describe().T)

# Radar expects each series as a list of rows; one row per cluster here.
value_1 = [data_mean.iloc[0, :].tolist()]
value_2 = [data_mean.iloc[1, :].tolist()]
value_3 = [data_mean.iloc[2, :].tolist()]

# One radar axis per feature, in the same order as the columns of data_mean.
# NOTE(review): the "max" values are hard-coded; presumably they were chosen
# from this particular dataset -- confirm before reusing with other data.
c_schema = [
    {'name': "PURCHASES", "max": 8377},
    {'name': "INSTALLMENTS_PURCHASES", "max": 3284},
    {"name": "PURCHASES_FREQUENCY", "max": 1},
    {"name": "PURCHASES_TRX", "max": 105},
    {"name": "CREDIT_LIMIT", "max": 9902},
]

c = (
    Radar()
    .add_schema(
        schema=c_schema,
        shape="circle",
        textstyle_opts=opts.TextStyleOpts(color="#130c0e"),
    )
    .add("客户群体 1", value_1, color="#d71345")
    .add("客户群体 2", value_2, color="#f47920")
    .add("客户群体 3", value_3, color="#71680e")
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
)
c.render_notebook()
第二题
一、建立表格
二、画图
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Radar
# Load the budget sheet and transpose it so that each series can be selected
# as a row by its label.
# NOTE(review): assumes that after transposing, the index contains the labels
# '预算分配' and '实际开销' -- confirm against the actual spreadsheet layout.
datasale = pd.read_excel('文件名字.xlsx')
datasale = datasale.T

# Read the rows from `datasale`, the frame that is actually loaded here.
# The earlier code used an undefined name `df`, which raises NameError.
value_1 = [datasale.loc['预算分配'].tolist()]
value_2 = [datasale.loc['实际开销'].tolist()]

# One radar axis per department, with its axis maximum.
# NOTE(review): "研发" appears twice below (max 50000 and max 21000); the
# last entry is presumably meant to be a different department -- verify
# against the spreadsheet and rename it accordingly.
c_schema = [
    {'name': "销售", "max": 5000},
    {'name': "管理", "max": 14000},
    {"name": "信息技术", "max": 28000},
    {"name": "客服", "max": 35000},
    {"name": "研发", "max": 50000},
    {"name": "研发", "max": 21000},
]

c = (
    Radar()
    .add_schema(
        schema=c_schema,
        shape="circle",
        textstyle_opts=opts.TextStyleOpts(color="#130c0e"),
    )
    .add("预算", value_1, color="#d71345")
    .add("实际", value_2, color="#f47920")
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
)
c.render_notebook()
作业写完不易,兄弟们点点关注,点点赞!!!!!