import pandas as pd
# Load the sample sheet and keep only the column that is discretized below.
data = pd.read_excel(
    r"F:\BaiduYunDownload\python数据分析和挖掘实战\chapter4\chapter4\demo\data\discretization_data.xls"
)
data = data['肝气郁结证型系数']
k = 4  # number of bins / clusters used by all three methods

# Equal-width discretization: k bins of identical width over the data range.
d1 = pd.cut(data, k, labels=range(k))

# Equal-frequency discretization: bin edges are the 0, 1/k, ..., 1 quantiles.
w = [i / k for i in range(k + 1)]
# Series.quantile returns exactly the k + 1 requested edges, so no slicing of
# describe() output (whose row layout is not fixed) is needed.
w = data.quantile(w).tolist()
# pd.cut bins are open on the left, so nudge the lowest edge slightly below
# the minimum to keep the minimum value inside the first bin.
# NOTE(review): the multiplicative nudge only lowers the edge when the minimum
# is positive -- confirm against the data.
w[0] = w[0] * (1 - 1e-10)
d2 = pd.cut(data, w, labels=range(k))

# (One-dimensional) clustering: bin edges are midpoints between sorted
# k-means cluster centres.
from sklearn.cluster import KMeans
# n_jobs was removed from KMeans in scikit-learn 1.0, so it is not passed.
kmodel = KMeans(n_clusters=k)
# KMeans expects a 2-D (n_samples, n_features) array; Series has no reshape,
# so reshape the underlying ndarray instead.
kmodel.fit(data.values.reshape((len(data), 1)))
# Sort the centres ascending (DataFrame.sort was replaced by sort_values).
c = pd.DataFrame(kmodel.cluster_centers_).sort_values(0)
# Mean of each adjacent pair of centres gives the k - 1 interior edges; the
# first rolling row is NaN and is dropped.
w = c.rolling(2).mean().iloc[1:]
# Add the outer edges: 0 on the left and the data maximum on the right.
# NOTE(review): a left edge of 0 presumes all values are > 0 -- confirm.
w = [0] + list(w[0]) + [data.max()]
d3 = pd.cut(data, w, labels=range(k))
#绘图
def c