# K-Means clustering with scikit-learn
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy.io
from sklearn.cluster import KMeans
# Load the data
def loaddata():
    """Load the clustering samples from ``data/cluster_data.csv``.

    The path is relative to the current working directory and the file is
    parsed as comma-separated numeric values.

    Returns:
        numpy.ndarray: The array produced by ``np.loadtxt`` for that file.
    """
    return np.loadtxt('data/cluster_data.csv', delimiter=',')
X = loaddata()

# Fit the model.
# The KMeans keyword is `n_clusters`; the misspelt `n_cluster` raises TypeError.
model = KMeans(n_clusters=3, max_iter=10)
model.fit(X)
print('聚类中心:', model.cluster_centers_)
# Fitted attributes carry a trailing underscore: `labels_`, `cluster_centers_`.
print('每个样本所属的簇:', model.labels_)

# Plot the samples coloured by assigned cluster, then overlay the centres.
cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])
plt.scatter(X[:, 0], X[:, 1], c=model.labels_, cmap=cm_dark, s=20)
centers = model.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c=np.arange(len(centers)))
# Show this figure now so it is not merged into the next section's plot.
plt.show()
# Hierarchical (agglomerative) clustering
from sklearn.cluster import AgglomerativeClustering
# Possible values for `linkage`:
# - ward: minimum variance
# - complete: maximum distance
# - average: average distance
# - single: minimum distance
# The original passed affinity='euclidean'. That keyword was deprecated in
# scikit-learn 1.2 and removed in 1.4 (renamed to `metric`). Euclidean is the
# default under both names, so omitting it keeps the same behaviour on old and
# new releases.
model = AgglomerativeClustering(n_clusters=3, linkage='complete')
# The estimator must be fitted before `labels_` exists.
model.fit(X)
print('每个样本所属的簇:', model.labels_)

# Plot the samples coloured by assigned cluster.
cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])
plt.scatter(X[:, 0], X[:, 1], c=model.labels_, cmap=cm_dark, s=20)
plt.show()
# Density-based clustering
# Clusters are formed according to density, so the number of clusters does not need to be set.
from sklearn.cluster import DBSCAN
def loaddata():
    """Load the clustering samples from ``data/cluster_data.csv``.

    The path is relative to the current working directory and the file is
    parsed as comma-separated numeric values.

    Returns:
        numpy.ndarray: The array produced by ``np.loadtxt`` for that file.
    """
    return np.loadtxt('data/cluster_data.csv', delimiter=',')
X = loaddata()

# DBSCAN takes a neighbourhood radius (`eps`) and a density threshold
# (`min_samples`) rather than a cluster count.
model = DBSCAN(eps=0.5, min_samples=5, metric='euclidean')
model.fit(X)
print('每个样本所属的簇', model.labels_)

# Plot the samples coloured by assigned label.
# NOTE(review): per the scikit-learn DBSCAN docs, noise samples are labelled
# -1 and the number of clusters depends on the data, so the four fixed colours
# may not map one-to-one onto labels - confirm against the actual data set.
cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b', 'c'])
plt.scatter(X[:, 0], X[:, 1], c=model.labels_, cmap=cm_dark, s=20)
plt.show()