案例:
第一步:使用sklearn的make_moons生成如下数据,然后使用DBSCAN聚类算法对其进行聚类,
选择合适的参数eps和min_samples,以得到比较好的聚类效果,
并进行可视化。
第二步:用K-means算法对上述的数据集再做聚类分析,并进行可视化(每个聚类的质心也要画出来),
并对比两种聚类算法的效果。
注:make_moons用于生成两个交错的半圆形数据集。
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
# Draw 1000 noisy samples shaped as two interleaving half circles; the fixed
# random_state makes every run produce the same points.
# x1 holds the 2-D coordinates; y1 is the second value returned by make_moons.
x1, y1 = make_moons(
    n_samples=1000,
    noise=0.1,
    random_state=0,
)

# Plot the raw, unlabelled samples: first column on the x axis, second on the y axis.
plt.scatter(*x1.T)
plt.show()
#建立模型,其中eps代表邻域的距离阈值,min_samples代表样本成为核心对象所需的eps邻域内样本数阈值
import numpy as np
from sklearn.cluster import DBSCAN
# Density-based clustering of the make_moons samples in x1.
# eps is the neighbourhood distance threshold; min_samples is how many samples
# must lie inside that eps-neighbourhood for a point to count as a core object.
dbscan = DBSCAN(eps=0.1, min_samples=4).fit(x1)

# One integer label per row of x1 (the same array fit_predict would return).
# NOTE(review): per the scikit-learn docs, samples judged to be noise get -1.
cluster = dbscan.labels_
print(cluster)

# Colour every sample by the label DBSCAN assigned to it.
plt.scatter(*x1.T, c=cluster)
plt.show()
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
# K-means on the same make_moons samples (x1), for comparison with DBSCAN.
# random_state pins the centroid initialisation so that cluster ids (and hence
# the colours below) are identical on every run, matching the seeded data
# generation. n_init=10 is spelled out because the library default changed
# between scikit-learn releases.
KM = KMeans(n_clusters=4, n_init=10, random_state=0)
KM.fit(x1)
centers = KM.cluster_centers_
print(centers)
result = KM.predict(x1)
print('分簇结果:', result)

# --- View 1: one marker/colour format string per cluster -------------------
# Each cluster is drawn with a single plot call (boolean mask on the labels)
# instead of one call per sample, which created 1000 separate line artists.
mark = ['^r', 'sb', '*g', 'oy']
markcentor = ['+b', '+r', '+y', '+g']
for k, fmt in enumerate(mark):
    members = x1[result == k]
    plt.plot(members[:, 0], members[:, 1], fmt)
# Centroids use large '+' markers so they stand out from the samples.
for centor, fmt in zip(centers, markcentor):
    plt.plot(centor[0], centor[1], fmt, markersize=30)
# NOTE(review): finish this figure here so the legend view below gets its own
# axes; without it both views paint the same samples twice, in conflicting
# colours, on a single figure. Confirm that two figures are what is wanted.
plt.show()

# --- View 2: labelled scatter plot with a legend ---------------------------
# (colour, marker, legend label) for cluster ids 0..3, in that order.
scatter_styles = [
    ('orange', 'o', 'cluster=1'),
    ('green', 's', 'cluster=2'),
    ('blue', 'o', 'cluster=3'),
    ('red', '*', 'cluster=4'),
]
for k, (colour, marker, label) in enumerate(scatter_styles):
    members = x1[result == k]
    plt.scatter(members[:, 0], members[:, 1], s=30, c=colour,
                marker=marker, label=label)
# All four centroids in one call, drawn much larger than the samples.
plt.scatter(centers[:, 0], centers[:, 1], s=500, marker="+", c='red',
            label='cluster center')
# The negative x anchor places the legend to the left of the axes area.
plt.legend(loc='lower left', bbox_to_anchor=(-0.4, 0.5, 0.1, 0.1))
plt.show()