运行一个聚类算法; 要求: 1) 数据自选,例如:iris 2) 修改给出的代码,运行得到结果,例如修改类别、修改算法参数、更换聚类算法等;3)打印出评价指标;
K-means实验代码
import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.metrics import fowlkes_mallows_score

# K-means experiment: cluster the iris samples into three groups, print the
# Fowlkes-Mallows index (FMI) against the true species labels, and plot the
# clusters on the first two feature columns.
iris = datasets.load_iris()
X = iris.data[:, :4]  # use all four feature columns

estimator = KMeans(n_clusters=3)  # build the clusterer
estimator.fit(X)  # run the clustering (must happen before reading labels_)
label_pred = estimator.labels_  # cluster label assigned to each sample

# FMI lies in [0, 1]; a larger value means the clustering agrees better with
# the reference labels. The reference must be the species labels
# (iris.target), not a feature column such as iris.data[:, -1].
FMI = fowlkes_mallows_score(iris.target, label_pred)
print("FM指数(K-means) :", FMI)

# Plot the k-means result, one scatter series per cluster.
x0 = X[label_pred == 0]
x1 = X[label_pred == 1]
x2 = X[label_pred == 2]
plt.scatter(x0[:, 0], x0[:, 1], c="red", marker='o', label='label_0')
plt.scatter(x1[:, 0], x1[:, 1], c="green", marker='*', label='label_1')
plt.scatter(x2[:, 0], x2[:, 1], c="blue", marker='+', label='label_2')
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend(loc=2)
plt.show()
AGNES实验代码
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import fowlkes_mallows_score

# AGNES experiment: agglomerative (bottom-up hierarchical) clustering of the
# iris samples with Ward linkage into three clusters, scored with the
# Fowlkes-Mallows index (FMI) and plotted on the first two feature columns.
iris = datasets.load_iris()
irisdata = iris.data

clustering = AgglomerativeClustering(linkage='ward', n_clusters=3)
res = clustering.fit(irisdata)

# FMI lies in [0, 1]; a larger value means a better clustering. The reference
# must be the species labels (iris.target), not a feature column such as
# iris.data[:, -1].
FMI = fowlkes_mallows_score(iris.target, clustering.labels_)
print("FM指数(AGNES) :", FMI)

# Plot each cluster with its own colour/marker.
plt.figure()
d0 = irisdata[clustering.labels_ == 0]
plt.plot(d0[:, 0], d0[:, 1], 'r.')
d1 = irisdata[clustering.labels_ == 1]
plt.plot(d1[:, 0], d1[:, 1], 'go')
d2 = irisdata[clustering.labels_ == 2]
plt.plot(d2[:, 0], d2[:, 1], 'b*')
plt.xlabel("Sepal.Length")
plt.ylabel("Sepal.Width")
plt.title("AGNES Clustering")
plt.show()
实验结果
DBSCAN的实验代码
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.metrics import fowlkes_mallows_score

# DBSCAN experiment: density-based clustering of the iris samples, scored
# with the Fowlkes-Mallows index (FMI) and plotted on the first two feature
# columns.
iris = datasets.load_iris()
X = iris.data[:, :4]  # use all four feature columns

dbscan = DBSCAN(eps=0.4, min_samples=9)
dbscan.fit(X)
label_pred = dbscan.labels_  # DBSCAN marks noise samples with the label -1

# FMI lies in [0, 1]; a larger value means a better clustering. The reference
# must be the species labels (iris.target), not a feature column such as
# iris.data[:, -1].
FMI = fowlkes_mallows_score(iris.target, label_pred)
print("FM指数(DBSCAN) :", FMI)

# Plot the first three clusters. A cluster id that DBSCAN did not produce
# simply yields an empty selection and an empty scatter series.
x0 = X[label_pred == 0]
x1 = X[label_pred == 1]
x2 = X[label_pred == 2]
# Noise samples are included in the FMI above, so show them too rather than
# silently dropping them from the figure.
x_noise = X[label_pred == -1]
plt.scatter(x0[:, 0], x0[:, 1], c="red", marker='o', label='label0')
plt.scatter(x1[:, 0], x1[:, 1], c="green", marker='*', label='label1')
plt.scatter(x2[:, 0], x2[:, 1], c="blue", marker='+', label='label2')
plt.scatter(x_noise[:, 0], x_noise[:, 1], c="black", marker='x', label='noise')
plt.xlabel("Sepal.Length")
plt.ylabel("Sepal.Width")
plt.title("DBSCAN")
plt.legend(loc=2)
plt.show()
总结:我们使用鸢尾花数据集,对三种聚类算法进行比较,并通过指标FMI来评测实验结果的优劣。从实验结果可以看出,在鸢尾花数据集上,K-means的表现最好。