import pandas as pd
import matplotlib.pyplot as plt

# Load the demo data set from the Excel workbook next to this script.
data = pd.read_excel('./演示数据.xlsx')
# Preview of the first rows (only rendered when run as a notebook cell).
data.head()

# Scatter plot of the raw, not-yet-clustered points:
# first column on the x axis, second column on the y axis.
raw_x, raw_y = data.iloc[:, 0], data.iloc[:, 1]
plt.scatter(raw_x, raw_y, c='green', marker='^')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
# Cluster the data with DBSCAN, using scikit-learn's default hyper-parameters.
from sklearn.cluster import DBSCAN
dbs = DBSCAN()
dbs.fit(data)
label_dbs = dbs.labels_
# A bare `label_dbs` expression is only displayed inside a notebook cell;
# print it so the labels are also visible when this file runs as a script.
print(label_dbs)
# Output recorded from a notebook run. It is kept as a comment because, as
# executable code, `array(...)` / `int64` are undefined names here and would
# raise NameError:
# array([0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0,
#        1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1,
#        0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0,
#        0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0,
#        1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0], dtype=int64)
# Show the DBSCAN result: one scatter series per cluster label.
# NOTE: only labels 0 and 1 are drawn; rows carrying any other label are not plotted.
for cluster_id, colour, shape, legend_name in (
    (0, 'red', 'o', 'class0'),
    (1, 'green', '*', 'class1'),
):
    # Select the rows assigned to this cluster once, then plot column 0 vs. column 1.
    members = data[label_dbs == cluster_id]
    plt.scatter(members.iloc[:, 0], members.iloc[:, 1], c=colour, marker=shape, label=legend_name, s=80)
plt.legend(loc='lower right')
plt.show()
# Compare against K-means: cluster the same data into two groups.
from sklearn.cluster import KMeans
kms = KMeans(n_clusters=2)
kms.fit(data)
label_kms = kms.labels_
# A bare `label_kms` expression is only displayed inside a notebook cell;
# print it so the labels are also visible when this file runs as a script.
print(label_kms)
# Output recorded from a notebook run. It is kept as a comment because, as
# executable code, `array(...)` is an undefined name here and would raise
# NameError. No random_state is set above, so a fresh run may number the
# clusters differently from this record.
# array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0,
#        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,
#        1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
#        0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,
#        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
# Show the K-means result: one scatter series per cluster label (0 and 1).
for cluster_id, colour, shape, legend_name in (
    (0, 'red', 'o', 'class0'),
    (1, 'green', '*', 'class1'),
):
    # Select the rows assigned to this cluster once, then plot column 0 vs. column 1.
    members = data[label_kms == cluster_id]
    plt.scatter(members.iloc[:, 0], members.iloc[:, 1], c=colour, marker=shape, label=legend_name, s=80)
plt.legend(loc='lower right')
plt.show()