"""Demo: cluster standardized blob data with DBSCAN (scikit-learn)."""
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn import metrics
# NOTE: `sklearn.datasets.samples_generator` was deprecated in scikit-learn
# 0.22 and removed in 0.24; make_blobs is importable from sklearn.datasets.
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

# #############################################################################
# Generate sample data: 750 points drawn around three centers, then
# standardized to zero mean / unit variance so one eps radius suits all axes.
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4,
                            random_state=0)
X = StandardScaler().fit_transform(X)

# #############################################################################
# Compute DBSCAN: a point with >= min_samples neighbors within eps is a
# core sample; everything unreachable from a core sample is noise (label -1).
db = DBSCAN(eps=0.3, min_samples=10).fit(X)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise (-1) if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
# The original print statement was truncated in the source; it reports the
# cluster count computed above.
print('Estimated number of clusters: %d' % n_clusters_)
DBSCAN算法实现
最新推荐文章于 2022-10-17 21:43:57 发布
该博客演示了如何使用sklearn库中的DBSCAN算法对样本数据进行聚类。首先,通过make_blobs生成了750个样本点,然后进行标准化处理。接着,应用DBSCAN算法,设置eps为0.3,min_samples为10,计算得到聚类结果。博客还展示了聚类的评估指标,包括Homogeneity、Completeness、V-measure、Adjusted Rand Index、Adjusted Mutual Information和Silhouette Coefficient。最后,使用matplotlib可视化了聚类结果。
摘要由CSDN通过智能技术生成