import numpy as np
import pickle
# Load the pickled object stored in clusters.pkl.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only use this with a clusters.pkl that comes from a trusted source.
with open('clusters.pkl', 'rb') as f:
    clusters = pickle.load(f)

# Flatten every group of cluster objects into one flat list of samples:
# data_list[k] holds the items of `.data` of each object in clusters[k],
# in their original order.
data_list = [
    [d for data_cluster in datas for d in data_cluster.data]
    for datas in clusters
]
def DBSCAN(points, eps, min_points):
    """
    Cluster data points with the density-based DBSCAN algorithm.

    :param points: 2-D array-like of data points, one point per row
    :param eps: neighbourhood radius (Euclidean distance, boundary included)
    :param min_points: minimum number of points, the point itself included,
        that must lie within eps of a point for it to be a core point
    :return: list with one cluster label per point; clusters are numbered
        from 0 in the order they are discovered, and -1 marks points that
        were not assigned to any cluster
    """
    # Accept plain Python sequences as well as ndarrays: the distance
    # computations below rely on NumPy broadcasting.
    points = np.asarray(points)
    n_points = len(points)

    # -1 means "not assigned to any cluster (yet)".
    labels = [-1] * n_points

    # A point is a core point when at least min_points points lie within eps
    # of it. The point itself is counted, since its own distance is 0.
    core_points = np.zeros(n_points, dtype=bool)
    for i in range(n_points):
        distances = np.linalg.norm(points - points[i], axis=1)
        core_points[i] = np.sum(distances <= eps) >= min_points

    cluster_id = 0
    for i in range(n_points):
        # Only a still-unlabelled core point can seed a new cluster.
        if labels[i] != -1 or not core_points[i]:
            continue
        labels[i] = cluster_id
        # Grow the cluster outwards from the seed; labels is updated in place.
        expand_cluster(points, labels, core_points, i, cluster_id, eps, min_points)
        cluster_id += 1
    return labels
def expand_cluster(points, labels, core_points, point_id, cluster_id, eps, min_points):
    """
    Grow a cluster outwards from one point, updating labels in place.

    Every still-unlabelled point (label -1) within eps of a visited core
    point receives cluster_id; newly labelled core points are visited in
    turn. Points that already carry a label are never relabelled.

    :param points: 2-D array-like of data points, one point per row
    :param labels: mutable sequence of cluster labels, modified in place
    :param core_points: boolean sequence, True where the point is a core point
    :param point_id: index of the point to start expanding from
    :param cluster_id: label of the cluster being grown
    :param eps: neighbourhood radius (Euclidean distance, boundary included)
    :param min_points: minimum point count; unused here, kept so the
        signature stays compatible with existing callers
    :return: None
    """
    # A non-core point joins the cluster but cannot spread it any further.
    if not core_points[point_id]:
        labels[point_id] = cluster_id
        return

    points = np.asarray(points)

    # Use an explicit stack rather than recursion: one recursive call per
    # core point overflows Python's recursion limit on large or elongated
    # clusters. The final labels do not depend on the visiting order.
    pending = [point_id]
    while pending:
        current = pending.pop()
        distances = np.linalg.norm(points - points[current], axis=1)
        for i in np.where(distances <= eps)[0]:
            if labels[i] != -1:
                continue
            labels[i] = cluster_id
            # Only core points propagate the cluster to their neighbours.
            if core_points[i]:
                pending.append(i)
import matplotlib.pyplot as plt

# Cluster the first flattened data set and draw the result.
data_value = np.array(data_list[0])
l = DBSCAN(data_value, 0.5, 5)

colors = ['red', 'blue', 'green', 'orange', 'purple', 'black']
cluster_labels = np.asarray(l, dtype=int)
# Points still labelled -1 were not assigned to any cluster and are not drawn.
for cluster in np.unique(cluster_labels[cluster_labels != -1]):
    members = data_value[cluster_labels == cluster]
    # Wrap around the palette so more clusters than colours cannot raise
    # IndexError; one scatter call per cluster instead of one per point.
    plt.scatter(members[:, 0], members[:, 1], c=colors[cluster % len(colors)])
plt.show()
# DBSCAN: a density-based clustering algorithm
# (web-page residue: "latest recommended article published 2024-08-04 13:06:27")