问题描述
Python实现DBSCAN聚类。
解题方法
周志华老师的《机器学习》一书中详细说明了算法的过程,这里就不再赘述,密度聚类的好坏与参数设置有很大的关系。
# coding:UTF-8
import copy
import math
import random
import time
from collections import deque

import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
class Ind:
    """Per-sample record used by the DBSCAN script.

    One instance is created for every row of the input data; the script
    fills the attributes in after construction.
    """

    def __init__(self):
        # Coordinates of the sample (copied from columns 0 and 1 of the data).
        self.x = 0
        self.y = 0
        # Core-point flag; False until a caller marks the sample as core.
        self.Core_True = False
        # Indices of the other samples lying within the search radius.
        # A fresh list per instance, so records never share neighbours.
        self.neighboor = []
        # Number of entries in ``neighboor``.
        self.num_neighboor = 0
        # Cluster label used for plotting: 0 means "not assigned to a cluster".
        self.color = 0
# --- Input data -------------------------------------------------------------
# MATLAB .mat file; the sample matrix is stored under the variable name 'b'.
matfn = u'C:/Users/MR_LEE/Desktop/课程文件/数据挖掘/第四次作业/data-密度聚类/square4.mat'
temp = sio.loadmat(matfn)
print(temp.keys())  # show which variables the file contains
data = temp['b']
Length = len(data)  # number of samples (rows of the matrix)

# --- DBSCAN parameters ------------------------------------------------------
radius, MinPts = 2.5, 115  # neighbourhood radius, minimum neighbours for a core point
# --- Pairwise distances, neighbour lists and core points ---------------------
# Reads:  data (one sample per row, x in column 0, y in column 1), Length,
#         radius, MinPts, Ind.
# Writes: distance_matirx (Length x Length Euclidean distances),
#         D (one Ind per sample), Core (indices of the core points).

# Cast to float so integer-typed input cannot wrap around when subtracted.
_coords = np.asarray(data, dtype=float)
_xs = _coords[:, 0]
_ys = _coords[:, 1]
_dx = _xs[:, None] - _xs[None, :]
_dy = _ys[:, None] - _ys[None, :]
# Vectorised replacement for the nested Python loops: symmetric, zero diagonal.
distance_matirx = np.sqrt(_dx * _dx + _dy * _dy)

# A sample is a neighbour when it lies strictly inside the radius; a sample is
# never counted as its own neighbour.
_is_neighbour = (distance_matirx < radius) & ~np.eye(Length, dtype=bool)

D = [Ind() for _ in range(Length)]
Core = []
for i in range(Length):
    point = D[i]
    point.x = data[i, 0]
    point.y = data[i, 1]
    # tolist() yields plain Python ints in ascending order.
    point.neighboor = np.flatnonzero(_is_neighbour[i]).tolist()
    point.num_neighboor = len(point.neighboor)
    if point.num_neighboor >= MinPts:
        point.Core_True = True
        Core.append(i)

print('数据的长度为%d, 核心数为%d' % (Length, len(Core)))
# --- DBSCAN cluster expansion -------------------------------------------------
# Reads:  D (D[i].neighboor, D[i].num_neighboor), Core, MinPts, Length.
# Writes: C    - list of clusters, each a sorted list of sample indices,
#         T    - sorted indices of samples that were never reached,
#         k    - number of clusters found,
#         Core - emptied as core points are absorbed into clusters.
k = 0
C = []
unvisited = set(range(Length))  # set membership is O(1); lists made this O(n)
while Core:
    # Pick a random core point that has not been clustered yet.
    o = random.choice(Core)

    # The seed itself must leave the unvisited set and join its own cluster.
    # Otherwise a core point with no core neighbours would stay in Core
    # forever, producing empty clusters and a loop that may never end.
    members = {o}
    unvisited.discard(o)

    Q = deque([o])  # FIFO queue of samples whose neighbourhood is still to be expanded
    while Q:
        q = Q.popleft()
        # Only core points propagate the cluster; border points are absorbed
        # but do not expand it.
        if D[q].num_neighboor >= MinPts:
            fresh = [p for p in D[q].neighboor if p in unvisited]
            Q.extend(fresh)
            members.update(fresh)
            unvisited.difference_update(fresh)

    Ck = sorted(members)
    C.append(Ck)
    k += 1
    # Every core point absorbed by this cluster is done; members always
    # contains o, so Core shrinks on every pass.
    Core = [c for c in Core if c not in members]

T = sorted(unvisited)
# --- Label samples and plot the clustering ------------------------------------
# Reads: C (list of clusters as index lists), D (per-sample records), plt.
# Label 0 is kept for samples that belong to no cluster; cluster number n
# (1-based) gets label n.
for cluster_id, cluster in enumerate(C, start=1):
    for idx in cluster:
        D[idx].color = cluster_id

color_list = ['r', 'b', 'g', 'y', 'c', 'm']
label_list = ['.', 'o', 'v', '1', '*']

# Collect the coordinates per label so each label is drawn with a single call
# instead of one call per sample.
grouped = {}
for point in D:
    xs, ys = grouped.setdefault(point.color, ([], []))
    xs.append(point.x)
    ys.append(point.y)

plt.figure()
for label in sorted(grouped):
    xs, ys = grouped[label]
    # Wrap around the palettes so any number of clusters can be drawn without
    # an IndexError. The two lists have coprime lengths (6 and 5), so the
    # colour/marker pair only repeats after 30 labels.
    plt.plot(
        xs,
        ys,
        linestyle='None',  # markers only, no connecting line
        color=color_list[label % len(color_list)],
        marker=label_list[label % len(label_list)],
        markersize=4,
    )
plt.show()
附图