本文主要介绍DBSCAN,它是一种著名的密度聚类算法。其原理这里不再详细介绍,详情请参阅周志华《机器学习》(西瓜书)P211-P214。这里主要介绍它的Python实现代码。
import xlrd
import numpy as np
import numpy.random as random
import math
import copy
import Queue
from random import choice
import matplotlib.pyplot as plt
from collections import Counter
class DataSet(object):
    """Hold the sample matrix and the clustering result for one run.

    The attributes are plain public fields that the module-level helper
    functions read and write directly.
    """

    def __init__(self):
        # Sample rows; starts out as an empty list.
        self.DataMat = []
        # Number of samples.
        self.DataNum = 0
        # One set of sample indices per discovered group.
        self.FinalSub = []
        # Single-argument print() behaves the same on Python 2 and 3.
        print("Creat a Class")
def ReadData(ob, path="kmeans.xlsx"):
    """Load samples from the first sheet of an Excel workbook into *ob*.

    The first row of the sheet is skipped (treated as a header); every
    following row becomes one sample.

    Args:
        ob: Object whose ``DataMat`` and ``DataNum`` attributes are
            overwritten with the sample array and the sample count.
        path: Workbook to open. Defaults to ``"kmeans.xlsx"``, the file
            name that used to be hard-coded here.
    """
    # NOTE(review): xlrd 2.0+ reportedly reads only legacy .xls files, so an
    # .xlsx workbook may need xlrd < 2.0 -- confirm against the installed
    # version.
    workbook = xlrd.open_workbook(path)
    sheet = workbook.sheets()[0]

    # Build the row list from scratch instead of appending to ob.DataMat:
    # after one call DataMat is an ndarray, which has no append(), so a
    # second call used to crash.
    rows = [sheet.row_values(r) for r in range(1, sheet.nrows)]

    ob.DataMat = np.array(rows)
    ob.DataNum = np.shape(ob.DataMat)[0]
    print("Your Data Mat is \n{0}".format(ob.DataMat))
    print("Your Data Set Num is {0}".format(ob.DataNum))
def DrawPicture(ob):
    """Plot every group in ``ob.FinalSub`` and save the figure as ``My.jpg``.

    Each entry of ``ob.FinalSub`` is an iterable of row indices into
    ``ob.DataMat``; columns 0 and 1 of a row are used as the x and y
    coordinates.

    Args:
        ob: Object providing ``DataMat`` and ``FinalSub``.
    """
    plt.figure('test')
    # One matplotlib format string ('x' marker + colour letter) per group.
    # NOTE(review): 'xw' draws white markers, which are presumably invisible
    # on a white background -- confirm whether that is intended.
    Signal = ['xb', 'xr', 'xg', 'xy', 'xm', 'xk', 'xw', 'xc']
    for group_index, members in enumerate(ob.FinalSub):
        # Wrap around so that more than len(Signal) groups reuse styles
        # instead of raising IndexError.
        style = Signal[group_index % len(Signal)]
        for row in members:
            plt.plot(ob.DataMat[row][0], ob.DataMat[row][1], style)
    plt.savefig("My.jpg")
def CalculataSeed(ob, epsen, MinPts):
    """Cluster the samples held by *ob* with DBSCAN.

    One set of sample indices is appended to ``ob.FinalSub`` for every
    cluster found, followed by one last set holding the indices that were
    never reached from any core point (possibly empty).

    Args:
        ob: Object providing ``DataMat`` (indexable by sample number),
            ``DataNum`` (sample count) and ``FinalSub`` (list to append to).
        epsen: Neighbourhood radius. Two samples are neighbours when their
            Euclidean distance, rounded to 3 decimals, is ``<= epsen``.
        MinPts: Minimum neighbourhood size, counting the sample itself, for
            a sample to be a core point.
    """
    # Local import so the block does not rely on a file-level import of deque.
    from collections import deque

    sample_count = ob.DataNum

    # Every neighbourhood contains its own sample, so len() can be compared
    # with MinPts directly.
    neighbourhoods = [{i} for i in range(sample_count)]
    for i in range(sample_count):
        # The distance is symmetric: evaluate each unordered pair once.
        for j in range(i + 1, sample_count):
            dist = round(np.linalg.norm(ob.DataMat[i] - ob.DataMat[j]), 3)
            if dist <= epsen:
                neighbourhoods[i].add(j)
                neighbourhoods[j].add(i)

    core_points = {
        i for i in range(sample_count) if len(neighbourhoods[i]) >= MinPts
    }

    for hood in neighbourhoods:
        print(hood)
    print("The core seed set is  {0}".format(core_points))

    unvisited = set(range(sample_count))
    print("NoviSet is  {0}".format(unvisited))

    pending = deque()
    while core_points:
        # Snapshot so the new cluster is exactly what this round removes.
        before_round = set(unvisited)

        seed = choice(list(core_points))
        print("random seed is  {0}".format(seed))
        pending.append(seed)
        unvisited.discard(seed)

        # Breadth-first expansion: only core points pull in their neighbours.
        while pending:
            current = pending.popleft()
            if len(neighbourhoods[current]) >= MinPts:
                reached = neighbourhoods[current] & unvisited
                unvisited -= reached
                pending.extend(reached)

        # Keep the cluster in a local instead of re-reading it from
        # ob.FinalSub by position, which picked the wrong entry whenever
        # ob.FinalSub was not empty on entry.
        cluster = before_round - unvisited
        ob.FinalSub.append(cluster)
        print("Class is {0}".format(cluster))

        core_points -= cluster
        print("SeedSet is  {0}".format(core_points))
        print("NoviSet is  {0}".format(unvisited))

    # Whatever was never reached from a core point goes into the last group.
    ob.FinalSub.append(unvisited)
def main():
    """Run the demo: load the data, cluster it, and save the plot.

    Uses a neighbourhood radius of 0.11 and a minimum neighbourhood size
    of 5.
    """
    A = DataSet()
    ReadData(A)
    CalculataSeed(A, 0.11, 5)
    DrawPicture(A)


# Only run the pipeline when executed as a script, so that importing this
# module does not trigger file I/O and plotting.
if __name__ == "__main__":
    main()
数据如下:请将其保存到Excel文件kmeans.xlsx的第一个工作表中,首行为表头(density、sweet),程序读取时会跳过首行。
density | sweet |
0.697 | 0.46 |
0.774 | 0.376 |
0.634 | 0.264 |
0.608 | 0.318 |
0.556 | 0.215 |
0.403 | 0.237 |
0.481 | 0.149 |
0.437 | 0.211 |
0.666 | 0.091 |
0.243 | 0.267 |
0.245 | 0.057 |
0.343 | 0.099 |
0.639 | 0.161 |
0.657 | 0.198 |
0.36 | 0.37 |
0.593 | 0.042 |
0.719 | 0.103 |
0.359 | 0.188 |
0.339 | 0.241 |
0.282 | 0.257 |
0.748 | 0.232 |
0.714 | 0.346 |
0.483 | 0.312 |
0.478 | 0.437 |
0.525 | 0.369 |
0.751 | 0.489 |
0.532 | 0.472 |
0.473 | 0.376 |
0.725 | 0.445 |
0.446 | 0.459 |
得到结果如图: