# -*- coding: utf-8 -*-
import numpy as np
from ExpandCluster import Expand_Cluster
from SearchRecord import ClusterSum
UNCLASSIFIED = False
NOISE = 0
def LoadDataSet(fileName, splitChar='\t'):
    """
    Load a dataset from a delimited text file.

    Input:  fileName  - path of the text file, one data point per line
            splitChar - field separator (default: tab)
    Output: list of rows, each row a list of floats
    Raises: ValueError if a non-empty line contains a non-numeric field
    """
    dataSet = []
    with open(fileName) as fr:
        # Iterate the file lazily instead of materializing every line
        # with readlines().
        for line in fr:
            line = line.strip()
            # Skip blank lines (e.g. a trailing newline), which would
            # otherwise crash on float('').
            if not line:
                continue
            dataSet.append([float(v) for v in line.split(splitChar)])
    return dataSet
def DBScan(data, eps, minPts):
    """
    Run DBSCAN over a dataset stored column-wise.

    Input:  data   - matrix of shape (dims, nPoints), one point per column
            eps    - neighborhood radius
            minPts - minimum number of neighbors for a core point
    Output: (clusterResult, clusterCount)
            clusterResult[i] is the cluster id assigned to point i
            (UNCLASSIFIED/NOISE for points not in any cluster);
            clusterCount is the number of clusters found.
    """
    clusterId = 1
    nPoints = data.shape[1]  # points are stored as columns
    clusterResult = [UNCLASSIFIED] * nPoints
    for pointId in range(nPoints):
        if clusterResult[pointId] == UNCLASSIFIED:
            # Expand_Cluster labels the point's density-reachable region
            # in clusterResult in-place; presumably it returns True only
            # when pointId is a core point and a new cluster was created
            # -- confirm against ExpandCluster.py.
            if Expand_Cluster(data, clusterResult, pointId, clusterId, eps, minPts):
                clusterId = clusterId + 1
    # clusterId was pre-incremented past the last used id.
    return clusterResult, clusterId - 1
def main():
    """Load the sample points, cluster them, and print a summary."""
    # Load the raw points, then transpose so each column holds one point
    # (the layout DBScan expects).
    points = LoadDataSet('DBScan/DataPoints.txt', splitChar=',')
    points = np.mat(points).transpose()
    labels, numClusters = DBScan(points, 2, 15)
    print("cluster Numbers = ", numClusters)
    summary = ClusterSum(labels, numClusters)
    print(summary)


if __name__ == '__main__':
    main()