数据仓库与数据挖掘——DBSCAN

一、基本介绍

        DBSCAN(Density-Based Spatial Clustering of Applications with Noise)是一种基于密度的聚类算法,它用一组邻域参数(ε,MinPts)来刻画样本分布的紧密程度。与基于划分的聚类方法和层次聚类方法相比,DBSCAN算法将簇定义为密度相连的样本的最大集合,能够将密度足够高的区域划分为簇,不需要事先给定簇的数量,并且可以在存在噪声的空间数据集中发现任意形状的簇。

二、核心思想

        DBSCAN的一个簇里可以有一个或者多个核心对象。如果只有一个核心对象,则簇里其他的非核心对象样本都在这个核心对象的ε-邻域里;如果有多个核心对象,则簇里任意一个核心对象的ε-邻域中一定至少包含另一个核心对象,否则这些核心对象之间无法密度可达。这些核心对象的ε-邻域里所有样本的集合组成一个DBSCAN聚类簇。

三、原理演示

        给出样本点和邻域参数(ε,MinPts)

        选出核心对象

        将每个核心点与其直接密度可达点聚成一类

 

        最后进行簇合并,直到不产生新的簇

四、算法流程图

五、关键源码展示

1、导入数据

2、生成核心对象以及每个核心对象一次直接密度可达点形成的簇

3、簇合并

4、输出聚类情况,绘制散点图

六、拓展实验

七、完整代码与数据集

1、完整代码

import math
import matplotlib.pyplot as plt


# import numpy as np

def loadData():
    """Load 2-D sample points from ``DBSCAN.txt`` in the working directory.

    Every non-blank line must contain at least two comma-separated numbers;
    the first two are parsed with ``float`` and stored as an ``(x, y)``
    tuple. Blank or whitespace-only lines are skipped so that a stray empty
    line no longer discards the whole file. Duplicate points are kept.

    Returns:
        list[tuple[float, float]]: the points in file order, or ``None`` when
        the file cannot be read or a line cannot be parsed (the error is
        printed, matching the previous best-effort behaviour).
    """
    dataSet = []
    try:
        with open("DBSCAN.txt", "r") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                # Split once and reuse both fields.
                fields = line.split(',')
                dataSet.append((float(fields[0]), float(fields[1])))
    except (OSError, ValueError, IndexError) as e:
        # OSError: file missing/unreadable; ValueError: non-numeric field or
        # undecodable bytes; IndexError: fewer than two fields on a line.
        print(e)
        return None
    print('原始数据集={}'.format(dataSet))
    return dataSet


# 曼哈顿距离
def distance(a, b):
    """Return the Manhattan (L1) distance between points ``a`` and ``b``.

    Each coordinate is converted with ``float`` before subtracting. The
    number of coordinates compared is ``len(a)``, so ``b`` must provide at
    least that many entries; extra entries of ``b`` are ignored.
    """
    return sum(math.fabs(float(a[k]) - float(b[k])) for k in range(len(a)))


# 欧氏距离
# def distance(a, b):
#     dist = 0
#     for i in range(len(a)):
#         dist += (float(a[i]) - float(b[i])) ** 2
#     return dist ** 0.5


# 算法模型
# def DBSCAN(dataSet, e, MinPts):
#     # 核心点
#     Center = set()
#     for p in dataSet:
#         if len([i for i in dataSet if distance(p, i) <= e]) >= MinPts:
#             Center.add(p)
#
#     print(Center)
#     Data = set(dataSet)
#     clusters = []
#     k = 0  # 用来计数簇数
#     # 两个簇 合并 两个
#     while len(Center):
#         buffer = Data.copy()  # 用来寄存数据集合
#         center = list(Center)[np.random.randint(0, len(Center))]
#         P = [center]  # 用来存放以一个核心点搜集到的所有数据
#         while len(P):
#             p = P[0]  # 取出P内的一个点
#             Np = [i for i in dataSet if distance(i, p) <= e]  # 求以p为核心的所有直接密度可达的点
#             S = Data & set(Np)  # 取出交集
#             P += list(S)  # 将S中的点加入到P中
#             Data -= S  # 减去所有的已经满足条件的点
#             P.remove(p)
#         k += 1
#         Ck = list(buffer - Data)  # 以p点为核心点的密度可达的所有的点
#         Center -= set(Ck)  # 从Center中删除所有已经包含在p内的核心点
#         clusters.append(Ck)
#
#     for i in range(len(clusters)):
#         print('Cluster[{}]:{}'.format(i + 1, clusters[i]))
#
#     return clusters

def _show_merge_state(Center, Ck):
    """Print the core points and the member points of every current cluster."""
    for n, cores in enumerate(Center):
        print('Center[{}]={}'.format(n + 1, cores))
    for n, members in enumerate(Ck):
        print('Ck[{}]:{}'.format(n + 1, members))


def DBSCAN(dataSet, e, MinPts):
    """Cluster ``dataSet`` with DBSCAN and return the clusters.

    A point is a core point when at least ``MinPts`` samples of ``dataSet``
    (itself included, duplicates counted) lie within ``distance(...) <= e``.
    Every core point starts as its own cluster made of its e-neighbourhood;
    clusters are then merged repeatedly until a full pass produces no merge.

    Two clusters are merged only when a core point of one lies inside the
    e-neighbourhood of a core point of the other. Sharing a mere border
    point does not connect two clusters, because a non-core point cannot
    extend a density-reachability chain.

    The progress of every merge round is printed.

    Args:
        dataSet: sequence of hashable points (e.g. ``(x, y)`` tuples).
        e: neighbourhood radius, compared against ``distance``.
        MinPts: minimum neighbourhood size for a core point.

    Returns:
        list[list]: one list of distinct points per cluster. Points that are
        not within ``e`` of any core point (noise) appear in no cluster. A
        border point within reach of several clusters is assigned to the
        first of them only.
    """
    Center = []  # Center[k]: core points of cluster k
    Ck = []      # Ck[k]: points of cluster k, kept as a list for printing
    reach = []   # reach[k]: set view of Ck[k] for O(1) membership tests
    for p in dataSet:
        # Compute each neighbourhood once and reuse it both for the
        # core-point test and as the initial cluster.
        Np = [q for q in dataSet if distance(p, q) <= e]
        if len(Np) >= MinPts:
            Center.append([p])
            Ck.append(Np)
            reach.append(set(Np))
    print('Center={}'.format(Center))
    print('Ck={}'.format(Ck))

    flag = True
    cnt = 1
    while flag:
        print('第{}轮'.format(cnt))
        _show_merge_state(Center, Ck)
        flag = False
        i = 0
        while i < len(Center):
            j = i + 1
            while j < len(Center):
                # A core of cluster j inside cluster i's reach set is within
                # e of one of cluster i's cores, so the two are connected.
                if any(core in reach[i] for core in Center[j]):
                    # Keep every core of cluster j, not only its first one.
                    Center[i].extend(Center[j])
                    Ck[i] = list(dict.fromkeys(Ck[i] + Ck[j]))
                    reach[i] |= reach[j]
                    del Center[j]
                    del Ck[j]
                    del reach[j]
                    flag = True
                    print('C{}与C{}合并:{}'.format(i + 1, j + 1, Ck[i]))
                    # Do not advance j: the next cluster moved into slot j.
                else:
                    j += 1
            print()
            i += 1
            _show_merge_state(Center, Ck)
        cnt += 1

    print('完成合并')

    # Give every point to exactly one cluster. Only border points can be
    # claimed twice: a core point inside another cluster's reach set would
    # have triggered a merge above.
    claimed = set()
    clusters = []
    for members in Ck:
        own = [p for p in dict.fromkeys(members) if p not in claimed]
        claimed.update(own)
        clusters.append(own)
    return clusters


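# Merge example (presumably): clusters 1 and 2 can be merged, 1 and 3 cannot
# be merged directly, but 2 and 3 can -> transitively all three form one
# cluster (1 2 3), which is why merging is repeated until nothing changes.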
# 绘制散点图
def draw(Clusters, xlim=(0.0, 100.0), ylim=(0.0, 100.0)):
    """Show a scatter plot with one marker style per cluster.

    Args:
        Clusters: sequence of clusters, each a sequence of points whose
            first two entries are the x and y coordinates.
        xlim: (min, max) of the x axis; defaults to the previous hard-coded
            range. Pass e.g. ``(0.0, 40.0)`` for smaller data sets.
        ylim: (min, max) of the y axis; same default convention as ``xlim``.
    """
    shape = ['.', 's', 'x', 'H', 'v', '^', '<', '>', '+', 'x', 'D']
    # The axis limits do not depend on the cluster, so set them once.
    plt.xlim(xlim)
    plt.ylim(ylim)
    for i, cluster in enumerate(Clusters):
        x = [point[0] for point in cluster]
        y = [point[1] for point in cluster]
        # Cycle through the markers: indexing shape[i] directly raised
        # IndexError as soon as there were more clusters than markers.
        plt.scatter(x, y, color='red', label=i, marker=shape[i % len(shape)])
    plt.show()


def drawDataset(data, xlim=(0.0, 100.0), ylim=(0.0, 100.0)):
    """Show a scatter plot of the raw, unclustered data set.

    Args:
        data: iterable of ``(x, y)`` pairs.
        xlim: (min, max) of the x axis; defaults to the previous hard-coded
            range. Pass e.g. ``(0.0, 40.0)`` for smaller data sets.
        ylim: (min, max) of the y axis; same default convention as ``xlim``.
    """
    x = []
    y = []
    # Single pass so that one-shot iterables keep working.
    for xx, yy in data:
        x.append(xx)
        y.append(yy)
    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.scatter(x, y)
    plt.show()


def main():
    """Load the data set, plot it, cluster it with DBSCAN and plot the result.

    Uses a neighbourhood radius of 2 and a minimum neighbourhood size of 3.
    """
    data = loadData()
    if data is None:
        # loadData prints the error and returns None on failure; stop here
        # instead of crashing with a TypeError while plotting.
        return
    e = 2
    MinPts = 3
    drawDataset(data)
    Clusters = DBSCAN(data, e, MinPts)
    draw(Clusters)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

 2、数据集

(1)原始数据集

20,45
34,23
53,67
54,85
67,4
33,67
24,78
37,90
67,34
34,56
89,78
65,23
45,45
67,67
84,6
23,78
13,7
45,70
67,76
4,54
68,60
20,45
34,23
53,67
54,85
67,4
33,67
24,78
37,90
67,34
78,32
23,33
45,34
67,35
67,76
4,54
68,60
7,38
70,39
76,40
45,70
67,76
6,36
78,37
7,38
70,39
6,36
45,45
67,67
84,6
23,78
13,7
45,70
67,76
4,54
68,60
45,45
34,67
35,67
36,4
37,68
38,45
20,34

(2)拓展数据集

15.55,28.65
14.9,27.55
14.45,28.35
14.15,28.8
13.75,28.05
13.35,28.45
13,29.15
13.45,27.5
13.6,26.5
12.8,27.35
12.4,27.85
12.3,28.4
12.2,28.65
13.4,25.1
12.95,25.95
12.9,26.5
11.85,27
11.35,28
11.15,28.7
11.25,27.4
10.75,27.7
10.5,28.35
9.65,28.45
10.25,27.25
10.75,26.55
11.7,26.35
11.6,25.9
11.9,25.05
12.6,24.05
11.9,24.5
11.1,25.2
10.55,25.15
10.05,25.95
9.35,26.6
9.3,27.25
9.2,27.8
7.5,28.25
8.55,27.45
8.5,27.05
8.05,27.2
7.85,26.8
7.3,27.4
6.8,26.85
7,26.5
7.55,26.3
8.55,26.3
9,25.85
8.6,25.65
9.4,25.55
8.45,25.05
8.85,24.6
9.65,24.7
10.55,24.35
11.05,23.9
10.55,23.55
9.45,23.35
9.2,23.9
8.35,23.9
7.35,24.75
7.4,25.45
6.6,25.75
6.1,26
5.8,26.95
5.65,25.8
5.3,26.1
6.4,25.4
5.4,25.25
5.35,24.7
4.8,25.05
4.2,25.55
6.4,24.8
6.55,24.3
7.4,24.25
5.45,24.2
4.3,24
4,24.25
3.35,23.3
4.85,23.05
4.3,22.75
5.85,23.4
5.9,23.55
7.55,23.7
6.85,23.25
7.65,23.1
6.95,22.55
6.1,22.6
5.5,22.6
4.7,22.1
3.8,21.85
4.65,21.2
4.15,20.35
5.3,20.4
5.6,20.75
5.8,21.95
6.4,21.95
6.55,21.15
7.45,21.95
7.4,21.55
7.75,21.2
7.65,20.65
6.95,19.8
6.6,20.1
6.05,20.2
5.4,19.65
5.35,19.05
5.8,18.25
6.3,19.1
7,18.9
7.15,17.9
7.35,18.2
8.2,20.05
8.3,19.45
8.3,18.5
8.75,18.8
9.05,18.2
9.35,17.7
8.9,17.65
8.45,17.2
10.05,17.2
10.4,16.75
8.6,20.9
8.65,21.3
8.65,21.9
8.65,22.5
8.95,22.8
9.95,22.65
8.95,22.2
9.65,21.9
10.55,22.3
10.9,22.85
11.35,23.45
12.05,23.4
12.3,22.75
11.7,22.15
11.15,22.05
10.85,21.5
10.85,21.05
9.6,21.3
9.85,20.7
9.35,20.6
9.25,19.65
9.95,19.8
10.7,20.35
11.3,20.7
12.35,21.6
13.1,21.3
12.85,20.75
12,20
11,19.85
10.35,19
9.9,18.65
10.6,18.15
11.4,18.3
11.4,19.25
12.35,18.8
12.8,19.75
12.15,18.1
11.05,17.5
11.95,17.25
12.25,17.5
13.05,17.4
13.75,18.15
13.5,18.65
13.65,19.25
14,19.9
15.2,18.2
15.5,17.15
13.9,17.1
13.75,16.6
12.15,16.4
7.8,13.7
8.85,13.35
9,12.7
9.7,12.1
8.05,12.9
7.7,13.25
6.8,13.2
6.6,13.45
6.2,12.55
5.4,12.85
5.7,12.25
5.2,11.9
5.15,11.35
5.85,11.2
6.1,11.75
7,12.35
7.05,12.45
7.9,12.5
8.55,12.1
7.85,11.85
7.1,11.95
6.9,11.5
6.85,10.9
6.4,10.7
5.9,10.3
6.4,10.25
7.05,10.05
7.35,10.5
7.65,11.1
8.1,11.2
8.8,11.4
8.3,10.55
9,10.9
9.35,10.5
10.15,11
10.4,10.55
10.9,10
11.55,10.2
11.75,10.85
10.1,8.65
11.05,9.1
11.85,9.8
12.85,10.65
12.9,11.7
13.6,11.1
14.05,11.75
14.5,11.8
14.3,12.45
17,12.9
15.8,12.6
15.85,12
16.7,12.2
16.25,11.7
15.55,11.15
14.8,11.35
14.45,10.75
13.75,10.45
12.8,10.1
13.15,9.8
12.45,9.3
11.8,8.95
11.1,8.45
10.35,7.7
10.1,6.75
11.3,7.95
12.35,8.45
13.1,8.95
13.2,9.35
14.1,10.05
11.5,7.5
11.35,6.9
11.95,6.75
12.4,7.1
12.25,7.6
12.95,7.6
13.45,7.95
13.35,8.25
13.75,9
14.3,9.3
14.85,9.55
15.1,10.25
15.45,10.55
16.35,10.85
16.75,11.5
16.25,10.2
15.4,10.1
15.45,9.7
15.15,9.3
15.25,8.65
15.55,8.2
14.25,8.7
14.25,8.25
15.05,7.8
14.3,7.5
13.55,7.45
14.3,6.95
13.95,6.7
13.05,6.95
13.05,6.2
11.55,6.3
10.8,5.85
10.6,5.05
11.35,5.55
12.15,5.4
12.4,5.8
12.8,5.7
13.65,5.9
13.9,5.3
13.1,5.1
12.55,4.9
11.5,4.75
11.35,4.05
12.4,4.35
11.75,3.45
12.65,3.7
13.4,4.35
13.9,4.95
12.75,3
13.55,3.15
13.7,3.65
14.1,4.1
14.65,5.05
14.35,5.75
14.5,6.55
15.15,7.1
13.6,2.55
14.45,2.4
14.6,3.05
15,3.4
15.25,3.5
14.7,4.1
14.7,4.5
15.25,2.7
15.65,2.05
15.95,2.8
16.1,3.55
15.9,4
15.6,4.75
15.55,5.05
15.35,5.5
15.15,5.95
15.5,6.75
15.7,6.35
16.2,5.9
16.35,5.35
16.2,4.55
16.55,4.2
16.95,4.75
17.05,5.1
17.3,4.8
17.3,4.15
17.6,4.3
17.05,3.7
17.25,3.05
16.65,2.8
16.55,2.15
17.2,2.05
18.15,1.95
18.05,2.45
18.15,3.05
18.6,3.45
18.4,3.6
18.85,3.2
19.1,2.65
19.45,2.65
19,2.1
19.9,2.05
20.45,2.8
19.8,3.25
19.45,3.9
18.65,4.2
18.4,4.6
18.65,4.75
18.75,5.15
19.1,4.55
17.9,5.4
17.65,5.7
17.05,6.05
17.4,6.5
16.6,6.85
15.7,7.15
15.75,7.75
16.6,7.95
20.4,3.4
20.7,3.45
21.15,2.85
21.75,2.65
22,3.25
22.2,3.5
21.45,3.75
21.1,4.05
20.15,4.3
20.8,4.7
20.7,5.15
19.75,5.05
19.85,5.5
20.4,5.65
20.55,5.75
18.7,5.75
19.25,5.95
18.4,6
18.45,6.6
17.65,7.05
16.7,7.4
18.65,7.3
18.05,7.35
17.85,7.75
17.5,8.25
17.15,8.6
17.05,9
16.4,8.7
16.05,8.95
16.05,9.6
16.5,9.75
17.25,9.6
17.6,9.9
17.8,9.3
18,8.55
18.8,8.1
18.8,8.35
19.4,7.6
19.25,6.6
20.05,6.95
19.8,7.5
20.05,6.35
21.15,5.7
21.65,4.85
22.15,4.35
23.05,3.35
23.05,3.8
23.15,4.4
22.5,4.75
22.15,5.2
24.15,4.55
23.5,5.05
23.1,5.3
23,5.75
22.2,5.75
21.85,6.2
20.75,6.55
21,7.15
20.75,7.65
20,8.2
19.5,8.65
18.85,9.05
18.75,9.55
18.6,10
16.95,10.35
17.35,10.85
18,10.65
18.5,10.55
18.1,11.1
17.55,11.3
17.95,11.9
18.3,12
18,12.5
19,11.65
19.5,11.05
19.45,10.55
19.4,9.65
20.1,9.4
20.05,9.95
20.05,10.2
19.35,12.2
19.2,12.25
20.05,11.6
20.6,11.15
20.7,10.65
21.3,11.65
21.8,11.15
21.85,10.7
21.65,10.05
20.95,10.2
20.9,9.7
21.65,9.45
21.2,9.25
20.75,8.75
20.55,8.75
21.1,8
21.65,8.65
21.75,8.2
21.95,7.55
22,6.75
22.8,6.45
22.65,6.65
22.75,7.05
23,7.35
22.55,7.9
22.2,8.7
22.9,8.45
22.35,9.2
22.75,9.35
22.4,10.05
23.05,10.9
23.3,9.85
23.95,9.8
23.65,9.1
23.7,8.85
24.25,8.25
24.85,7.95
23.5,7.85
23.85,7.35
23.95,6.9
23.65,6.5
23.6,5.7
24.3,5.65
24.8,6.4
34.05,3.5
33.05,3.85
32,3.8
31.9,4.4
31.05,4.75
30.4,5.65
30.75,6.1
30,6.7
30.1,7.4
29.5,8.15
30.75,8
30.85,7.35
31.5,6.75
31.75,5.95
32.35,6.45
32.8,6
32.05,5.1
32.8,4.8
32.65,4.4
33.65,4.6
33.05,5.15
33.6,5.45
34.5,5.05
34.9,4.65
35.45,4.1
34.6,4.05
34.2,4.2
36.3,5.2
35.55,5.35
35.95,6.05
34.8,5.85
33.7,6.15
33.95,6.6
33.7,7.05
32.75,7.1
32.3,7.65
33,7.9
31.95,8.15
31.15,8.65
30.35,8.85
29.85,9
30.7,9.15
29.7,9.9
30.45,9.95
30.95,9.85
31.8,9.45
32.45,8.8
33.55,8.6
34.35,7.7
34.7,8
34.6,7.25
35,6.8
35.5,7.35
36.1,7.5
36.55,7
36,8.2
35.35,8.05
36.55,8.65
36.4,9.1
35.5,9.1
34.55,8.85
35.25,9.4
34.4,9.5
33.5,9.3
33.85,9.8
32.5,9.65
32.3,10.25
33.3,10.3
31.6,10.5
30.6,10.5
30.4,11.1
30.9,11.45
30.7,11.65
30.4,12.05
31.2,12
31.95,11.35
31.65,11.05
32.95,11.15
32.65,11.7
32.25,12.25
32.05,12.25
31.3,12.7
31.95,12.95
32.75,13.1
33.15,13.2
33.1,12.75
33.15,12.1
34.3,11.75
34,10.85
34.65,11
34.8,10.1
35.65,9.85
36.35,10
35.55,10.75
35.8,11.55
35.2,11.75
34.7,11.75
34.95,12.75
34.05,12.55
34.05,13.05
33.25,13.7
33.2,14.15
33.25,14.7
33,15.15
32.95,15.65
32.6,16.15
32.45,16.75
32.65,17.05
32.75,17.3
31.75,17.2
31.7,17.65
31,17.5
31.15,17.9
30.45,18.05
30.05,18.8
30.55,18.8
30.5,19.3
30.25,19.4
29.6,19.85
29.15,20.55
30.25,20.45
30.7,20.05
31,19.9
31.55,19.65
31.5,18.55
32.05,18.6
31.95,19.1
32.6,18.35
32.85,17.95
33.45,17.45
33.7,17
34.25,17.35
34.3,18.05
33.85,18.4
33.05,18.85
33.25,18.95
32.8,19.15
32.3,19.85
32.8,20
31.75,20.25
31.75,20.75
32.1,21.15
31.55,21.6
30.65,21.3
29.95,21.6
29.5,21.6
30.35,22.05
31.05,22
31.55,22.2
30.95,22.65
30.3,23.1
29.6,23.15
29.35,22.55
29.2,23.85
30.75,24
30.95,24.15
31.45,23.7
31.95,23.15
32.55,22.05
32.6,22.55
33.25,22.25
33.65,21.9
33.5,21.3
33.1,20.75
33.8,20.4
33.85,20
34.15,19.3
34.85,18.85
35.3,18.55
35.4,19.35
34.55,19.75
35.05,20
35.95,19.85
36.35,20.6
35.5,20.55
34.45,20.65
34.4,21.25
35,21.05
35.75,21.3
35.05,21.5
34.6,22.05
34.2,21.75
36.25,21.95
35.7,22.3
35.5,22.9
35.85,23.25
36.3,23.8
35.45,24.1
34.9,23.5
34.2,22.9
33.85,23.3
33.25,23.35
32.45,23.7
33.6,23.9
34.25,23.95
34.25,24.1
35.4,24.7
35.15,25.3
34.4,24.9
33.7,24.85
32.25,24.45
32.5,24.7
31.45,24.45
31.55,25.2
31.05,25
30.25,24.3
29.8,24.8
29.6,25.5
29.7,26.05
30.5,25.5
30.65,26
31.25,26.05
31.45,26.95
30.75,26.9
30.65,27.15
31.25,27.85
31.85,27.75
32.7,28.2
33.25,27.55
32.4,27.1
32.15,26.65
32.35,25.95
32.95,25.5
33.85,26.05
33.05,26.5
33.65,27
34.1,27.35
34.2,27.95
34.65,26.85
35.25,26
35.7,26.15
34.4,25.6
21.3,20.8
20.15,20.9
19.2,21.35
19.1,21.85
18.45,22.8
18.75,22.95
19.4,23
19.55,22.25
19.8,21.85
20.5,21.85
21.45,21.45
21.7,21.9
21.4,22.3
21,22.6
21.15,22.95
20.5,22.85
19.75,23.65
19.2,23.7
18.45,24.35
20.65,23.85
20.65,24.3
19.7,24.6
20.15,25.05
22.15,25.1
21.6,24.65
21.7,23.8
21.9,23.65
22.55,23.5
22.55,24.3
23.3,24.45
24.25,24.35
23.8,25.25
23.4,23.8
22.9,23.2
22.3,22.8
22.2,22.4
23.1,21.7
22.85,21.9
22.65,21.1
23.15,22.6
24.1,21.9
24.7,22.2
24.3,22.6
24.15,23.3
23.9,23.45
5.2,2.15
6.35,1.95
6.75,2.3
5.9,2.4
5.4,2.7
4.85,2.9
4.85,3.35
5.15,3.45
5.7,3.45
6.2,3
6.2,3.2
7.65,2.15
7.2,2.75
6.75,3.2
6.75,3.55
6.65,3.8
5.8,4
4.95,4.05
5.1,4.35
5.7,4.45
5.45,4.85
6.7,4.8
6.55,5.05
7.2,4.9
6.2,4.25
7.1,4.3
7.85,4.5
7.6,4.15
7.25,3.55
7.8,3.35
8.05,2.75
8.5,3.25
8.1,3.55
8.15,4

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

有为肥宅

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值