# DBSCAN
#
# https://blog.csdn.net/zdy0_2004/article/details/72953531
#
# https://blog.csdn.net/lishuhuakai/article/details/53980517

import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt

# Read only the first and third columns of the CSV (usecols=[0, 2]).
# A raw string is used so the backslashes of the Windows path are never
# interpreted as escape sequences ('\d' and '\c' are invalid escapes and
# trigger a SyntaxWarning on recent Python versions).
# NOTE(review): presumably these two columns are 'sjfldm' and 'sjbh' - confirm
# against the file header.
df = pd.read_csv(r'E:\date\clustering.csv', usecols=[0, 2])

# For every distinct 'sjfldm' value, gather its 'sjbh' values into one list;
# each list is one sample to be clustered.
d = df.groupby('sjfldm')['sjbh'].apply(list)
x = np.array(d)


def jaccard(p, q):
    """Return the Jaccard distance between two collections.

    Both arguments are reduced to sets first, so repeated items cannot push
    the result below 0 or make it depend on argument order.

    Args:
        p: Iterable of hashable items.
        q: Iterable of hashable items.

    Returns:
        A float in [0, 1]: ``1 - |p & q| / |p | q|``. It is 0.0 when both
        collections hold the same items (including when both are empty) and
        1.0 when they share nothing.
    """
    left, right = set(p), set(q)
    union_size = len(left | right)
    if not union_size:
        # Two empty collections are identical; also avoids dividing by zero.
        return 0.0
    return float(1 - len(left & right) / union_size)


# Build the symmetric pairwise Jaccard-distance matrix for the samples in x.
# The diagonal stays 0 (distance of a sample to itself). Only the upper
# triangle is computed and each value is mirrored into the lower triangle, so
# every pair is evaluated exactly once and the matrix is symmetric by
# construction.
n_samples = len(x)
J = [[0.] * n_samples for _ in range(n_samples)]
for i in range(n_samples):
    for j in range(i + 1, n_samples):
        J[i][j] = J[j][i] = jaccard(x[i], x[j])
S = np.array(J)
print(S)

# Cluster the samples with DBSCAN; metric='precomputed' means S is passed in
# as the pairwise distance matrix rather than as feature vectors.
db = DBSCAN(metric='precomputed', eps=0.9, min_samples=2)
db.fit(S)
labels = db.labels_

# Boolean mask with True at the positions of the core samples.
core_sample_mask1 = np.zeros_like(labels, dtype=bool)
core_sample_mask1[db.core_sample_indices_] = True

# Number of clusters: distinct labels, not counting the -1 label if present.
n_clusters_ = np.unique(labels).size - int(-1 in labels)
print(n_clusters_)

#unique_label = set(labels)
# Colors: colors = [plt.get_cmap('Spectral')(each) for each in np.linspace(0, 1, len(unique_label))]
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值