# https://blog.csdn.net/zdy0_2004/article/details/72953531
# https://blog.csdn.net/lishuhuakai/article/details/53980517
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN

# Read columns 0 and 2 of the CSV; the code below expects them to be named
# 'sjfldm' and 'sjbh'.  The path is a raw string so the backslashes of the
# Windows path are never interpreted as escape sequences.
df = pd.read_csv(r'E:\date\clustering.csv', usecols=[0, 2])

# One list of 'sjbh' values per distinct 'sjfldm' key.
d = df.groupby('sjfldm')['sjbh'].apply(list)
x = np.array(d)


def jaccard(p, q):
    """Return the Jaccard distance between two collections.

    Both arguments are treated as sets, so repeated elements are counted
    once.  This keeps the result symmetric and inside [0, 1], which a
    precomputed distance matrix requires.

    Args:
        p: Iterable of hashable items.
        q: Iterable of hashable items.

    Returns:
        ``1 - |p & q| / |p | q|`` as a float; ``0.0`` when both inputs are
        empty (two empty sets are considered identical).
    """
    p_items, q_items = set(p), set(q)
    union_size = len(p_items | q_items)
    if union_size == 0:
        # Avoid dividing by zero for two empty collections.
        return 0.0
    return 1.0 - len(p_items & q_items) / union_size


# Pairwise distance matrix.  The diagonal stays 0; each unordered pair is
# computed once and mirrored, since the distance is symmetric.
n_samples = len(x)
J = [[0.0] * n_samples for _ in range(n_samples)]
for i in range(n_samples):
    for j in range(i + 1, n_samples):
        J[i][j] = J[j][i] = jaccard(x[i], x[j])
S = np.array(J)
print(S)

# Cluster directly on the precomputed distances.
db = DBSCAN(eps=0.9, min_samples=2, metric='precomputed').fit(S)
labels = db.labels_

# Boolean mask that is True at the indices DBSCAN reports as core samples.
core_sample_mask1 = np.zeros_like(db.labels_, dtype=bool)
core_sample_mask1[db.core_sample_indices_] = True

# Number of distinct labels, not counting the label -1.
n_clusters_ = len(np.unique(labels)) - (1 if -1 in labels else 0)
print(n_clusters_)

# Colors: one entry from the 'Spectral' colormap per distinct label.
# `unique_label` was commented out in the original although `colors` reads
# it; it is defined here so this line cannot raise NameError.
unique_label = set(labels)
colors = [plt.get_cmap('Spectral')(each)
          for each in np.linspace(0, 1, len(unique_label))]