import numpy as np
import pandas as pd
from collections import OrderedDict
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import pdist,squareform
from collections import Counter
# Duplicate-sample check: load the dataset, standardize its feature columns,
# and count how many pairwise Euclidean distances are exactly zero.
data = np.array(pd.read_csv(r"D:\OCdata\automobile.csv"))

# Every column except the last is treated as a feature; the last column is
# kept separately as `y` (presumably the label -- confirm against the CSV).
X = data[:, :-1]
scaler = StandardScaler()
X = scaler.fit_transform(X)
y = data[:, -1]

# Full square (n x n) matrix of pairwise distances between the rows of X.
dist_matrix = squareform(pdist(X, metric='euclidean'))

# Count exact-zero entries in a single vectorized pass instead of a Python
# double loop. The matrix is symmetric with a zero diagonal, so the count
# always includes one zero per sample and each pair of identical rows adds two
# more. A count greater than the number of samples therefore means that
# duplicate (standardized) rows exist.
zero_count = int(np.count_nonzero(dist_matrix == 0.0))

# Print the sample count next to the zero count so the two can be compared.
print(X.shape[0], " ", zero_count)
# 查看数据集中是否有重复样本 (Check whether the dataset contains duplicate samples.)
最新推荐文章于 2024-01-16 13:39:13 发布