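# The explanations are given as comments in the code, so they are not repeated here.
# The clustering dataset and the MATLAB code used to extract it are provided at the end.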
import numpy as np
import matplotlib.pyplot as plt
import numpy.random as random
from scipy.io import loadmat
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
class readData:
    """Load a clustering dataset from a MATLAB ``.mat`` file.

    The file is read with ``scipy.io.loadmat``; the sample matrix is taken
    from the ``'X'`` entry of the loaded mapping.
    """

    def __init__(self, data_path='./data.mat'):
        """Read the dataset file.

        :param data_path: path of the ``.mat`` file to load
        """
        self.data = loadmat(data_path)

    def get_data(self, samples_per_class=200):
        """Return the samples together with position-based labels.

        The labels are not read from the file. They are derived from the row
        index only: the first ``samples_per_class`` rows get label 0, the next
        ``samples_per_class`` rows get label 1, and so on.

        :param samples_per_class: number of consecutive rows that share one
            label; defaults to 200, the block size this method always used
        :return: tuple ``(X, y)`` where ``X`` is ``self.data['X']`` and ``y``
            is a 1-D integer array with one label per row of ``X``
        :raises ValueError: if ``samples_per_class`` is smaller than 1
        """
        if samples_per_class < 1:
            raise ValueError("samples_per_class must be a positive integer")
        X = self.data['X']
        # Integer division of the row index yields the block number directly.
        y = np.arange(len(X)) // samples_per_class
        return X, y

    def plotData(self):
        """Scatter-plot the first two columns of the samples and show the figure.

        :return: always 1
        """
        X, _ = self.get_data()
        plt.scatter(X[:, 0], X[:, 1])
        plt.show()
        return 1
class Kmeans:
def __init__(self, data, k):
"""
初始化
:param data: 输入数据
:param k: 输入要类别数量
"""
self.data = data
self.k = k
self.pred_label = None
self.skl_pre_label = None
self.center = None
def sklearn_kmeans(self):
    """
    Cluster ``self.data`` with scikit-learn's ``KMeans``.

    A ``KMeans`` model with ``self.k`` clusters and ``random_state=1`` is
    fitted on ``self.data``. The resulting labels are stored in
    ``self.skl_pre_label`` and also returned.

    :return: the ``labels_`` attribute of the fitted model
    """
    skl_kmean = KMeans(n_clusters=self.k, random_state=1)
    skl_label = skl_kmean.fit(self.data).labels_
    self.skl_pre_label = skl_label
    return skl_label
def k_means(self):
"""
自己的k_means
:ret