用 make_blobs 函数生成聚类样本数据,然后用 k 近邻(KNN)算法进行预测
import matplotlib.pyplot as plt
import numpy as np
# make_blobs is imported from the public sklearn.datasets namespace; the old
# private path sklearn.datasets.samples_generator no longer exists in current
# scikit-learn releases.
from sklearn.datasets import make_blobs
from sklearn.neighbors import KNeighborsClassifier

# Generate clustered sample data with make_blobs: 100 points scattered around
# the four given centers, with a fixed random_state so runs are reproducible.
center = [[1, 1], [-1, 1], [1, -1], [-1, -1]]
x, y = make_blobs(n_samples=100, centers=center, random_state=0, cluster_std=0.4)

# Train the k-nearest-neighbors classifier.
k = 5
# Only n_neighbors is set, so scikit-learn's default weights="uniform" applies:
# each of the k neighbors gets an equal vote (it is not distance-weighted).
knn_class = KNeighborsClassifier(n_neighbors=k)
knn_class.fit(x, y)

# Predict the class of two new points.
x_sample = np.array([[0.5, 0.6], [-0.5, -0.6]])
y_sample = knn_class.predict(x_sample)
print(y_sample)

# For each query point, the indices (rows of x) of its k nearest training samples.
neighbors = knn_class.kneighbors(x_sample, return_distance=False)
print(neighbors)

# Score the classifier; note this is evaluated on the same data it was fitted on.
print(knn_class.score(x, y))
# Visualize the data.
plt.figure(figsize=(10, 10))
center_np = np.array(center)
# Training samples, colored by their label.
plt.scatter(x[:, 0], x[:, 1], c=y, s=50)
# The cluster centers the samples were generated around.
plt.scatter(center_np[:, 0], center_np[:, 1], s=100, marker='^', c='r')
# Points to classify, colored by their predicted class. vmin/vmax pin the color
# scale to the training label range so a predicted class gets the same color as
# the training samples of that class, even if the predictions cover only some
# of the classes.
plt.scatter(x_sample[:, 0], x_sample[:, 1], c=y_sample, marker="x", s=120,
            vmin=y.min(), vmax=y.max())
# Dashed line from each query point to each of its nearest training samples.
for query_point, neighbor_indices in zip(x_sample, neighbors):
    for idx in neighbor_indices:
        plt.plot([x[idx][0], query_point[0]], [x[idx][1], query_point[1]],
                 'k--', linewidth=0.6)
# Display the figure when run as a plain script.
plt.show()
结果如下