一、实验报告
二、代码如下
(1)K-Means算法:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import time
from pylab import *
import numpy as np

# Centre (x, y) of each synthetic unit-variance Gaussian blob, and the
# matplotlib format string used to draw it.
BLOB_CENTERS = [(3.6, 2.5), (2.0, -3.6), (-0.9, 3.6), (-5.0, -5.0), (0.0, 0.0)]
BLOB_STYLES = ['oc', 'og', 'ob', 'om', 'oy']
POINTS_PER_BLOB = 500


def majority_cluster_accuracy(labels, group_sizes, n_clusters):
    """Return the fraction of points that fall in their group's majority cluster.

    ``labels`` holds one predicted cluster id (a non-negative int) per point.
    The points are assumed to be stored group after group, with the i-th
    ground-truth group occupying ``group_sizes[i]`` consecutive entries.
    For every group the most frequent predicted cluster is counted as
    "correct"; the result is the sum of those counts divided by the total
    number of points.

    Args:
        labels: sequence of predicted cluster ids, one per point.
        group_sizes: sizes of the consecutive ground-truth groups.
        n_clusters: number of clusters the labels may take values in.

    Returns:
        A float in [0, 1]; 0.0 when ``labels`` is empty.

    Raises:
        ValueError: if ``group_sizes`` does not add up to ``len(labels)``.
    """
    labels = np.asarray(labels)
    if sum(group_sizes) != labels.size:
        raise ValueError("group_sizes must add up to the number of labels")
    if labels.size == 0:
        return 0.0

    matched = 0
    start = 0
    for size in group_sizes:
        group = labels[start:start + size]
        if group.size:
            # Histogram of predicted ids inside this ground-truth group; the
            # tallest bin is the group's majority cluster.
            matched += int(np.bincount(group, minlength=n_clusters).max())
        start += size
    return matched / labels.size


def run_kmeans_demo():
    """Cluster five synthetic Gaussian blobs with K-Means, plot and score them.

    Prints the fit time in milliseconds, the majority-cluster accuracy and the
    total elapsed time in seconds, and shows a scatter plot of the blobs with
    the fitted cluster centres drawn as red stars.
    """
    t1_X = time.time()

    # Build five random blobs and stack them into one
    # (len(BLOB_CENTERS) * POINTS_PER_BLOB, 2) array, group after group.
    blobs = [np.random.randn(POINTS_PER_BLOB, 2) + np.array(center)
             for center in BLOB_CENTERS]
    data = np.vstack(blobs)

    for blob, style in zip(blobs, BLOB_STYLES):
        plt.plot(blob[:, 0], blob[:, 1], style, markersize=0.8)

    kmeans_X = KMeans(n_clusters=len(BLOB_CENTERS), random_state=0)
    t0_X = time.time()
    kmeans_X.fit(data)
    fit_seconds = time.time() - t0_X
    # time.time() measures seconds; convert so the value matches the "ms" label.
    print(" time is %.2f ms" % (fit_seconds * 1000))

    # Fitted cluster centres, drawn as large red stars.
    centers = kmeans_X.cluster_centers_
    plt.plot(centers[:, 0], centers[:, 1], 'r*', markersize=16)

    # labels_ is ordered like `data`, so each blob owns a contiguous slice of
    # labels; no need to search for every point inside every cluster.
    accuracy = majority_cluster_accuracy(
        kmeans_X.labels_,
        [POINTS_PER_BLOB] * len(BLOB_CENTERS),
        len(BLOB_CENTERS),
    )
    print(accuracy)

    plt.show()
    # NOTE: plt.show() blocks in interactive backends, so this total also
    # includes the time the plot window stays open.
    X_time = time.time() - t1_X
    print(X_time)


if __name__ == "__main__":
    run_kmeans_demo()
(2)DBSCAN算法:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
from sklearn import datasets
from sklearn.cluster import DBSCAN
def count_cluster_members(labels):
    """Count the points in each cluster and the noise points.

    Args:
        labels: sequence of integer cluster labels, where -1 marks noise
            (the convention DBSCAN uses for ``labels_``).

    Returns:
        A ``(sizes, noise_count)`` tuple: ``sizes`` maps every cluster id
        present in ``labels`` to its number of points, ``noise_count`` is the
        number of entries equal to -1 (0 when there are none).
    """
    ids, counts = np.unique(np.asarray(labels), return_counts=True)
    sizes = {int(cluster_id): int(count) for cluster_id, count in zip(ids, counts)}
    noise_count = sizes.pop(-1, 0)
    return sizes, noise_count


def run_dbscan_demo():
    """Cluster the iris data set with DBSCAN and plot the result.

    Clustering uses all four features; the scatter plot shows feature columns
    2 and 3. Every cluster gets its own legend entry with its size, and noise
    points (label -1) are drawn as black '+' markers with their own count.
    """
    iris = datasets.load_iris()
    X = iris.data[:, :4]
    print(X.shape)

    dbscan = DBSCAN(eps=0.4, min_samples=4)
    label_pred = dbscan.fit_predict(X)
    cluster_sizes, noise_count = count_cluster_members(label_pred)

    # Black is reserved for noise so that no cluster shares its colour.
    color = ["red", "blue", "green", "gray"]
    marker = ['o', '*', '+']

    # Plot every cluster that was actually found, including the one with the
    # highest id, instead of assuming a fixed number of clusters.
    for i in sorted(cluster_sizes):
        x = X[label_pred == i]
        print(len(x))
        plt.scatter(x[:, 2], x[:, 3], c=color[i % len(color)],
                    marker=marker[i % len(marker)],
                    label='class' + str(i + 1) + '-' + str(cluster_sizes[i]))

    # Noise points only; real clusters are no longer folded into this count.
    x = X[label_pred == -1]
    print(len(x))
    plt.scatter(x[:, 2], x[:, 3], c="black", marker='+',
                label='noise' + '-' + str(noise_count))

    plt.legend(loc=2)
    plt.show()


if __name__ == "__main__":
    run_dbscan_demo()