# 多个分类器分类 make_circles 及其可视化

## 题目

使用线性回归、逻辑回归、支持向量机、KNN、朴素贝叶斯对 make_circles 生成的数据进行分类,并输出分类精度。

## 代码实现
import numpy as np
from sklearn.datasets import make_circles
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from scipy.optimize import linear_sum_assignment
def cluster_acc(y_true, y_pred):
    """Best-match (clustering) accuracy via the Hungarian algorithm.

    Builds the (pred, true) contingency matrix and finds the one-to-one
    label assignment that maximizes agreement, then returns the matched
    fraction. Useful when predicted labels are only defined up to a
    permutation (e.g. clustering output).

    Args:
        y_true: array-like of non-negative integer ground-truth labels.
        y_pred: array-like of non-negative integer (or integral-valued
            float) predicted labels, same length as y_true.

    Returns:
        float in [0, 1]: best accuracy over all label permutations.
    """
    y_true = np.asarray(y_true).astype(np.int64)
    # Cast y_pred too: float-valued predictions (e.g. np.round output)
    # previously raised an IndexError when used as matrix indices.
    y_pred = np.asarray(y_pred).astype(np.int64)
    assert y_pred.size == y_true.size
    # Contingency matrix: w[i, j] = #samples predicted i with true label j.
    n_labels = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((n_labels, n_labels), dtype=np.int64)
    for p, t in zip(y_pred, y_true):
        w[p, t] += 1
    # linear_sum_assignment minimizes cost, so invert to maximize matches.
    row_ind, col_ind = linear_sum_assignment(w.max() - w)
    return w[row_ind, col_ind].sum() / y_pred.size
# Configure matplotlib for Chinese axis/title text (SimHei) and make sure
# minus signs still render with a non-ASCII font.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

fig = plt.figure(1)
fig.subplots_adjust(bottom=0, top=1, left=0, right=1, hspace=0.3)

# Two noisy concentric circles: 400 samples, inner radius 0.3x the outer.
x1, y1 = make_circles(n_samples=400, factor=0.3, noise=0.1)

# Subplot 1: the raw data, colored by its true label.
plt.subplot(2, 3, 1)
plt.title('原始数据')
plt.scatter(x1[:, 0], x1[:, 1], marker='o', c=y1, s=10)
# k-nearest neighbours (default k=5), fitted and evaluated on the same data.
knn = KNeighborsClassifier()
knn.fit(x1, y1)
knn_pred = knn.predict(x1)
knn_acc = accuracy_score(y1, knn_pred)
print("knn", "=", knn_acc)

# Subplot 2: points colored by the KNN prediction.
plt.subplot(2, 3, 2)
plt.title('knn(acc=' + str(knn_acc) + ')')
plt.scatter(x1[:, 0], x1[:, 1], marker='o', c=knn_pred, s=10)
# Gaussian naive Bayes on the same training data.
gnb = GaussianNB()
gnb.fit(x1, y1)
gnb_pred = gnb.predict(x1)
gnb_acc = accuracy_score(y1, gnb_pred)
print("gnb", "=", gnb_acc)

# Subplot 3: points colored by the naive-Bayes prediction.
plt.subplot(2, 3, 3)
plt.title('NaiveBayes(acc=' + str(gnb_acc) + ')')
plt.scatter(x1[:, 0], x1[:, 1], marker='o', c=gnb_pred, s=10)
# Linear regression used as a classifier: round the continuous prediction
# to the nearest class label.
lr = LinearRegression()
lr.fit(x1, y1)
label_sample = np.round(lr.predict(x1))
# Clip to the valid label range: with noisy data the rounded regression
# output can fall outside {0, 1} (e.g. -1), which would corrupt
# cluster_acc's contingency matrix through negative-index wraparound.
label_sample = np.clip(label_sample, 0, 1).astype(np.int64)
# Supervised classification -> plain accuracy, consistent with the
# knn/gnb segments (cluster_acc's permutation matching is meant for
# unlabeled clustering and can inflate a supervised score).
lr_acc = accuracy_score(y1, label_sample)
print('lr', "=", lr_acc)

plt.subplot(2, 3, 4)
plt.title('LinearRegression(acc=' + str(lr_acc) + ')')
plt.scatter(x1[:, 0], x1[:, 1], marker='o', c=label_sample, s=10)
# Logistic regression (linear decision boundary; expected to do poorly on
# concentric circles).
log_reg = LogisticRegression()
log_reg.fit(x1, y1)
label_sample = log_reg.predict(x1)
# Logistic regression predicts the actual class labels, so measure with
# accuracy_score as in the knn/gnb segments; cluster_acc's label
# permutation is only meaningful for unsupervised clustering and can
# report max(acc, 1 - acc) instead of the true accuracy.
lsr_acc = accuracy_score(y1, label_sample)
print("lsr", "=", lsr_acc)

plt.subplot(2, 3, 5)
plt.title('Logistic(acc=' + str(lsr_acc) + ')')
plt.scatter(x1[:, 0], x1[:, 1], marker='o', c=label_sample, s=10)
# Standardize features before the RBF-kernel SVM (SVC is scale-sensitive).
scaler = StandardScaler()
# StandardScaler ignores y; don't pass it to fit_transform.
x1_ = scaler.fit_transform(x1)
svc = SVC()
svc.fit(x1_, y1)
label_sample = svc.predict(x1_)
# Supervised classifier -> plain accuracy, consistent with knn/gnb above
# (cluster_acc's permutation matching is for unlabeled clustering).
svc_acc = accuracy_score(y1, label_sample)
print("svc", "=", svc_acc)

# Note: plotted in the scaled coordinate system, unlike the other subplots.
plt.subplot(2, 3, 6)
plt.title('SVM(acc=' + str(svc_acc) + ')')
plt.scatter(x1_[:, 0], x1_[:, 1], marker='o', c=label_sample, s=10)
plt.show()
## 实验结果