数据生成与PLA、POCKET算法训练。
a)产生两个都具有200个二维向量的数据集X_1和X_2。
数据集X_1的样本来自均值向量 $m_1=[-5,0]^T$、协方差矩阵 $s_1=I$ 的正态分布,属于“+1”类,
数据集X_2的样本来自均值向量 $m_2=[0,5]^T$、协方差矩阵 $s_2=I$ 的正态分布,属于“-1”类,
其中I是一个2×2的单位矩阵。产生的数据中80%用于训练,20%用于测试。
b)分别运用PLA算法和Pocket算法,利用产生的训练样本集得到分类面,算法中用到的各类超参数自定。
c)分别在训练集和测试集上统计分类正确率。
d)分别统计两个算法的运行时间
e)画出数据集和分类面。
#encoding:utf-8
'''
@ author: Darcy
'''
import numpy as np
from numpy import *
import random
import math
import matplotlib.pyplot as plt
from matplotlib import style
import operator
import sklearn.linear_model
def Data_Creat(mean_1=(1, 0), mean_2=(0, 1), n_samples=200, train_ratio=0.8):
    """Draw two 2-D Gaussian classes and split them into train/test sets.

    Both classes use the identity covariance matrix. Every sample is
    augmented with a leading constant 1 so the bias term is carried as the
    first component of the weight vector.

    The defaults reproduce the values this script has always used. The
    assignment text at the top of the file asks for means (-5, 0) and (0, 5);
    pass those explicitly to get the well-separated data set it describes.

    Args:
        mean_1: Mean vector of the "+1" class.
        mean_2: Mean vector of the "-1" class.
        n_samples: Number of samples drawn for each class.
        train_ratio: Fraction of each class placed in the training set.

    Returns:
        A tuple ``(input_data, y, w, test_data, y_test)``:
        ``input_data`` / ``test_data`` are arrays with 3 columns
        (bias, x1, x2), "+1" rows first and "-1" rows second;
        ``y`` / ``y_test`` are the matching lists of +1 / -1 labels;
        ``w`` is a zero weight vector of length 3.

    Raises:
        ValueError: If ``train_ratio`` is outside [0, 1].
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError("train_ratio must be within [0, 1]")

    cov = [[1, 0], [0, 1]]
    x_1 = np.random.multivariate_normal(mean_1, cov, n_samples)
    x_2 = np.random.multivariate_normal(mean_2, cov, n_samples)

    # Augment: prepend the constant bias input 1 to every sample.
    bias = np.ones(n_samples)
    x1 = np.column_stack((bias, x_1))
    x2 = np.column_stack((bias, x_2))

    n_train = int(round(n_samples * train_ratio))
    n_test = n_samples - n_train

    input_data = np.vstack((x1[:n_train], x2[:n_train]))
    test_data = np.vstack((x1[n_train:], x2[n_train:]))
    y = [1] * n_train + [-1] * n_train
    y_test = [1] * n_test + [-1] * n_test
    w = np.zeros(3)
    return input_data, y, w, test_data, y_test
def pic_data():
    """Train a Pocket classifier and plot its boundary over class samples.

    The scattered points are a fresh draw from the same two distributions
    (means (1, 0) and (0, 1), identity covariance); they are not the exact
    samples ``my_pocket`` trained on, because ``my_pocket`` generates its own
    data internally.
    """
    cov = [[1, 0], [0, 1]]
    x_1 = np.random.multivariate_normal((1, 0), cov, 200)
    x_2 = np.random.multivariate_normal((0, 1), cov, 200)
    W_best = my_pocket()

    x = np.arange(-5.0, 5.0, 0.1)
    if W_best[2] != 0:
        # Boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = -(w0 + w1*x1) / w2.
        boundary = -(W_best[0] + W_best[1] * x) / W_best[2]
        plt.plot(x, boundary, color="red", linewidth=1.0, linestyle="-")
    elif W_best[1] != 0:
        # w2 == 0: the boundary is the vertical line x1 = -w0 / w1.
        plt.axvline(-W_best[0] / W_best[1], color="red", linewidth=1.0, linestyle="-")
    # If w1 == w2 == 0 there is no boundary line to draw.

    # Show the first 160 samples of each class (the training-set size).
    plt.scatter(x_1[:160, 0], x_1[:160, 1], s=160)
    plt.scatter(x_2[:160, 0], x_2[:160, 1], s=160)
    plt.show()
# Select matplotlib's 'ggplot' style sheet.
# NOTE(review): indentation was lost in this file; this is presumably a
# module-level statement rather than the last line of pic_data() -- confirm.
style.use('ggplot')
def my_pla(max_epochs=1000):
    """Train a perceptron (PLA) on data from ``Data_Creat`` and plot the line.

    One epoch is a full pass over the training set; every misclassified
    sample triggers the update ``W += y_i * x_i``. Training stops after the
    first epoch with no mistakes.

    Args:
        max_epochs: Upper bound on the number of passes. PLA only converges
            on linearly separable data, so without a bound the loop would
            spin forever on overlapping classes. Pass ``None`` to run
            unbounded.

    Returns:
        The final weight vector ``W`` (bias first).
    """
    X, y, W, _, _ = Data_Creat()
    count = 0
    converged = False
    while max_epochs is None or count < max_epochs:
        count += 1
        converged = True
        for x_i, y_i in zip(X, y):
            # sign(0) == 0 never matches a +/-1 label, so points lying on
            # the boundary are treated as mistakes as well.
            if np.sign(np.dot(W, x_i)) != np.sign(y_i):
                converged = False
                W = W + y_i * x_i
        if converged:
            break

    if not converged:
        print("PLA did not converge within", max_epochs, "epochs")
    print("final W is:", W)
    print("number of cycles is: ", count)

    x = np.arange(-5.0, 5.0, 0.1)
    if W[2] != 0:
        # Boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = -(w0 + w1*x1) / w2.
        plt.plot(x, -(W[0] + W[1] * x) / W[2], color="red", linewidth=1.0, linestyle="-")
    elif W[1] != 0:
        # w2 == 0: the boundary is the vertical line x1 = -w0 / w1.
        plt.axvline(-W[0] / W[1], color="red", linewidth=1.0, linestyle="-")
    plt.show()
    return W
def my_pocket(max_updates=100):
    """Train a Pocket perceptron on data from ``Data_Creat``.

    Each step picks one misclassified training sample at random, applies the
    perceptron update to the working weights ``W``, and keeps ``W`` "in the
    pocket" (``W_best``) only when it misclassifies strictly fewer training
    samples than the current pocket weights.

    Args:
        max_updates: Maximum number of perceptron updates before stopping.

    Returns:
        The pocket weight vector ``W_best`` (bias first).
    """
    W_best = np.zeros(3)
    X, Y, W, _, _ = Data_Creat()

    # Cache misclassification results so each weight vector is scanned once
    # instead of being re-classified for every comparison and print.
    mistakes = pock_classify(W, X, Y)
    best_errors = len(pock_classify(W_best, X, Y))

    count = 0
    while True:
        if not mistakes:
            print("最终训练得到的W为:", W_best)
            break

        num_upt = random.choice(mistakes)
        W = W + Y[num_upt] * X[num_upt]

        mistakes = pock_classify(W, X, Y)
        if len(mistakes) < best_errors:
            W_best = W
            best_errors = len(mistakes)

        count += 1
        print("第", count, "次更新, 选取X为:", X[num_upt], "此时W为:", W_best, "错误分类点个数为:", best_errors)
        if count >= max_updates:
            print("最终得到的W为:", W_best, "错误分类个数为:", best_errors)
            break
    return W_best
def pock_classify(W, X, Y):
    """Return the indices of the samples that ``W`` misclassifies.

    A sample ``i`` is a mistake when ``sign(W . X[i]) != sign(Y[i])``; a
    score of exactly 0 therefore counts as a mistake for any +/-1 label.

    Args:
        W: Weight vector, same length as each row of ``X``.
        X: Sequence of sample vectors (one per row).
        Y: Sequence of labels, one per row of ``X``.

    Returns:
        A list of integer row indices, in ascending order.
    """
    n = len(X)
    if n == 0:
        return []
    # One matrix-vector product replaces the per-sample Python loop.
    scores = np.asarray(X, dtype=float) @ np.asarray(W, dtype=float)
    targets = np.sign(np.asarray(Y)[:n])
    return np.flatnonzero(np.sign(scores) != targets).tolist()
def data_test():
    """Train a Pocket classifier and return its accuracy on a test set.

    The test samples come from this function's own ``Data_Creat`` call, and
    ``my_pocket`` trains on data it generates itself.

    Returns:
        Fraction of test samples whose predicted sign equals the label, or
        0.0 when the test set is empty.
    """
    _, _, _, X, Y = Data_Creat()
    w = my_pocket()  # my_pla()

    # Use the real test-set size instead of a hard-coded 80 so the result
    # stays correct if the split produced by Data_Creat changes.
    total = len(X)
    if total == 0:
        return 0.0
    predictions = np.sign(np.asarray(X) @ w)
    true_count = int(np.count_nonzero(predictions == np.asarray(Y)[:total]))
    rate = true_count / total
    return rate
def main():
    """Show the Pocket boundary plot, then print the test-set accuracy."""
    pic_data()
    print(data_test())
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
运行结果: