I have recently been working through Professor Hsuan-Tien Lin's Machine Learning Foundations course and found the homework assignments on Coursera. Below is my code for the PLA exercise. I did the homework on Linux, and since I have only been using Linux for a few days, I did not work with the CSV-format data; I copied the data directly into a txt file instead, so my preprocessing may differ from yours.
Data download link: https://d396qusza40orc.cloudfront.net/ntumlone%2Fhw1%2Fhw1_15_train.dat
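If you download the .dat file itself, a shorter preprocessing route is possible. As a rough sketch (assuming the file is plain whitespace-separated numbers with the label in the last column; the function name load_dat is just illustrative), numpy can parse it in one call:

import numpy as np

def load_dat(filename):
    # Assumption: each row is "x1 x2 ... xd <label>", separated by spaces/tabs
    data = np.loadtxt(filename)
    X = np.hstack([data[:, :-1], np.ones((data.shape[0], 1))])  # append the bias column of 1s
    Y = data[:, -1].reshape(-1, 1)                              # labels are +1 / -1
    return np.matrix(X), np.matrix(Y)

Under that assumption it would be a drop-in replacement for the load_data function below.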
import numpy as np

# Parse the txt file into an input matrix X and a label matrix Y
def load_data(filename):
    X_matrix = []
    Y_matrix = []
    with open(filename) as fn:
        for line in fn.readlines():
            line = line.strip('\n')
            line = line.split('\t')            # features and label are separated by a tab
            Y_cut = [int(line[1])]             # label is +1 or -1
            X_cut = line[0].split(' ')         # features are separated by spaces
            X_cut.append('1')                  # constant 1 for the bias term
            X_cut = [float(x) for x in X_cut]
            X_matrix.append(X_cut)
            Y_matrix.append(Y_cut)
    X_matrix = np.matrix(X_matrix)
    Y_matrix = np.matrix(Y_matrix)
    return X_matrix, Y_matrix
# Sign function: PLA treats sign(0) as -1
def sign(x):
    if x > 0:
        return 1
    else:
        return -1
# Perceptron learning: cycle through the samples and correct W on every mistake
def perceptron(X, Y):
    total_rows = X.shape[0]
    total_cols = X.shape[1]
    W = np.matrix(np.zeros([1, total_cols]))   # start from the zero vector
    update_times = 0
    while True:
        mistakes = 0                           # misclassifications in this pass
        for i in range(total_rows):
            l = sign(np.dot(X[i], W.T).sum())
            if l != Y[i, 0]:                   # misclassified sample: update W
                W = W + Y[i] * X[i]
                update_times += 1
                mistakes += 1
        if mistakes == 0:                      # a full pass without mistakes: converged
            break
    print(W, update_times)
    return W, update_times
if __name__ == '__main__':
    X, Y = load_data('PLA')
    perceptron(X, Y)
The final run converges after 45 updates. Changing the initial value of W may change the number of updates, but it stays roughly between 30 and 50.
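To see that variation, one way (a minimal sketch, reusing the load_data and sign functions above; perceptron_updates is a hypothetical helper, not part of the homework) is to re-run the same update loop from a few random initial weight vectors and print the update count for each trial:

def perceptron_updates(X, Y, W0):
    # Same PLA loop as above, but starting from W0 and only reporting the update count
    W = np.matrix(W0)
    update_times = 0
    while True:
        mistakes = 0
        for i in range(X.shape[0]):
            if sign(np.dot(X[i], W.T).sum()) != Y[i, 0]:
                W = W + Y[i] * X[i]
                update_times += 1
                mistakes += 1
        if mistakes == 0:
            break
    return update_times

X, Y = load_data('PLA')
for trial in range(5):
    W0 = 0.1 * np.random.randn(1, X.shape[1])   # small random initial weights
    print(trial, perceptron_updates(X, Y, W0))

Because this training set is linearly separable, PLA still halts from any starting W; only the number of updates changes.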