PLA (Perceptron Learning Algorithm) is a binary classification algorithm. It requires the data to be linearly separable: put simply, there must exist a line that splits the dataset into two halves, and our goal is to find that line.
Step one: pick a random point, connect it to the origin, and check whether this line separates the two clusters. If it does not, rotate the line (update the weights): $w_{t+1} = w_t + x_t \cdot y_t$, where $x_t$ is the feature vector of a misclassified point in round $t$, and $y_t$ is its label, either $+1$ or $-1$.
After the weight update the line has rotated by some angle. We then check again which points the new line misclassifies, and repeat until no point is misclassified.
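To make the update rule concrete, here is a minimal sketch of a single correction step (the vectors and labels below are made up purely for illustration):

import numpy as np

w = np.array([1.0, -1.0])  # current weights
x = np.array([0.5, 1.0])   # a misclassified point
y = 1                      # its true label
print(np.sign(w @ x))      # -1: the point is on the wrong side of the line
w = w + y * x              # PLA update: w_{t+1} = w_t + y_t * x_t
print(np.sign(w @ x))      # +1: the point is now classified correctly

Note that a single update does not always fix the chosen point in one go; the guarantee is only that the whole procedure converges when the data is linearly separable.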
The PLA algorithm is not always able to find a separating line, and how long it takes to halt depends on the target function f. So I made some improvements on top of the original algorithm to guarantee it will not keep rotating forever: tolerating a number of noisy points, capping the number of iterations, and exposing a parameter that controls how far the line rotates on each weight update.
Straight to the code:
#!/usr/local/bin/python3
# -*- coding: UTF-8 -*-
import numpy as np
class PLA:
    """
    Perceptron Learning Algorithm
    :param noise *int* maximum number of misclassified points tolerated
    :param lr *float* learning rate eta in the update rule w_{t+1} = w_t + eta * y_n(t) * x_n(t)
    :param max_iter *int* maximum number of training cycles (None means unlimited)
    ----------------------------
    Usage:
    cls = PLA(noise=0, lr=1, max_iter=2000)
    or cls = PocketPLA(noise=0, lr=1, max_iter=2000)
    y_pred = cls.predict(x, y)
    acc_rate = cls.accuracy(y, y_pred)
    """
    def __init__(self, noise=0, lr=1, max_iter=None):
        self.noise = noise
        self.lr = lr
        if max_iter:
            self.max_iter = max_iter
        else:
            self.max_iter = np.inf

    def get_score(self, x, w):
        return np.dot(x, w)

    def accuracy(self, y, y_pred):
        return np.mean(y_pred == y)

    def _weight_update(self, x, y, w):
        # w_{t+1} = w_t + lr * y_t * x_t
        return w + self.lr * y * x

    def predict(self, x, y, w=None):
        x = x - np.mean(x)
        if w is None:
            # no weights supplied, so train on the (already centered) data first
            w = self.fit(x, y, normalize=False)
        y_pred = np.dot(x, w)
        y_pred[y_pred > 0] = 1
        y_pred[y_pred <= 0] = -1
        return y_pred

    def fit(self, x, y, normalize=True):
        if normalize:
            x = x - np.mean(x)
        # shuffle the samples so the visiting order is random
        permutation = np.random.permutation(x.shape[0])
        x = x[permutation]
        y = y[permutation]
        # cycle number
        halt = 0
        # init w
        w = np.zeros(x.shape[1])
        count_no = np.inf
        while count_no > self.noise:
            halt += 1
            print("Run cycle %d" % halt)
            scores = []
            for i in range(x.shape[0]):
                score = self.get_score(x[i], w)
                scores.append(score * y[i])
            # score * y > 0 means the point is on the correct side
            scores = [1 if s > 0 else -1 for s in scores]
            count_no = scores.count(-1)
            if count_no <= self.noise:
                break
            # correct the first misclassified point found in this cycle
            _idx = scores.index(-1)
            w = self._weight_update(x[_idx], y[_idx], w)
            if halt >= self.max_iter:
                break
        return w
Even so, the weights you walk away with may not be the best ones seen during training, which is why the Pocket PLA algorithm was introduced later. As the name suggests, whenever a line classifies the data better than any line seen so far, it is tucked away in a pocket, so what is finally returned is always the best line found during training. The algorithm is slower, though, because every iteration compares the current line against the best one so far.
from copy import deepcopy
class PocketPLA(PLA):
    """
    Keep the best line seen so far in a pocket and return its weights.
    Modify __init__ if needed, e.g.:
    def __init__(self, noise=0, lr=1, max_iter=None):
        super(PocketPLA, self).__init__(noise=noise, lr=lr, max_iter=max_iter)
    """
    def fit(self, x, y, normalize=True):
        if normalize:
            x = x - np.mean(x)
        # shuffle the samples so the visiting order is random
        permutation = np.random.permutation(x.shape[0])
        x = x[permutation]
        y = y[permutation]
        # cycle number
        halt = 0
        # init w; w_best is the pocket
        w = np.zeros(x.shape[1])
        w_best = np.zeros(x.shape[1])
        count_best = 0
        count_no = np.inf
        while count_no > self.noise:
            halt += 1
            print("Run cycle %d" % halt)
            scores = []
            for i in range(x.shape[0]):
                score = self.get_score(x[i], w)
                scores.append(score * y[i])
            scores = [1 if s > 0 else -1 for s in scores]
            # pocket step: keep the current line if it classifies more
            # points correctly than any line seen before
            count_yes = scores.count(1)
            if count_yes > count_best:
                count_best = count_yes
                w_best = deepcopy(w)
            count_no = scores.count(-1)
            if count_no <= self.noise:
                break
            # correct the first misclassified point found in this cycle
            _idx = scores.index(-1)
            w = self._weight_update(x[_idx], y[_idx], w)
            if halt >= self.max_iter:
                break
        return w_best
This way we keep what we currently believe to be the best line in hand, and simply hand it over when training ends, which guarantees we get the best line found within a limited number of training iterations.
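To see the difference in practice, here is a minimal sketch (the synthetic noisy data below is made up purely for illustration) that runs both classes with the same iteration budget; plain PLA returns whatever weights it happens to hold when max_iter is reached, while PocketPLA returns the best weights it ever saw:

np.random.seed(0)
noisy_x = np.random.randn(200, 3)
noisy_y = np.sign(noisy_x[:, 0])
flip = np.random.choice(200, size=10, replace=False)
noisy_y[flip] = -noisy_y[flip]  # flip 10 labels so the data is not separable

for Cls in (PLA, PocketPLA):
    clf = Cls(noise=5, lr=1, max_iter=50)
    pred = clf.predict(noisy_x, noisy_y)
    print(Cls.__name__, clf.accuracy(noisy_y, pred))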
Finally, a test example:
if __name__ == "__main__":
    def boxmullersampling(mu=0, sigma=1, size=1):
        # Box-Muller transform: turn two uniform samples into Gaussian samples
        u = np.random.uniform(size=size)
        v = np.random.uniform(size=size)
        z = np.sqrt(-2 * np.log(u)) * np.cos(2 * np.pi * v)
        return mu + z * sigma

    # two Gaussian clusters; the extra features are linear functions of the first
    x1 = boxmullersampling(1.5, 0.1, 100)
    x1 = [[x, x + 1, x * 2] for x in list(x1)]
    y1 = [-1 for x in range(100)]
    x2 = boxmullersampling(2, 0.2, 100)
    x2 = [[x, x + 2, x * 3] for x in list(x2)]
    y2 = [1 for x in range(100)]
    train_x = np.array(x1 + x2)
    train_y = np.array(y1 + y2)

    cls = PocketPLA(noise=0, lr=1, max_iter=10000)
    y_pred = cls.predict(train_x, train_y)
    # _my_w = cls.fit(train_x, train_y, normalize=True)
    # y_pred = cls.predict(train_x, train_y, _my_w)
    acc_rate = cls.accuracy(train_y, y_pred)
    print(train_y)
    print(y_pred)
    print(acc_rate)
Test output (for linearly separable data it still finds the separating line quickly):
Run cycle 1
Run cycle 2
Run cycle 3
Run cycle 4
[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-1 -1 -1 -1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1]
[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1.]
1.0
Process finished with exit code 0
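For a visual sanity check, here is a minimal sketch (assuming matplotlib is installed) that can be appended to the end of the __main__ block above; it scatters the first two features of each cluster, making the linear separability easy to see:

    import matplotlib.pyplot as plt

    # class y = -1 occupies the first 100 rows, class y = +1 the rest
    plt.scatter(train_x[:100, 0], train_x[:100, 1], label="y = -1")
    plt.scatter(train_x[100:, 0], train_x[100:, 1], label="y = +1")
    plt.xlabel("feature 0")
    plt.ylabel("feature 1")
    plt.legend()
    plt.show()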