PLA (Perceptron Learning Algorithm)

PLA is a binary classification algorithm. It requires the data to be linearly separable; simply put, the dataset can be split into two groups by a single line (a hyperplane in higher dimensions), and our goal is to find that line.
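For reference, the hypothesis the perceptron searches for can be written as the standard sign rule below (note that the code later in this post does not add an explicit bias term, it only mean-centres the inputs):

$h(x) = \mathrm{sign}(w^\top x)$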

Step one: pick a random point and connect it to the origin to get an initial weight vector, then check whether this line separates the two clusters. If it does not, rotate the line (i.e. update the weights): $w_{t+1} = w_t + y_t x_t$, where $x_t$ is the feature vector chosen in round $t$ and $y_t$ is its label, either $+1$ or $-1$.

After the weight update the line has rotated by some angle. We then check which points the new line still misclassifies, and repeat until no point is misclassified.
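To make the update concrete, here is a tiny hand-made example of a single correction step (the numbers are made up purely for illustration, with the learning rate fixed at 1):

import numpy as np

w = np.array([1.0, 0.0])        # current weight vector
x_t = np.array([0.5, 2.0])      # a point the current line gets wrong
y_t = -1                        # its true label

print(np.sign(np.dot(w, x_t)))  # 1.0, but the label is -1 -> misclassified
w = w + y_t * x_t               # rotate the line toward the correct side
print(w)                        # [ 0.5 -2. ]
print(np.sign(np.dot(w, x_t)))  # -1.0 -> now on the correct side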

Plain PLA is not guaranteed to find a separating line, and how long it takes to halt depends on the underlying target function f. So I made a few changes to the original algorithm to make sure it does not keep rotating forever: tolerating a limited number of noisy points, capping the number of iterations, and exposing a learning-rate parameter that controls how far the line rotates on each weight update.

Here is the code:

#!/usr/local/bin/python3
# -*- coding: UTF-8 -*-

import numpy as np


class PLA:

    """
    Perceptron Linear Algorithm

    :param noise *int* Maximum amount of noise tolerated
    :param lr *float* learn parameters update rule # Wt+1→Wt+ηyn(t)xn(t) with η=lr
    ----------------------------
    Usage:
    cls = PLA(noise=0, lr=1, max_iter=2000)
    or cls = PocketPLA(noise=0, lr=1, max_iter=2000)
    y_pred = cls.predict(x, y)
    acc_rate = cls.accuracy(y, y_pred)
    """

    def __init__(self, noise=0, lr=1, max_iter=None):
        self.noise = noise
        self.lr = lr
        if max_iter:
            self.max_iter = max_iter
        else:
            self.max_iter = np.inf

    def get_score(self, x, w):
        # raw score: projection of a sample onto the current weight vector
        return np.dot(x, w)

    def accuracy(self, y, y_pred):
        # fraction of samples whose predicted label matches the true label
        return np.mean(y_pred == y)

    def _weight_update(self, x, y, w):
        # perceptron update: W_{t+1} = W_t + lr * y_t * x_t
        return w + self.lr * y * x

    def predict(self, x, y, w=0):
        # centre the features the same way fit() does
        x = x - np.mean(x)
        if isinstance(w, int) and w == 0:
            # no weights supplied: train on the (already centred) data first
            w = self.fit(x, y, normalize=False)
        # threshold the scores at 0 to get labels in {-1, +1}
        y_pred = np.dot(x, w)
        y_pred[y_pred > 0] = 1
        y_pred[y_pred <= 0] = -1
        return y_pred

    def fit(self, x, y, normalize=True):
        if normalize:
            # centre the features around zero
            x = x - np.mean(x)
        # shuffle the samples so they are visited in random order
        permutation = np.random.permutation(x.shape[0])
        x = x[permutation]
        y = y[permutation]
        # cycle counter
        halt = 0
        # initialise W with zeros
        w = np.zeros(x.shape[1])
        count_no = np.inf
        while count_no > self.noise:
            halt += 1
            print("Run cycle %d" % halt)
            # score * label is positive for correctly classified samples
            scores = []
            for i in range(0, x.shape[0]):
                score = self.get_score(x[i], w)
                scores.append(score*y[i])

            # map to +1 (correct) / -1 (misclassified) and count the mistakes
            scores = [1 if i > 0 else -1 for i in scores]
            count_no = scores.count(-1)

            if count_no <= self.noise:
                # few enough mistakes: accept the current line
                break
            # otherwise correct the first misclassified point of this cycle
            _idx = scores.index(-1)
            w = self._weight_update(x[_idx], y[_idx], w)

            if halt >= self.max_iter:
                break

        return w
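
For completeness, a minimal usage sketch of the class above (train_x and train_y are placeholders for your own numpy arrays; the full runnable test is at the end of the post):

# train_x: (n, d) numpy array of features, train_y: (n,) array of +/-1 labels
cls = PLA(noise=0, lr=1, max_iter=2000)

# option 1: let predict() train internally (w defaults to 0)
y_pred = cls.predict(train_x, train_y)

# option 2: fit explicitly and reuse the returned weights
w = cls.fit(train_x, train_y, normalize=True)
y_pred = cls.predict(train_x, train_y, w)

print(cls.accuracy(train_y, y_pred))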

Even so, the weights we end up with may not be the best ones seen during training. That is why the Pocket PLA algorithm was introduced: as the name suggests, whenever we find a line that classifies relatively well, we put it in our "pocket" and keep it.

The final output is always the best line seen during training, but the algorithm is slower, because every iteration is compared against the best line found so far.

from copy import deepcopy


class PocketPLA(PLA):

    """
    collect best line in pocket and return weight

    """

    """
    Modify init if needed
    
    def __init__(self, noise=0, lr=1, max_iter=None):

        super(PocketPLA, self).__init__(noise=0, lr=1, max_iter=None)
        self.noise = noise
        self.lr = lr
        if max_iter:
            self.max_iter = max_iter
        else:
            self.max_iter = np.inf
    """

    def fit(self, x, y, normalize=True):
        if normalize:
            # centre the features around zero
            x = x - np.mean(x)
        # shuffle the samples so they are visited in random order
        permutation = np.random.permutation(x.shape[0])
        x = x[permutation]
        y = y[permutation]
        # cycle counter
        halt = 0
        # initialise W and the pocket (best-so-far) W with zeros
        w = np.zeros(x.shape[1])
        w_best = np.zeros(x.shape[1])
        count_best = 0

        count_no = np.inf
        while count_no > self.noise:
            halt += 1
            print("Run cycle %d" % halt)
            # score * label is positive for correctly classified samples
            scores = []
            for i in range(0, x.shape[0]):
                score = self.get_score(x[i], w)
                scores.append(score*y[i])

            # map to +1 (correct) / -1 (misclassified)
            scores = [1 if i > 0 else -1 for i in scores]

            # pocket step: if the current line classifies more points correctly
            # than any previous line, keep a copy of it
            count_yes = scores.count(1)
            if count_yes > count_best:
                count_best = count_yes
                w_best = deepcopy(w)

            count_no = scores.count(-1)

            if count_no <= self.noise:
                # few enough mistakes: stop here
                break
            # otherwise correct the first misclassified point of this cycle
            _idx = scores.index(-1)
            w = self._weight_update(x[_idx], y[_idx], w)

            if halt >= self.max_iter:
                break
        return w_best

This way we already hold a reasonably good line in hand during training and simply return it when training ends, which guarantees we get the best line seen within a limited number of iterations.

Finally, a test example:

if __name__ == "__main__":
    def boxmullersampling(mu=0, sigma=1, size=1):
        u = np.random.uniform(size=size)
        v = np.random.uniform(size=size)
        z = np.sqrt(-2 * np.log(u)) * np.cos(2 * np.pi * v)
        return mu + z * sigma

    x1 = boxmullersampling(1.5, 0.1, 100)
    x1 = [[x, x+1, x*2] for x in list(x1)]
    y1 = [-1 for x in range(0, 100)]
    x2 = boxmullersampling(2, 0.2, 100)
    x2 = [[x, x+2, x*3] for x in list(x2)]
    y2 = [1 for x in range(0, 100)]
    train_x = np.array(x1+x2)
    train_y = np.array(y1+y2)

    cls = PocketPLA(noise=0, lr=1, max_iter=10000)

    y_pred = cls.predict(train_x, train_y)
    # _my_w = cls.fit(train_x, train_y, normalize=True)
    # y_pred = cls.predict(train_x, train_y, _my_w)
    acc_rate = cls.accuracy(train_y, y_pred)
    print(train_y)
    print(y_pred)
    print(acc_rate)

Test output (for linearly separable data the separating line is found quickly):

Run cycle 1
Run cycle 2
Run cycle 3
Run cycle 4
[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1]
[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.]
1.0

Process finished with exit code 0