Pattern Recognition Homework: Implementing Fisher, Perceptron, and MSE Linear Classifiers

A summary of April's pattern recognition homework.

1. Fisher Linear Discriminant
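For reference, here is a brief statement of the closed-form solution that the train() method below computes (this summary is mine, not from the original post; the symbols follow the code, with m_1, m_2 the class mean vectors and S_w the within-class scatter matrix):

\[
S_w = \sum_{i=1}^{2}\sum_{x \in X_i}(x - m_i)(x - m_i)^{T}, \qquad
W = S_w^{-1}(m_1 - m_2), \qquad
b = -\tfrac{1}{2}\,W^{T}(m_1 + m_2)
\]

A sample x is assigned to class 1 when W^T x + b > 0 and to class 2 otherwise.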

import numpy as np


class Fisher:
    def __init__(self):
        self.W = None
        self.b = None
        self.W_b_gather = {}   # stores [W, b] per trained discriminant, keyed by str(iter)

    def train(self, X, iter=0):
        # X is a list of two arrays, one per class, each of shape (n_features, n_samples)
        Sw = np.zeros((X[0].shape[0], X[0].shape[0]), dtype=float)
        m = []
        for i in range(2):
            m.append(np.mean(X[i], axis=1, keepdims=True))   # class mean, shape (n_features, 1)
            X[i] = X[i] - m[i]                                # center the class samples
            Sw += np.dot(X[i], X[i].T)                        # accumulate the within-class scatter
        # Fisher direction and threshold at the midpoint of the projected class means
        self.W = np.linalg.inv(Sw).dot(m[0] - m[1])
        self.b = -1/2 * (self.W.T.dot(m[0]) + self.W.T.dot(m[1]))[0]

        self.W_b_gather[str(iter)] = [self.W.copy().T, self.b.copy()]

    def plot(self, x1, x2):
        # return two points on the decision line W[0]*x + W[1]*y + b = 0 (2-D case) for plotting
        m1 = []
        m2 = []
        m1.append(x1)
        m2.append(-(self.b + self.W[0] * m1[0]) / self.W[1])
        m1.append(x2)
        m2.append(-(self.b + self.W[0] * m1[1]) / self.W[1])
        return m1, m2

    def predict(self, X, iter=0):
        # binary prediction: sign of W.X + b, where X has shape (n_features, n_samples)
        W = self.W_b_gather[str(iter)][0]
        b = self.W_b_gather[str(iter)][1]
        y_pred = np.dot(W, X) + b
        y_pred[y_pred > 0] = 1
        y_pred[y_pred < 0] = -1
        return y_pred.T

    def predict_m(self, X, class_num):
        # multi-class prediction: stack the stored discriminants and take the highest score
        W = []
        b = []
        for i in range(class_num):
            W.append(self.W_b_gather[str(i)][0][0])
            b.append(self.W_b_gather[str(i)][1])
        W = np.array(W)
        b = np.array(b).reshape(-1, 1)
        scored = np.dot(W, X) + b
        y_pred = np.argmax(scored, axis=0) + 1   # class labels start at 1
        return y_pred.reshape(-1, 1)

    def redimension(self, X):
        # project X onto the Fisher direction (reduce to one dimension)
        return self.W.dot(X)
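A minimal usage sketch (not part of the original assignment; the synthetic 2-D Gaussian data, the seed, and the shapes are my own assumptions, chosen to match how train() and predict() index their inputs, with each class passed as an (n_features, n_samples) array):

import numpy as np

np.random.seed(0)
X1 = np.random.randn(2, 50) + np.array([[2.0], [2.0]])    # class 1 samples, shape (2, 50)
X2 = np.random.randn(2, 50) + np.array([[-2.0], [-2.0]])  # class 2 samples, shape (2, 50)

clf = Fisher()
clf.train([X1, X2])                          # one (n_features, n_samples) array per class
X_test = np.hstack([X1, X2])                 # shape (2, 100), samples as columns
y_pred = clf.predict(X_test)                 # (100, 1) column of +1 (class 1) / -1 (class 2)
print("class-1 accuracy:", np.mean(y_pred[:50] == 1))
print("class-2 accuracy:", np.mean(y_pred[50:] == -1))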

2. Perceptron
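The binary branch of the code below minimizes the classic perceptron criterion (again, this summary is mine and simply restates what the code does; the symbols match the code):

\[
J(W, b) = -\sum_{x_i \in \mathcal{M}} y_i\,(W^{T}x_i + b), \qquad
W \leftarrow W + \eta \sum_{x_i \in \mathcal{M}} y_i x_i, \qquad
b \leftarrow b + \eta \sum_{x_i \in \mathcal{M}} y_i
\]

where M is the set of misclassified samples (the code averages the sums over M or over the training set). The multi-class branch applies the same idea to the score differences max(0, s_j - s_{y_i}) of the wrong classes j.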

import numpy as np

import random
import os

class Perceptron:
    # The perceptron is typically used for binary classification: a sample's class is decided
    # by which side of the separating hyperplane it falls on (the signed distance to the hyperplane).
    def __init__(self):
        self.W = None
        self.b = None

    # training
    def train(self, X, y, learning_rate=1e-3, num_iters=1000, verbose=True, mode=1, batch_size=200, class_num=2, error_limit=1e-6):
    # verbose: whether to print the loss during the iterations
    # mode: which gradient-descent variant to use -- 1 batch, 2 stochastic, 3 mini-batch
        input_num, input_size = X.shape
        if class_num == 2:
            class_num -= 1      # binary case: a single discriminant is enough
        self.W = 0.001 * np.random.randn(input_size, class_num)
        #self.W = np.array([[1e-03], [1e-03]])

        self.b = 1
        loss_history = []
        W_b_history = []
        for i in range(num_iters):
            if class_num < 3:
                loss, grads = self.Lossgrads(X, y, mode, batch_size)
            else:
                loss, grads = self.Lossgrads_m(X, y, mode, batch_size)
            loss_history.append(loss)
            W_b_history.append([self.W.copy(),  self.b])
            if loss < error_limit:
                print('iteration %d / %d: loss %f' % (i, num_iters, abs(loss)))
                break
            W = grads["W"]
            b = grads["b"]
            self.W =self.W + learning_rate * W
            self.b =self.b + learning_rate * b
            if verbose and i % 100 == 0:
                print('iteration %d / %d: loss %f' % (i, num_iters, loss))
        print("The learning-rate is ", learning_rate)
        print("The error-limit is ", error_limit)
        return loss_history, W_b_history

    # multi-class loss and gradients
    def Lossgrads_m(self, X, y, flag, batch_size):
        W = self.W
        b = self.b
        grads = {}
        num_train = X.shape[0]
        scores = X.dot(W) + b
        y = y-1
        correct_class_scores = scores[np.arange(num_train), y.reshape(1, -1)]
        correct_class_scores = correct_class_scores.reshape(-1, 1)

        if flag == 1:
            margins = scores - correct_class_scores
            margins[margins<0] = 0
            margins[np.arange(num_train), y.reshape(1, -1)] = 0
            loss = np.sum(margins) / num_train

            margins[margins>0] = 1
            row_num = np.sum(margins, axis=1)
            margins[np.arange(num_train), y.reshape(1, -1)] -= row_num
            grads["W"] = -np.dot(X.T, margins) / num_train
            grads["b"] = -np.sum(margins, axis=0)

        if flag == 2:
            y_pred = np.argmax(scores, axis=1)
            index = np.where(y_pred != y.reshape(1, -1))
            if len(index[0]) > 0:
                i = index[1]
                i = i[0]
                x = X[i]
            else:
                return 0, 0
            margins = scores[i] - correct_class_scores[i]
            margins[margins < 0] = 0
            margins[y[i]] = 0
            loss = np.sum(margins)
            margins[margins > 0] = 1
            row_num = np.sum(margins)
            margins[y[i]] -= row_num
            grads["W"] = -np.dot(x.reshape(-1, 1), margins.reshape(1, -1))
            grads["b"] = -margins

        return loss, grads



    # loss and gradients for the binary case
    def Lossgrads(self, X, y, flag, batch_size):
        W = self.W
        b = self.b
        h = X.dot(W) + b
        num_train = X.shape[0]
        # If a sample is correctly classified, y * h is positive, so we zero out the positive
        # entries: only the misclassified samples contribute to the loss.
        scores = y * h
        scores[scores > 0] = 0

        if flag == 1:                   # batch gradient descent
            loss = -np.sum(scores) / num_train
            if loss == 0:
                return loss, 0
            grads = self.BGD(X, y, scores)

        if flag == 2:                   # stochastic gradient descent
            scores = scores.reshape(1, -1)
            tup = np.nonzero(scores)
            index = tup[1]
            if len(index) == 0:
                loss = 0
                return loss, 0
            i = random.randint(0, index.size-1)
            loss = -scores[0][index[i]]
            grads = self.SGD(X, y, index[i])

        if flag == 3:                  # mini-batch gradient descent
            batch_size = 30            # hard-coded mini-batch size (overrides the batch_size argument)
            batch_scores = scores[scores < 0]
            batch_scores = batch_scores.reshape(-1, 1)
            scores[scores < 0] = 1      # mark the samples that produced a loss with 1
            X = X * scores              # keep only the misclassified samples
            y = y * scores
            batch_X = X[X != 0]
            batch_X = batch_X.reshape(-1, 2)    # assumes 2-D features
            batch_y = y[y != 0]
            batch_y = batch_y.reshape(-1, 1)
            if np.sum(batch_scores < 0) > batch_size:
                # keep only the first batch_size misclassified samples
                batch_scores = batch_scores[:batch_size]
                batch_X = batch_X[:batch_size]
                batch_y = batch_y[:batch_size]
            loss = -np.sum(batch_scores)
            if loss == 0:
                return loss, 0
            grads = self.MBGD(batch_X, batch_y)

        grads["W"] = grads["W"].reshape(-1, 1)

        return loss, grads

    # stochastic gradient descent: update from a single misclassified sample
    def SGD(self, X, y, index):
        grads = {}
        X = X[index]
        y = y[index]
        grads["W"] = X*y
        grads["b"] = y
        return grads

    # batch gradient descent: update from all misclassified samples
    def BGD(self, X, y, scores):
        grads = {}
        scores[scores < 0] = 1      # mark the samples that produced a loss with 1
        X = X * scores              # keep only the misclassified samples
        grads["W"] = np.sum(X * (y), axis=0) / np.sum(scores == 1)
        y = y * scores
        grads["b"] = np.sum(y) / np.sum(scores==1)
        return grads

    # mini-batch gradient descent: update from the selected misclassified samples
    def MBGD(self, X, y):
        grads={}
        grads["W"] = np.sum(X * (y), axis=0)
        grads["b"] = np.sum(y)
        return grads


    def predict_m(self, X):
        y_pred = np.argmax(X.dot(self.W) + self.b, axis=1)
        y_pred = y_pred+1
        return y_pred.reshape(-1, 1)


    def predict(self, X):
        y_pred = X.dot(self.W) + self.b
        y_pred[y_pred<0] = -1
        y_pred[y_pred>0] = 1

        return y_pred

    def plot(self, x1, x2):
        m1 = []
        m2 = []
        m1.append(x1)
        m2.append(-(self.b + self.W[0] * m1[0])/self.W[1])
        m1.append(x2)
        m2.append(-(self.b + self.W[0] * m1[1])/self.W[1])
        return m1, m2
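A minimal usage sketch for the binary case (not from the original post; the synthetic data, seed, learning rate, and iteration count are my own assumptions; X has samples as rows and y holds +1/-1 labels as a column vector):

import numpy as np

np.random.seed(0)
X_pos = np.random.randn(50, 2) + np.array([2.0, 2.0])     # class +1 samples, shape (50, 2)
X_neg = np.random.randn(50, 2) + np.array([-2.0, -2.0])   # class -1 samples, shape (50, 2)
X = np.vstack([X_pos, X_neg])                              # samples as rows, shape (100, 2)
y = np.vstack([np.ones((50, 1)), -np.ones((50, 1))])       # labels in {+1, -1}, shape (100, 1)

clf = Perceptron()
loss_history, _ = clf.train(X, y, learning_rate=1e-2, num_iters=500, verbose=False, mode=1)
y_pred = clf.predict(X)                                    # shape (100, 1), entries in {+1, -1}
print("training accuracy:", np.mean(y_pred == y))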

3. MSE (Minimum Squared Error) Linear Classifier
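The pseudo() method below implements the classical pseudo-inverse solution of the MSE criterion (my summary, using the same symbols as the code): with Y the augmented, label-normalized sample matrix (row i is y_i [x_i, 1]) and b a column of ones,

\[
\min_{W}\ \lVert YW - b\rVert^{2}
\quad\Longrightarrow\quad
W = (Y^{T}Y)^{-1}Y^{T}b = Y^{+}b .
\]

The train() method minimizes the same objective by (stochastic) gradient descent instead.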

import numpy as np
import random


class MSE:
    def __init__(self):
        self.W = None
        self.W_gather = {}

    def pseudo(self, X, y, iter=0):
        # closed-form solution via the pseudo-inverse
        X = np.c_[X, np.ones((X.shape[0], 1))]         # append a constant 1 feature
        X = X*y                                        # multiply each row by its label (+1 / -1)
        X = np.linalg.inv(np.dot(X.T, X)).dot(X.T)     # pseudo-inverse
        b = np.abs(y)                                  # margin vector of ones
        self.W = X.dot(b)
        self.W_gather[str(iter)] = self.W.copy().T

    def plot(self, x1, x2, iter=0):
        m1 = []
        m2 = []
        W = self.W_gather[str(iter)].T
        m1.append(x1)
        m2.append(-(W[0] * m1[0]+W[2])/W[1])
        m1.append(x2)
        m2.append(-(W[0] * m1[1]+W[2])/W[1])
        return m1, m2

    def predict(self, X, iter=0):
        X = np.c_[X, np.ones((X.shape[0], 1))]
        X = X.T
        W = self.W_gather[str(iter)].T
        y_pred = W.T.dot(X)
        y_pred[y_pred > 0] = 1
        y_pred[y_pred < 0] = -1
        return y_pred.T

    def predict_m(self, X, class_num):
        X = np.c_[X, np.ones((X.shape[0], 1))]
        W = []
        for i in range(class_num):
            W.append(self.W_gather[str(i)])
        W = np.array(W).reshape(-1, X.shape[1])

        scored = X.dot(W.T)

        y_pred = np.argmax(scored, axis=1) + 1
        return y_pred.T

    def train(self, X, y, learning_rate=1e-3, num_iters=10000, verbose=True, part_accuracy=True, mode=1, iter=0, error_limit=1e-7):
        # verbose: whether to print the loss during the iterations
        # part_accuracy: whether to print the accuracy on this training part
        X_ = np.c_[X, np.ones((X.shape[0], 1))]
        input_num, input_size = X_.shape
        if self.W is None:
            self.W = 0.001 * np.random.randn(input_size, 1)
        loss_history = []
        W_b_history = []
        X_ = X_*y
        b = np.abs(y)
        for i in range(num_iters):
            loss, grads, g_ = self.Lossgrads(X_, b, mode)
            loss_history.append(loss)
            W_b_history.append([self.W.copy()])
            if g_ < error_limit:       # gradient small enough: converged
                if verbose:
                    print('iteration %d / %d: loss %f' % (i, num_iters, abs(loss)))
                break

            self.W =self.W - learning_rate * grads
            if verbose and i % 100 == 0:
                print('iteration %d / %d: loss %f' % (i, num_iters, loss))

        self.W_gather[str(iter)] = self.W.copy().T
        if part_accuracy:
            y_train_pred = self.predict(X, iter)
            y_train_accuracy = np.mean(y== y_train_pred)
            print("The train_part_%d accuracy is %f" % (iter+1, y_train_accuracy))
        self.W = None
        print("The learning-rate is ", learning_rate)
        print("The error-limit is ", error_limit)

        return loss_history, W_b_history




    def Lossgrads(self, X, b, mode):
        W = self.W
        h = X.dot(W) - b
        h = np.array(h, dtype=np.float64)
        loss = np.array(np.sum(np.square(h)) / X.shape[0], dtype=np.float64)
        if loss == 0:
            return loss, 0, 0          # the caller unpacks (loss, grads, g_)
        if mode == 1:
            grads = self.BGD(X, b)
            g_ = np.sum(np.square(grads))
        if mode == 2:
            grads = self.SGD(X, b)
            g_ = np.sum(np.square(grads))
        return loss, grads, g_


    def BGD(self, X, b):
        return X.T.dot(X.dot(self.W) - b)

    def SGD(self, X, b):
        i = random.randint(0, X.shape[0] - 1)
        grads = (X[i].dot(self.W) - b[i]) * X[i]
        return grads.reshape(-1, 1)
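A minimal usage sketch for the closed-form route (not from the original post; the synthetic data and seed are my own assumptions; X has samples as rows and y holds +1/-1 labels as a column vector):

import numpy as np

np.random.seed(0)
X_pos = np.random.randn(50, 2) + np.array([2.0, 2.0])     # class +1 samples, shape (50, 2)
X_neg = np.random.randn(50, 2) + np.array([-2.0, -2.0])   # class -1 samples, shape (50, 2)
X = np.vstack([X_pos, X_neg])                              # samples as rows, shape (100, 2)
y = np.vstack([np.ones((50, 1)), -np.ones((50, 1))])       # labels in {+1, -1}, shape (100, 1)

clf = MSE()
clf.pseudo(X, y)              # closed-form pseudo-inverse solution, stored under key '0'
y_pred = clf.predict(X)       # shape (100, 1), entries in {+1, -1}
print("training accuracy:", np.mean(y_pred == y))

The gradient-descent version can be used the same way via clf.train(X, y).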

Above are the code and formulas for the three linear classifiers; the comments in the code are sparse. If anything is unclear, you can contact me at 564223274@qq.com. Criticism and corrections are welcome.
