FDA算法原理和python实现

FDA原理

这里写图片描述
这里写图片描述

FDA代码

本次使用的数据为 MNIST 的四个二进制文件:t10k-images-idx3-ubyte、t10k-labels-idx1-ubyte、train-images-idx3-ubyte、train-labels-idx1-ubyte,分别为测试集数据、测试集标签、训练集数据、训练集标签。读入数据之后,对数据进行标准化。代码如下:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mnist import MNIST
import sys

def normalization(x):
    """Standardise every row of a 2-D array to zero mean and unit variance.

    Args:
        x: 2-D array-like; each row is normalised independently.

    Returns:
        numpy.ndarray of floats with the same shape as ``x``. Each row has
        its own mean subtracted and is divided by its own (population)
        standard deviation. Rows whose standard deviation is zero come back
        as all zeros instead of NaN/inf.
    """
    x = np.asarray(x, dtype=float)
    mean = np.mean(x, axis=1, keepdims=True)   # row mean, shape (n, 1)
    std = np.std(x, axis=1, keepdims=True)     # row std, shape (n, 1)
    # A constant row has std == 0; dividing by 1 keeps the result finite
    # (the numerator is already zero for such a row).
    std[std == 0] = 1.0
    return (x - mean) / std

def load_mnist(only_binary = True,y_1 = 6,y_2 = 8):
    """Load the MNIST train/test sets and row-normalise the images.

    Args:
        only_binary: when true, keep only the samples whose label equals
            ``y_1`` or ``y_2``.
        y_1: first label to keep when ``only_binary`` is true.
        y_2: second label to keep when ``only_binary`` is true.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``. The image sets are
        passed through ``normalization``; the labels are returned as
        loaded (or as filtered lists when ``only_binary`` is true).
    """
    # Raw string: the original literal relied on the invalid escapes
    # ``\w`` and ``\P`` being kept verbatim; the resulting path is the same.
    data_dir = r'E:\2016.11\work\PRML'
    if data_dir not in sys.path:   # do not grow sys.path on repeated calls
        sys.path.append(data_dir)
    mndata = MNIST(data_dir)
    x_train, y_train = mndata.load_training()   # get train data
    x_test, y_test = mndata.load_testing()      # get test data

    if only_binary:
        wanted = (y_1, y_2)
        # Filter images and labels in one pass so they stay aligned.
        train_pairs = [(x, y) for x, y in zip(x_train, y_train) if y in wanted]
        test_pairs = [(x, y) for x, y in zip(x_test, y_test) if y in wanted]
        x_train = [x for x, _ in train_pairs]
        y_train = [y for _, y in train_pairs]
        x_test = [x for x, _ in test_pairs]
        y_test = [y for _, y in test_pairs]

    x_train = normalization(x_train)
    x_test = normalization(x_test)
    print('data-reading success')
    print('x_train:',np.size(x_train))
    return x_train,y_train,x_test,y_test

# Announce the load, then fetch the default (6 vs 8) normalised subset.
print('reading data.....')
(x_train, y_train, x_test, y_test) = load_mnist()

基于Fisher判别法的模型训练
得到数据集 $x_1$、$x_2$、$y_1$、$y_2$ 之后,代入公式(13)即可算出 $w$(代码中即 $w = S_w^{-1}(m_1 - m_2)$),再代入公式(13)算出 $w_0$(代码中即 $w_0 = -(\tilde{m}_1 + \tilde{m}_2)/2$),代码如下:

import numpy as np
from numpy.linalg import pinv
def FDA_train(x_1,x_2):
    """Fit a two-class Fisher linear discriminant.

    Args:
        x_1: 2-D array-like of shape (n_1, d), samples of the first class.
        x_2: 2-D array-like of shape (n_2, d), samples of the second class.

    Returns:
        Tuple ``(w_star, w_0)``:
        ``w_star`` is a (d, 1) ``numpy.matrix`` equal to
        ``pinv(S_w) * (m_1 - m_2)``, where ``S_w`` is the within-class
        scatter matrix and ``m_1``/``m_2`` are the class means.
        ``w_0`` is ``-(m_1_tilde + m_2_tilde) / 2``, the negated midpoint of
        the two projected class means.

    Raises:
        ValueError: if either class is empty or the two classes have a
            different number of features.
    """
    x_1 = np.asarray(x_1, dtype=float)
    x_2 = np.asarray(x_2, dtype=float)
    n_1, d_1 = np.shape(x_1)
    n_2, d_2 = np.shape(x_2)
    if n_1 == 0 or n_2 == 0:
        raise ValueError("both classes need at least one sample")
    if d_1 != d_2:
        raise ValueError("x_1 and x_2 must have the same number of features")

    m_1 = np.mean(x_1, axis=0)
    m_2 = np.mean(x_2, axis=0)

    # Within-class scatter: the sum of outer products of the centred
    # samples equals centred.T . centred, computed in one matrix product.
    c_1 = x_1 - m_1
    c_2 = x_2 - m_2
    S_w = np.dot(c_1.T, c_1) + np.dot(c_2.T, c_2)

    # Pseudo-inverse because S_w may be singular.
    w = np.dot(pinv(S_w), m_1 - m_2)

    m_1_tilde = np.dot(x_1, w).sum() / float(n_1)
    m_2_tilde = np.dot(x_2, w).sum() / float(n_2)
    w_0 = -(m_1_tilde + m_2_tilde) / float(2)

    # np.asmatrix (np.mat was removed in NumPy 2.0) keeps the (d, 1) matrix
    # return type that callers multiply with ``*``.
    w_star = np.asmatrix(w).T
    return w_star,w_0

def FDA_test(x_test, w_star, w_0):
    """Classify samples with a trained Fisher discriminant.

    The decision function is ``g(x) = x . w_star + w_0``. ``w_0`` is the
    bias returned by ``FDA_train`` (the negated midpoint of the projected
    class means), so a sample belongs to the first class when
    ``g(x) >= 0``, i.e. when its projection is at or above that midpoint.

    Args:
        x_test: 2-D array-like of shape (n, d), or a single sample of
            length d.
        w_star: projection vector with d elements, e.g. the (d, 1) matrix
            returned by ``FDA_train``.
        w_0: scalar bias.

    Returns:
        (n, 2) integer ``numpy.matrix`` in one-hot form: column 0 is 1 for
        samples assigned to the first class, column 1 is 1 for samples
        assigned to the second class.
    """
    samples = np.atleast_2d(np.asarray(x_test, dtype=float))
    weights = np.asarray(w_star, dtype=float).reshape(-1, 1)
    # Explicit dot product: does not depend on an operand being np.matrix.
    scores = np.dot(samples, weights) + w_0
    is_first = scores >= 0
    is_second = scores < 0
    y_pred = np.hstack((is_first, is_second)).astype(int)
    return np.asmatrix(y_pred)

对于上述算出的 $w$、$w_0$,根据公式(11)对输入数据进行计算,再对结果做 one-of-K(one-hot encoding)处理,最后通过公式(14)计算错误率。

import numpy as np
from Load import load_mnist
from FDA import FDA_test,FDA_train
# The two digit classes to separate.
y_1 = 6
y_2 = 8
x_train, y_train, x_test, y_test = load_mnist(only_binary=True,y_1=y_1,y_2=y_2)

# Split the training images by label using boolean masks.
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)
x_train_0 = x_train[y_train == y_1]
x_train_1 = x_train[y_train == y_2]

w_star, w_0 = FDA_train(x_train_0, x_train_1)
y_pred = FDA_test(x_test, w_star, w_0)

# One-hot encode the true labels: column 0 <-> y_1, column 1 <-> y_2.
# y_1 / y_2 keep their label values rather than being overwritten.
labels = np.asarray(y_test).reshape(-1, 1)
y_test = np.hstack((labels == y_1, labels == y_2)).astype(int)

# A misclassified sample differs in both one-hot columns, hence the factor 2.
# float() forces true division so the rate is not truncated to 0.
error_rate = np.abs(np.asarray(y_pred) - y_test).sum() / float(2 * len(y_test))
print("error rate is: %.4f" % error_rate)
  • 2
    点赞
  • 22
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值