I used the sonar dataset for this experiment.
Import the data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the sonar.all-data dataset
sonar = pd.read_csv('sonar.all-data', header=None, sep=',')
sonar
The data looks like this:
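Before going further, it is worth confirming the layout of the file: 208 rows with 60 numeric feature columns followed by a label column of 'R'/'M'. A quick check (a minimal sketch; the expected counts assume the standard UCI sonar.all-data file):

print(sonar.shape)               # expected (208, 61): 60 features + 1 label column
print(sonar[60].value_counts())  # expected 97 'R' (rock) and 111 'M' (mine) samples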
Data processing
# Data preprocessing: keep the 60 numeric feature columns
sonar1 = sonar.iloc[0:208, 0:60]
sonar2 = np.array(sonar1, dtype=float)  # convert the DataFrame to a NumPy array
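The experiment below splits the samples by row position (rows 0-96 are class 1, rows 97-207 are class 2), which relies on the file listing all 'R' samples before the 'M' samples. If you prefer not to depend on row order, the same split can be derived from the label column (a sketch, not part of the original code):

labels = sonar.iloc[:, 60].values   # 'R' / 'M' class labels
rocks = sonar2[labels == 'R']       # should match sonar2[0:97]
mines = sonar2[labels == 'M']       # should match sonar2[97:208]
print(rocks.shape, mines.shape)     # expected (97, 60) and (111, 60)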
Define the Fisher function
def LDA(X1, X2, n):
    # Class mean vectors: per-feature mean over all samples (axis=0)
    m1 = np.mean(X1, axis=0)
    m2 = np.mean(X2, axis=0)
    # Reshape the mean vectors into column vectors for the matrix algebra below
    m1 = m1.reshape(n, 1)
    m2 = m2.reshape(n, 1)
    # Within-class scatter matrices
    S1 = np.zeros((n, n))
    S2 = np.zeros((n, n))
    for i in range(X1.shape[0]):
        S1 += (X1[i].reshape(n, 1) - m1).dot((X1[i].reshape(n, 1) - m1).T)
    for i in range(X2.shape[0]):
        S2 += (X2[i].reshape(n, 1) - m2).dot((X2[i].reshape(n, 1) - m2).T)
    # Total within-class scatter matrix S_w
    S_w = S1 + S2
    # Optimal projection direction W
    W = np.linalg.inv(S_w).dot(m1 - m2)  # np.linalg.inv() computes the matrix inverse
    return W

def Class(X, W):
    # Project a sample X (column vector) onto the direction W
    y = (W.T).dot(X)
    return y
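For reference, the direction returned by LDA() is the standard two-class Fisher solution: it maximizes the ratio of between-class separation to within-class scatter of the projected data (notation matches the code),

$$
J(\mathbf{w}) = \frac{\left(\mathbf{w}^{\mathsf T}(\mathbf{m}_1 - \mathbf{m}_2)\right)^2}{\mathbf{w}^{\mathsf T} S_w \mathbf{w}},
\qquad
\mathbf{w}^{*} \propto S_w^{-1}(\mathbf{m}_1 - \mathbf{m}_2).
$$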
Run the experiment
G1 = np.zeros(97)   # projected values for the 97 class-1 samples
G2 = np.zeros(111)  # projected values for the 111 class-2 samples
p1 = sonar2[0:97, 0:37]    # class 1, first 37 features
p2 = sonar2[97:208, 0:37]  # class 2, first 37 features
W = LDA(p1, p2, 37)
for i in range(208):
    if i <= 96:
        test = p1[i].reshape(37, 1)
        G1[i] = Class(test, W).item()   # .item() extracts the scalar projection
    else:
        test = p2[i-97].reshape(37, 1)
        G2[i-97] = Class(test, W).item()
y1 = np.zeros(97)         # plot class 1 along y = 0
y2 = np.zeros(111) + 0.2  # plot class 2 along y = 0.2
plt.figure(1)
plt.ylim((-0.5,0.5))
plt.xlim((-0.1,0.1))
plt.scatter(G1,y1,c='red',alpha=1,marker='.',label='G1')
plt.scatter(G2,y2,c='k',alpha=1,marker='.',label='G2')
plt.legend()
plt.show()
The output looks like this:
The two classes are not separated very well, so I referred to Mr_Lowbee's write-up for further testing and added a classification threshold W0 together with leave-one-out evaluation.
The code is as follows:
def Fisher(X1, X2, n, c):
    # Class mean vectors: per-feature mean over all samples (axis=0)
    m1 = np.mean(X1, axis=0)
    m2 = np.mean(X2, axis=0)
    # Reshape the mean vectors into column vectors for the matrix algebra below
    m1 = m1.reshape(n, 1)
    m2 = m2.reshape(n, 1)
    # Within-class scatter matrices
    S1 = np.zeros((n, n))
    S2 = np.zeros((n, n))
    # c marks which class the left-out sample came from:
    # c == 0 -> X1 holds 96 training samples, X2 holds 111
    # c == 1 -> X1 holds 97 training samples, X2 holds 110
    if c == 0:
        for i in range(0, 96):
            S1 += (X1[i].reshape(n, 1) - m1).dot((X1[i].reshape(n, 1) - m1).T)
        for i in range(0, 111):
            S2 += (X2[i].reshape(n, 1) - m2).dot((X2[i].reshape(n, 1) - m2).T)
    if c == 1:
        for i in range(0, 97):
            S1 += (X1[i].reshape(n, 1) - m1).dot((X1[i].reshape(n, 1) - m1).T)
        for i in range(0, 110):
            S2 += (X2[i].reshape(n, 1) - m2).dot((X2[i].reshape(n, 1) - m2).T)
    # Total within-class scatter matrix S_w
    S_w = S1 + S2
    # Optimal projection direction W
    W = np.linalg.inv(S_w).dot(m1 - m2)  # np.linalg.inv() computes the matrix inverse
    # Projected class means in the one-dimensional space
    m_1 = (W.T).dot(m1)
    m_2 = (W.T).dot(m2)
    # Classification threshold W0: midpoint of the two projected means
    W0 = -0.5 * (m_1 + m_2)
    return W, W0

def Classify(X, W, W0):
    # Decision value: positive -> class 1, negative -> class 2
    y = (W.T).dot(X) + W0
    return y
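With this threshold, the decision rule used below is simply a sign check on the projected value: a test sample $\mathbf{x}$ is assigned to class 1 when

$$
\mathbf{W}^{\mathsf T}\mathbf{x} + W_0 \ge 0
\quad\Longleftrightarrow\quad
\mathbf{W}^{\mathsf T}\mathbf{x} \ge \tfrac{1}{2}\left(\tilde m_1 + \tilde m_2\right),
$$

where $\tilde m_1, \tilde m_2$ are the projected class means computed in Fisher().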
# Looking at the results, using the first n=37 features gives the same accuracy as using all 60, so n=37 is chosen to reduce the amount of computation
G1 = np.zeros(97)   # decision values for the 97 class-1 samples
G2 = np.zeros(111)  # decision values for the 111 class-2 samples
p1 = sonar2[0:97, 0:37]    # class 1, first 37 features
p2 = sonar2[97:208, 0:37]  # class 2, first 37 features
count = 0  # number of correctly classified left-out samples
for i in range(208):
    if i <= 96:
        # Leave one class-1 sample out and train on the remaining samples
        test = p1[i].reshape(37, 1)
        train = np.delete(p1, i, axis=0)
        W, W0 = Fisher(train, p2, 37, 0)
        G1[i] = Classify(test, W, W0).item()
        if G1[i] >= 0:   # positive decision value -> classified as class 1 (correct)
            count += 1
    else:
        # Leave one class-2 sample out and train on the remaining samples
        test = p2[i-97].reshape(37, 1)
        train = np.delete(p2, i-97, axis=0)
        W, W0 = Fisher(p1, train, 37, 1)
        G2[i-97] = Classify(test, W, W0).item()
        if G2[i-97] < 0:  # negative decision value -> classified as class 2 (correct)
            count += 1
y1 = np.zeros(97)         # plot class 1 along y = 0
y2 = np.zeros(111) + 0.2  # plot class 2 along y = 0.2
plt.figure(1)
plt.ylim((-0.5,0.5))
plt.xlim((-0.01,0.01))
plt.scatter(G1,y1,c='red',alpha=1,marker='.',label='G1')
plt.scatter(G2,y2,c='k',alpha=1,marker='.',label='G2')
plt.legend()
plt.show()
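The count variable above already tallies how many of the 208 left-out samples were classified correctly, but it is never reported; a one-line addition (not in the original code) prints the leave-one-out accuracy alongside the plot:

print('Leave-one-out accuracy: %d / 208 = %.3f' % (count, count / 208))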
The output looks like this:
The separation between the two classes is clearly better than before.
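As a sanity check on the hand-written implementation (not part of the original experiment; this sketch assumes scikit-learn is installed), the same leave-one-out evaluation can be run with the library's LDA classifier:

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import LeaveOneOut, cross_val_score

X = np.asarray(sonar.iloc[:, 0:37], dtype=float)  # the same first 37 features
y = sonar.iloc[:, 60].values                      # 'R' / 'M' labels
scores = cross_val_score(LinearDiscriminantAnalysis(), X, y, cv=LeaveOneOut())
print('sklearn LDA leave-one-out accuracy: %.3f' % scores.mean())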