针对二分类问题的线性判别分析模型
以下代码是周志华著《机器学习》习题3.5的程序实现(针对二分类问题的线性判别分析模型)。
# 周志华,机器学习,习题3.5,线性判别分类
# 导入库
import numpy as np
import matplotlib.pyplot as plt
# Training set.  The literal lists one sample per row (two attributes each);
# the trailing .T stores it transposed, so every COLUMN of AttrSet is a sample
# and every ROW is an attribute.
# NOTE: np.matrix is kept deliberately -- the expressions that consume these
# objects use `*` as the matrix product, which only np.matrix provides.
AttrSet = np.matrix([
    [0.697, 0.460], [0.774, 0.376], [0.634, 0.264], [0.608, 0.318],
    [0.556, 0.215], [0.403, 0.237], [0.481, 0.149], [0.437, 0.211],
    [0.666, 0.091], [0.243, 0.267], [0.245, 0.057], [0.343, 0.099],
    [0.639, 0.161], [0.657, 0.198], [0.360, 0.370], [0.593, 0.042],
    [0.719, 0.103],
]).T
# Class labels as a column vector: the first 8 samples are labelled 1 (good),
# the remaining 9 are labelled 0 (bad).
FlagSet = np.matrix([np.concatenate((np.ones(8), np.zeros(9)))]).T

# Split the training set by class.
NumAttr = AttrSet.shape[0]
NumSam = AttrSet.shape[1]
# Boolean mask over the sample columns; anything not labelled 1 counts as bad.
# Selecting columns with the mask keeps the np.matrix type and the original
# sample order, without growing the result one column at a time.
GoodMask = np.asarray(FlagSet).ravel() == 1
AttrSetGood = AttrSet[:, GoodMask]
AttrSetBad = AttrSet[:, ~GoodMask]

# Solve for the linear-discriminant coefficients.
# Per-class mean vectors, each a (NumAttr, 1) column.
UGood = np.mean(AttrSetGood, 1)
UBad = np.mean(AttrSetBad, 1)
# np.cov treats each row as a variable and each column as one observation, and
# divides by (number of observations - 1).  Multiplying that factor back turns
# each class covariance into the class scatter matrix; their sum is the
# within-class scatter matrix SW.
SW = (np.cov(AttrSetGood) * (AttrSetGood.shape[1] - 1)
      + np.cov(AttrSetBad) * (AttrSetBad.shape[1] - 1))
# Projection direction: pinv(SW) times the difference of the class means.
# The right operand is an np.matrix, so `*` is a matrix product here and WBest
# is a (NumAttr, 1) np.matrix.
WBest = np.linalg.pinv(SW) * (UGood - UBad)
# Inspect how well the learned model does.
# Figure 1: scatter plot of the training samples, one colour per class.
plt.close('all')
plt.figure(1)
# Plain-ndarray views of the per-class sample matrices: row 0 holds the
# x coordinates and row 1 the y coordinates, each as a 1-D array.
GoodXY = np.asarray(AttrSetGood)
BadXY = np.asarray(AttrSetBad)
plt.scatter(GoodXY[0], GoodXY[1], marker='o', color='k', label='EsGood')
plt.scatter(BadXY[0], BadXY[1], marker='o', color='r', label='EsBad')
plt.xlabel('Density')
plt.ylabel('Sugar')
plt.legend(loc='upper left')
plt.title('Exercise Set')
# Figure 2: opened here; what is drawn on it is not visible in this part of
# the file.
plt.figure(2)
# Sample-index lists for the classification loop that follows; that loop
# appends to IndexGood (IndexBad is presumably filled the same way -- confirm).
IndexGood = []
IndexBad = []
for m in range(NumSam):
IndexGood.append(m) if WBest.T*(AttrSet[:,m]-UGood)*(AttrSet[:,m].T-UGood.T)*WBest