针对二分类问题的对数几率模型
以下代码片内容为周志华著《机器学习》习题3.3的程序(关于二分类问题的对数几率模型)。
# 周志华,机器学习,习题3.3,对数几率分类
# 导入库和自编函数
from FunctionsMySelf import Newton
import sympy as sp
import numpy as np
import matplotlib.pyplot as plt
# Training set: one sample per row as [x, y, 1].  The first two entries are
# the values plotted below on the 'Density' and 'Sugar' axes; the trailing 1
# is the constant feature that multiplies the intercept X3.  The transpose
# stores samples as columns, so AttrSet is 3 x NumSam.
AttrSet = np.matrix([
    [0.697, 0.460, 1], [0.774, 0.376, 1], [0.634, 0.264, 1], [0.608, 0.318, 1],
    [0.556, 0.215, 1], [0.403, 0.237, 1], [0.481, 0.149, 1], [0.437, 0.211, 1],
    [0.666, 0.091, 1], [0.243, 0.267, 1], [0.245, 0.057, 1], [0.343, 0.099, 1],
    [0.639, 0.161, 1], [0.657, 0.198, 1], [0.360, 0.370, 1], [0.593, 0.042, 1],
    [0.719, 0.103, 1],
]).T
# Labels as a NumSam x 1 column: the first 8 samples are 1, the last 9 are 0.
FlagSet = np.matrix(np.concatenate((np.ones(8), np.zeros(9)))).T
NumSam = FlagSet.shape[0]

# Build the logistic-regression objective symbolically:
#     Y(Beta) = sum_m ( -y_m * Beta^T x_m + log(1 + exp(Beta^T x_m)) )
X1, X2, X3 = sp.symbols('X1 X2 X3')
Beta = np.matrix([[X1], [X2], [X3]])
Y = 0 * X1  # symbolic zero to accumulate into
for m in range(NumSam):
    # Beta^T x_m is a 1 x 1 object matrix; take the scalar expression out.
    Mid = np.dot(Beta.T, AttrSet[:, m])[0, 0]
    Y = Y - FlagSet[m, 0] * Mid + sp.log(1 + sp.exp(Mid))

# Minimise the objective with Newton's method.  FucArray lists the free
# symbols first and the objective last, which is the layout Newton expects.
FucArray = np.matrix([[X1], [X2], [X3], [Y]])
ErrSet = 1e-14
TimesSet = 1e2
# Random start in [0, 1) for each coefficient, as a column matrix.
XCurr = np.matrix(np.random.random((FucArray.shape[0] - 1, 1)))
BetaCal = Newton(FucArray, ErrSet, TimesSet, XCurr)


def _scatter_by_class(FigNum, GoodIdx, BadIdx, Title):
    """Draw one scatter figure of the samples, split into two index groups."""
    plt.figure(FigNum)
    for Idx, Color, Label in ((GoodIdx, 'k', 'EsGood'), (BadIdx, 'r', 'EsBad')):
        plt.scatter(np.asarray(AttrSet[0, Idx]).ravel(),
                    np.asarray(AttrSet[1, Idx]).ravel(),
                    marker='o', color=Color, label=Label)
    plt.xlabel('Density')
    plt.ylabel('Sugar')
    plt.legend(loc='upper left')
    plt.title(Title)


# Compare the true labels (figure 1) with the model's predictions (figure 2).
plt.close('all')

IndexGood, IndexBad = [], []
for m in range(NumSam):
    if FlagSet[m, 0] == 1:
        IndexGood.append(m)
    else:
        IndexBad.append(m)
_scatter_by_class(1, IndexGood, IndexBad, 'Exercise Set')

IndexGood, IndexBad = [], []
for m in range(NumSam):
    # A positive linear score Beta^T x puts the sample in the label-1 group.
    if np.dot(BetaCal.T, AttrSet[:, m])[0, 0] > 0:
        IndexGood.append(m)
    else:
        IndexBad.append(m)
_scatter_by_class(2, IndexGood, IndexBad, 'Es Result')

plt.show()
# 牛顿法函数
def Newton(FucArray, ErrSet, TimesSet, XCurr):
    """Find a stationary point of a symbolic scalar function by Newton's method.

    Parameters
    ----------
    FucArray
        (NumX + 1) x 1 matrix of SymPy objects.  The first NumX entries are
        the independent symbols; the last entry is the expression to optimise.
    ErrSet
        Tolerance on the norm of the gradient; iteration stops once the norm
        falls below it.
    TimesSet
        Maximum number of Newton updates to perform.
    XCurr
        NumX x 1 numeric matrix holding the starting point.

    Returns
    -------
    The NumX x 1 point reached when either stopping rule fires.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the Hessian at the current point is singular.
    """
    import sympy
    import numpy

    NumX = FucArray.shape[0] - 1
    Symbols = [FucArray[n, 0] for n in range(NumX)]
    Target = FucArray[NumX, 0]

    # Symbolic gradient and Hessian are derived once, then evaluated per step.
    Diff1 = [sympy.diff(Target, Sym, 1) for Sym in Symbols]
    Diff2 = [[sympy.diff(Grad, Sym, 1) for Sym in Symbols] for Grad in Diff1]

    NumDiff1 = numpy.matrix(numpy.zeros((NumX, 1)))
    NumDiff2 = numpy.matrix(numpy.zeros((NumX, NumX)))
    Times = 0
    while True:
        # One substitution list per iteration, shared by every entry.
        Point = [(Symbols[n], XCurr[n, 0]) for n in range(NumX)]
        for n in range(NumX):
            NumDiff1[n, 0] = Diff1[n].subs(Point)
            for m in range(NumX):
                NumDiff2[n, m] = Diff2[n][m].subs(Point)
        # Stop on a small enough gradient or once TimesSet updates are done.
        if numpy.linalg.norm(NumDiff1) < ErrSet or Times >= TimesSet:
            break
        Times = Times + 1
        # Solve H * step = g directly instead of forming the inverse Hessian.
        XCurr = XCurr - numpy.linalg.solve(NumDiff2, NumDiff1)
    # NOTE(review): the original indentation was lost; reporting once after
    # the loop is assumed here -- confirm per-iteration output was not intended.
    print('Times = ',Times)
    print('XCurr = ',XCurr)
    return XCurr