模型原型
class sklearn.discriminant_analysis.LinearDiscriminantAnalysis(solver='svd',
shrinkage=None, priors=None, n_components=None,
store_covariance=False, tol=0.0001)
参数
- solver:指定求解最优化问题的算法
- 'svd':奇异值分解(适用于特征数量很大的数据)
- 'lsqr':最小二乘法,可以结合shrinkage参数
- 'eigen':特征值分解算法,可以结合shrinkage参数
- shrinkage:仅当solver='lsqr'或'eigen'时才有意义
- 'auto':根据Ledoit-Wolf引理来自动决定shrinkage参数
- None:不使用shrinkage参数
- 浮点数(0~1):指定shrinkage参数
- priors:数组的元素依次指定了每个类别的先验概率(None代表每个类的先验概率都是相等的)
- n_components:指定数据降维后的维度
- store_covariance:是否需要计算每个类别的协方差矩阵
- tol:浮点数,仅在solver='svd'时使用,指定秩估计时判定奇异值是否显著的阈值
属性
- coef_
- intercept_
- covariance_:每个类别的协方差矩阵
- means_:每个类别的均值向量
- xbar_:整个样本的均值向量
- niter
方法
- fit(X,y[,sample_weight])
- predict(X)
- predict_log_proba(X)
- predict_proba(X)
- score(X,y[,sample_weight])
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, discriminant_analysis, linear_model

try:
    from sklearn import cross_validation
except ImportError:
    # sklearn.cross_validation was removed in scikit-learn 0.20; keep the
    # historical name bound so existing references continue to resolve.
    from sklearn import model_selection as cross_validation
加载数据
def load_data():
    """Load the iris dataset and return a stratified train/test split.

    Returns:
        The sequence ``[X_train, X_test, y_train, y_test]`` produced by
        ``train_test_split`` with 25% of the samples held out, a fixed
        ``random_state=0`` and stratification on the class labels.
    """
    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20; prefer
    # ``model_selection`` and fall back only for very old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split

    iris = datasets.load_iris()
    features, labels = iris.data, iris.target
    return train_test_split(
        features,
        labels,
        test_size=0.25,
        random_state=0,
        stratify=labels,
    )
使用LinearDiscriminantAnalysis
def test_LinearDiscriminantAnalysis(*data):
    """Fit a default LDA classifier and print its parameters and test score.

    Args:
        *data: Four positional values, in order: training features, test
            features, training labels, test labels.
    """
    train_features, test_features, train_labels, test_labels = data

    model = discriminant_analysis.LinearDiscriminantAnalysis()
    model.fit(train_features, train_labels)

    weights, bias = model.coef_, model.intercept_
    print('Coefficients:%s,\nintercept %s' % (weights, bias))

    accuracy = model.score(test_features, test_labels)
    print('Score:%.2f' % accuracy)
# Split the data with load_data() and run the baseline LDA experiment on it.
X_train,X_test,y_train,y_test=load_data()
test_LinearDiscriminantAnalysis(X_train,X_test,y_train,y_test)
LDA降维后数据集分布图
def plot_LDA(converted_X,y):
    """Draw a 3-D scatter plot of the converted samples, one style per class.

    Args:
        converted_X: 2-D array; its first three columns are used as the x, y
            and z coordinates of each sample.
        y: Array of class labels; samples labelled 0, 1 and 2 are drawn.
            The comparison mask is flattened, so a 1-D array and an
            (n_samples, 1) column both work.
    """
    # Importing Axes3D registers the '3d' projection on older matplotlib.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    fig = plt.figure()
    # Instantiating Axes3D(fig) directly no longer attaches the axes to the
    # figure on recent matplotlib releases, which leaves the canvas empty;
    # add_subplot always registers the axes with the figure.
    ax = fig.add_subplot(111, projection='3d')

    colors = 'rgb'
    markers = 'o*s'
    for target, color, marker in zip([0, 1, 2], colors, markers):
        mask = (y == target).ravel()
        points = converted_X[mask, :]
        ax.scatter(
            points[:, 0],
            points[:, 1],
            points[:, 2],
            color=color,
            marker=marker,
            label="Label %d" % target,
        )

    ax.legend(loc="best")
    fig.suptitle('Iris After LDA')
    plt.show()
# Recombine the train/test split into a single data set for visualisation.
X_train, X_test, y_train, y_test = load_data()
X = np.vstack((X_train, X_test))
# Labels are stacked as an (n_samples, 1) column; plot_LDA receives them so.
Y = np.vstack((y_train.reshape(y_train.size, 1),
               y_test.reshape(y_test.size, 1)))
lda = discriminant_analysis.LinearDiscriminantAnalysis()
# fit() expects 1-D labels; passing the column vector directly makes
# scikit-learn emit a DataConversionWarning, so flatten it here.
lda.fit(X, Y.ravel())
# Linear scores X @ coef_.T + intercept_, one column per row of coef_.
# NOTE(review): this is not the output of lda.transform() - confirm intended.
converted_X = np.dot(X, np.transpose(lda.coef_)) + lda.intercept_
plot_LDA(converted_X, Y)
solver对预测能力的影响
def test_LinearDiscriminantAnalysis_solver(*data):
    """Print the test-set score of LDA for each of the three solvers.

    Args:
        *data: Four positional values, in order: training features, test
            features, training labels, test labels.
    """
    X_train, X_test, y_train, y_test = data
    solvers = ['svd', 'lsqr', 'eigen']
    for solver in solvers:
        # The original branched on 'svd' but both branches built the same
        # estimator (shrinkage=None is the default), and the constructor
        # calls were split across lines in a way that does not parse.
        lda = discriminant_analysis.LinearDiscriminantAnalysis(
            solver=solver, shrinkage=None)
        lda.fit(X_train, y_train)
        print('Score at solver=%s:%.2f' % (solver, lda.score(X_test, y_test)))
# Split the data with load_data() and compare the solvers on that split.
X_train,X_test,y_train,y_test=load_data()
test_LinearDiscriminantAnalysis_solver(X_train,X_test,y_train,y_test)
shrinkage对预测能力的影响
def test_LinearDiscriminantAnalysis_shrinkage(*data):
    """Plot the LDA test-set score against the shrinkage value.

    Twenty shrinkage values evenly spaced over [0, 1] are tried with the
    'lsqr' solver; each model is fitted on the training data and scored on
    the held-out data.

    Args:
        *data: Four positional values, in order: training features, test
            features, training labels, test labels.
    """
    X_train, X_test, y_train, y_test = data
    shrinkages = np.linspace(0.0, 1.0, num=20)
    scores = []
    for shrinkage in shrinkages:
        lda = discriminant_analysis.LinearDiscriminantAnalysis(
            solver='lsqr', shrinkage=shrinkage)
        lda.fit(X_train, y_train)
        # Score on the held-out split: the original scored the training
        # data and left X_test / y_test unused, unlike the solver experiment.
        scores.append(lda.score(X_test, y_test))

    # Plot score as a function of shrinkage.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(shrinkages, scores)
    ax.set_xlabel(r'shrinkage')
    ax.set_ylabel(r'score')
    ax.set_ylim(0, 1.05)
    ax.set_title('LinearDiscriminantAnalysis')
    plt.show()
# Split the data with load_data() and run the shrinkage experiment on it.
X_train,X_test,y_train,y_test=load_data()
test_LinearDiscriminantAnalysis_shrinkage(X_train,X_test,y_train,y_test)