二维样本数据和标签回归及LDA代码实现
1、先上效果图:
纠正一下横纵坐标,横坐标为x1,纵坐标为x2,这是样本与标签回归的二维结果图。
2、LDA结果图:
图中的是投影直线和分类面
3、上代码:
样本是通过高斯分布生成的随机数,两类样本的标签分别为 +1 和 -1。
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import MultipleLocator
from sklearn import model_selection
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn import metrics
def get_train_data(data_size=100):
    """Generate a two-class 2-D training set.

    Class 1 is ``data_size`` Gaussian points centred at (1, 1) with
    label +1; class 2 is ``data_size`` points centred at (-1, -1) with
    label -1 (std 0.3 on the first feature, 0.5 on the second).

    Returns:
        (samples, labels): arrays of shape (2*data_size, 2) and
        (2*data_size, 1); the first half of the rows is class 1.
    """
    labels = np.zeros((2 * data_size, 1))
    # class 1: centred at (1, 1), labelled +1
    feat_a = np.random.normal(1, 0.3, data_size).reshape(data_size, 1)
    feat_b = np.random.normal(1, 0.5, data_size).reshape(data_size, 1)
    samples = np.concatenate((feat_a, feat_b), axis=1)
    labels[:data_size, :] = 1
    # class 2: centred at (-1, -1), labelled -1
    feat_a = np.random.normal(-1, 0.3, data_size).reshape(data_size, 1)
    feat_b = np.random.normal(-1, 0.5, data_size).reshape(data_size, 1)
    samples = np.concatenate((samples, np.concatenate((feat_a, feat_b), axis=1)), axis=0)
    labels[data_size:, :] = -1
    return samples, labels
def get_test_data(data_size=10):
    """Generate a two-class 2-D test set with the same distribution
    as the training data.

    Class 1 is ``data_size`` Gaussian points centred at (1, 1) with
    label +1; class 2 is ``data_size`` points centred at (-1, -1) with
    label -1 (std 0.3 on the first feature, 0.5 on the second).

    Returns:
        (samples, labels): arrays of shape (2*data_size, 2) and
        (2*data_size, 1); the first half of the rows is class 1.
    """
    labels = np.zeros((2 * data_size, 1))
    # class 1: centred at (1, 1), labelled +1
    feat_a = np.random.normal(1, 0.3, data_size).reshape(data_size, 1)
    feat_b = np.random.normal(1, 0.5, data_size).reshape(data_size, 1)
    samples = np.concatenate((feat_a, feat_b), axis=1)
    labels[:data_size, :] = 1
    # class 2: centred at (-1, -1), labelled -1
    feat_a = np.random.normal(-1, 0.3, data_size).reshape(data_size, 1)
    feat_b = np.random.normal(-1, 0.5, data_size).reshape(data_size, 1)
    samples = np.concatenate((samples, np.concatenate((feat_a, feat_b), axis=1)), axis=0)
    labels[data_size:, :] = -1
    return samples, labels
#linear regression
def LR(data, label):
    """Fit ordinary least squares via the normal equations.

    Args:
        data: (n, d) feature matrix.
        label: (n, 1) target column vector.

    Returns:
        w: (d+1, 1) weight vector; the last entry is the intercept
        (a bias column of ones is appended to ``data``).

    Raises:
        np.linalg.LinAlgError: if X^T X is singular, so the normal
        equations have no unique solution.
    """
    datalen = len(data)
    y = label
    # BUG FIX: the bias column must match THIS input's length; the
    # original used the global ``trainlen``, which broke (or silently
    # misbehaved) for any input of a different size.
    ones = np.ones((datalen, 1))
    x = np.concatenate((data, ones), axis=1)
    xtx = np.dot(x.T, x)
    if np.linalg.det(xtx) == 0:
        # BUG FIX: the original printed a message and then raised
        # UnboundLocalError on ``return w``; fail loudly instead.
        raise np.linalg.LinAlgError('the matrix is singular, cannot do inverse')
    w = np.dot(np.dot(np.linalg.inv(xtx), x.T), y)
    return w
#square error
def ressError(y, yHat):
    """Mean squared error between targets ``y`` and predictions ``yHat``.

    Both arguments are (n, 1) column vectors; the result is a (1, 1)
    array holding sum((y - yHat)^2) / n.
    """
    residual = y - yHat
    return np.dot(residual.T, residual) / len(y)
#LDA
def LDA(data, label):
    """Fisher's linear discriminant direction for a two-class set.

    Assumes the first half of ``data``'s rows is class 1 and the second
    half is class 2 (``label`` is accepted for interface symmetry with
    LR but is not used). Returns w = Sw^{-1} (mu1 - mu2), where Sw is
    the within-class scatter matrix.
    """
    half = len(data) // 2
    class1 = data[:half, :]
    class2 = data[half:, :]
    mean1 = np.mean(class1, axis=0)
    mean2 = np.mean(class2, axis=0)
    # Within-class scatter: sum of the two centred Gram matrices.
    dev1 = class1 - mean1
    dev2 = class2 - mean2
    scatter = np.dot(dev1.T, dev1) + np.dot(dev2.T, dev2)
    return np.dot(np.linalg.inv(scatter), mean1 - mean2)
##########main##########
# Build the synthetic train/test sets; in both, the first half of the
# rows is class +1 and the second half is class -1.
data_train,data_label=get_train_data()
data_test,testdata_label=get_test_data()
trainlen=len(data_train)
testlen=len(data_test)
##########LDA#########
# Classification surface: fit sklearn's LDA and evaluate on the test set.
# NOTE(review): data_label is (n, 1); sklearn expects a 1-D y and emits a
# DataConversionWarning here -- confirm and pass data_label.ravel() if so.
lda_model = LinearDiscriminantAnalysis(solver='lsqr', shrinkage=None).fit(data_train, data_label)
label_pred = lda_model.predict(data_test)
print(metrics.confusion_matrix(testdata_label, label_pred))
print(metrics.classification_report(testdata_label, label_pred))
# Predict over a dense grid on [-3, 3]^2 to shade the decision regions.
h = 0.01
x0, x1 = np.meshgrid(np.arange(-3, 3, h),np.arange(-3, 3, h))
z = lda_model.predict(np.c_[x0.ravel(), x1.ravel()])
z = z.reshape(x0.shape)
plt.contourf(x0, x1, z)
# Projection line: direction w from the hand-rolled Fisher LDA above.
w=LDA(data_train,data_label)
ax1=plt.subplot(1,1,1)
ax1.scatter(data_train[:int(trainlen/2),0],data_train[:int(trainlen/2),1],s=15,c='r',label='class1')
ax1.scatter(data_train[int(trainlen/2):trainlen,0],data_train[int(trainlen/2):trainlen,1],s=15,c='y',label='class2')
ax1.scatter(data_test[:int(testlen/2),0],data_test[:int(testlen/2),1],c='b',marker='*',label='test_data')
ax1.scatter(data_test[int(testlen/2):testlen,0],data_test[int(testlen/2):testlen,1],c='b',marker='*')
# Endpoints of the projection line y = (w[1]/w[0]) * x through the origin.
xmin = -data_train[:, 0].max()
ymin = ( w[1] / w[0] ) * xmin
xmax= data_train[:, 0].max()
ymax = ( w[1] / w[0] ) * xmax
# Set LDA axis labels and tick spacing.
# NOTE(review): the MultipleLocator objects below are created but never
# attached via ax1.xaxis.set_major_locator(...), so they have no effect.
ax1.set_xlabel('x1')
ax1.set_ylabel('x2')
x_major_locator=MultipleLocator(0.5)
y_major_locator=MultipleLocator(0.5)
plt.plot([xmin, xmax], [ymin, ymax],label='project line')
plt.legend(loc='upper left')
plt.show()
# Draw the training data again on a fresh figure for the regression plot.
ax=plt.subplot(1,1,1)
ax.scatter(data_train[:int(trainlen/2),0],data_train[:int(trainlen/2),1],s=15,c='r',label='class1')
ax.scatter(data_train[int(trainlen/2):trainlen,0],data_train[int(trainlen/2):trainlen,1],s=15,c='y',label='class2')
# Draw the test data.
ax.scatter(data_test[:int(testlen/2),0],data_test[:int(testlen/2),1],c='b',marker='*',label='test_data')
ax.scatter(data_test[int(testlen/2):testlen,0],data_test[int(testlen/2):testlen,1],c='b',marker='*')
# LR: fit least squares on the training set, then plot the decision
# boundary w0*x + w1*y + w2 = 0, i.e. y = -(w0*x + w2) / w1.
what=LR(data_train,data_label)
yhat=-(what[0]*data_train[:,0]+what[2])/what[1]
# Set regression axis labels (the locators are unused here as well).
ax.set_xlabel('x')
ax.set_ylabel('y')
x_major_locator=MultipleLocator(0.5)
y_major_locator=MultipleLocator(0.5)
plt.legend(loc='upper left')
ax.plot(data_train[:,0],yhat,c='g')
# Test error: append the bias column, predict, and report the MSE.
teones=np.ones((testlen,1))
data_test1=np.concatenate((data_test,teones),axis=1)
yHat = np.dot(data_test1,what)
print("square error:",ressError(testdata_label, yHat))
plt.show()
如果有帮助就留下个赞吧!AOA