题目
题目分析
本题考察线性回归的知识:可以采用最小二乘法、正规方程求解析解,或建立线性回归模型并用梯度下降法迭代求解
基础知识
线性回归模型
模型使用的准则是LMS准则,即最小均方误差准则
代码实现
未加正则化
import numpy as np
import matplotlib.pyplot as plt
#准备数据
##注意数据格式:矩阵的维数一定要看好
# Training data: each row of data_x is [bias, shifted year].
# The second column is presumably year - 2000 (inferred from the "* -2000"
# intercept restore in Training and the x = 2014 prediction) — TODO confirm.
# NOTE(review): the original comment called this "normalization" (归一化);
# it is actually a constant shift of the feature, not scaling to a fixed range.
data_x=[[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7],[1,8],[1,9],[1,10],[1,11],[1,12],[1,13]] # shifted years, one row per sample
data_y=[[2.0],[2.5],[2.9],[3.147],[4.515],[4.903],[5.365],[5.704],[6.853],[7.971],[8.561],[10],[11.280],[12.9]] # target values (house prices), one per row of data_x
#计算当前权重下代价函数的值
def cost(xMat, weights, yMat):
    """Mean-squared-error cost: (1 / 2m) * sum((X·w - y)^2)."""
    n_samples = xMat.shape[0]
    residual = np.dot(xMat, weights) - yMat  # prediction minus target
    return np.sum(np.square(residual)) / (2 * n_samples)
#采用批处理梯度下降算法
# Batch gradient descent training
def Training(data_x, data_y, alpha=0.01, maxepochs=3000, epslion=0.0000000001):
    """Fit a linear model y = w0 + w1*x by batch gradient descent.

    Parameters
    ----------
    data_x : list of [1, x] rows — bias column plus the shifted feature
        (presumably year - 2000, matching the intercept restore below — TODO confirm)
    data_y : list of single-element target rows
    alpha : learning rate
    maxepochs : maximum number of gradient-descent iterations
    epslion : convergence threshold on the change in cost between epochs
        ("epsilon"; misspelled name kept for backward compatibility)

    Returns the n x 1 weight matrix with the intercept restored to the
    raw (unshifted) feature scale. Side effects: prints per-epoch weights
    and the 2014 prediction, and shows the loss curve with matplotlib.
    """
    xMat = np.mat(data_x)  # design matrix, m x n (np.mat kept for compatibility with callers)
    yMat = np.mat(data_y)  # targets, m x 1
    m, n = xMat.shape
    weights = np.zeros((n, 1))  # model parameters, n x 1
    epochs_count = 0
    loss_list = []
    # Fix: compute the loss once up front and carry it forward — the original
    # called cost() twice per iteration (recomputing the previous epoch's value).
    loss = cost(xMat, weights, yMat)
    while epochs_count < maxepochs:
        hypothesis = np.dot(xMat, weights)  # predictions under current weights
        error = hypothesis - yMat  # residuals (prediction - actual)
        grad = (1 / m) * np.dot(xMat.T, error)  # gradient of the MSE cost
        weights = weights - alpha * grad  # gradient-descent parameter update
        loss_new = cost(xMat, weights, yMat)  # loss after the update
        if abs(loss_new - loss) < epslion:  # converged: cost barely changed
            break
        loss = loss_new  # reuse next iteration instead of recomputing
        loss_list.append(loss_new)
        epochs_count += 1
        # Report weights mapped back to the raw scale (feature was shifted by 2000)
        print("第"+str(epochs_count)+"轮,weight0="+str(weights[0]+weights[1] * -2000)+",weight1="+str(weights[1]))
    weights[0] = weights[0] + weights[1] * -2000  # restore intercept to raw-feature scale
    x = 2014
    a = weights[0]
    b = weights[1]
    print("2014年房价:")
    print(a + x * b)
    plt.plot(loss_list)  # cost-vs-epoch curve
    plt.show()
    return weights
# Run training with a looser tolerance and a capped epoch budget
weights_bgd = Training(data_x, data_y, maxepochs=200, epslion=0.000001, alpha=0.01)
运算结果
加入正则化
import numpy as np
import matplotlib.pyplot as plt
#准备数据
##注意数据格式:矩阵的维数一定要看好
# Training data (same set as the unregularized version): each row of data_x
# is [bias, shifted feature] — presumably year - 2000, inferred from the
# "* -2000" intercept restore in Training — TODO confirm.
data_x=[[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7],[1,8],[1,9],[1,10],[1,11],[1,12],[1,13]] # shifted feature values, one row per sample
data_y=[[2.0],[2.5],[2.9],[3.147],[4.515],[4.903],[5.365],[5.704],[6.853],[7.971],[8.561],[10],[11.280],[12.9]] # target values (house prices)
#计算当前权重下代价函数的值
# Cost function with L2 (ridge) regularization
def cost(xMat, weights, yMat, Lambda):
    """Return (1 / 2m) * (sum of squared errors + Lambda * sum of squared weights).

    NOTE(review): the penalty includes the bias weight weights[0]; standard
    ridge regression usually excludes the intercept — confirm this is intended.
    (The update rule in Training shrinks every weight the same way, so the
    cost and the gradient step are at least consistent with each other.)
    """
    m, n = xMat.shape
    hypothesis = np.dot(xMat, weights)  # predictions under current weights
    sq_err = np.sum(np.square(hypothesis - yMat))
    penalty = Lambda * np.sum(np.square(weights))  # L2 penalty over ALL weights
    # (removed: dead commented-out copy of the unregularized cost)
    return (1 / (2 * m)) * (sq_err + penalty)
#采用批处理梯度下降算法
# Batch gradient descent training with L2 regularization
def Training(data_x, data_y, alpha=0.01, Lambda=0.01, maxepochs=3000, epslion=0.0000000001):
    """Fit a linear model by batch gradient descent with ridge (L2) penalty.

    Parameters
    ----------
    data_x : list of [1, x] rows — bias column plus the shifted feature
        (presumably year - 2000, matching the intercept restore below — TODO confirm)
    data_y : list of single-element target rows
    alpha : learning rate
    Lambda : L2 regularization coefficient (applied to all weights, bias included)
    maxepochs : maximum number of gradient-descent iterations
    epslion : convergence threshold on the change in cost between epochs
        ("epsilon"; misspelled name kept for backward compatibility)

    Returns the n x 1 weight matrix with the intercept restored to the
    raw (unshifted) feature scale. Side effects: prints per-epoch weights
    and the 2014 prediction, and shows the loss curve with matplotlib.
    """
    xMat = np.mat(data_x)  # design matrix, m x n (np.mat kept for compatibility with callers)
    yMat = np.mat(data_y)  # targets, m x 1
    m, n = xMat.shape
    weights = np.zeros((n, 1))  # model parameters, n x 1
    epochs_count = 0
    loss_list = []
    # Fix: compute the loss once up front and carry it forward — the original
    # called cost() twice per iteration (recomputing the previous epoch's value).
    loss = cost(xMat, weights, yMat, Lambda)
    while epochs_count < maxepochs:
        hypothesis = np.dot(xMat, weights)  # predictions under current weights
        error = hypothesis - yMat  # residuals (prediction - actual)
        grad = (1 / m) * np.dot(xMat.T, error)  # gradient of the MSE term
        # Weight-decay form of the ridge update: shrink, then step down the gradient.
        # NOTE(review): the decay factor also shrinks the bias weight — confirm intended.
        weights = weights * (1 - alpha * Lambda / m) - alpha * grad
        loss_new = cost(xMat, weights, yMat, Lambda)  # loss after the update
        if abs(loss_new - loss) < epslion:  # converged: cost barely changed
            break
        loss = loss_new  # reuse next iteration instead of recomputing
        loss_list.append(loss_new)
        epochs_count += 1
        # Report weights mapped back to the raw scale (feature was shifted by 2000)
        print("第"+str(epochs_count)+"轮,weight0="+str(weights[0]+weights[1] * -2000)+",weight1="+str(weights[1]))
    weights[0] = weights[0] + weights[1] * -2000  # restore intercept to raw-feature scale
    x = 2014
    a = weights[0]
    b = weights[1]
    print("2014年房价:")
    print(a + x * b)
    plt.plot(loss_list)  # cost-vs-epoch curve
    plt.show()
    return weights
# Run regularized training with the same budget as the unregularized run
weights_bgd = Training(data_x, data_y, Lambda=0.01, maxepochs=200, epslion=0.000001, alpha=0.01)
运算结果
两者对比
根据代价函数下降曲线可以看出,当加入正则化时,曲线并没有发生很大的变化。这是因为正则化系数 Lambda=0.01 相对样本数 m=14 而言很小,权重收缩因子 (1 - alpha*Lambda/m) 非常接近 1,惩罚项对每轮参数更新的影响几乎可以忽略,因此两条损失曲线基本重合。