BGD 线性回归：用批量梯度下降算法（Batch Gradient Descent，简称 BGD）拟合一元线性回归。
本例只有一个特征（n = 1），需要学习两个未知量 w 和 b（即 n + 1 个参数）。
#1.生成回归数据
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_regression
# Build a 1-feature synthetic regression set: 100 samples with heavy
# Gaussian noise (std 50), seeded for reproducibility.
X, y = make_regression(
    n_features=1,
    n_samples=100,
    noise=50,
    random_state=8,
)
# Eyeball the raw data before fitting anything.
plt.scatter(X, y)
# 2. Split into train / test sets (70% / 30%, fixed seed) and show both
#    subsets side by side.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=8)
plt.subplot(1, 2, 1)
plt.scatter(X_train, y_train, c='g')
plt.title("训练集散点图")  # left panel: training-set scatter
plt.subplot(1, 2, 2)
plt.scatter(X_test, y_test, c='orange')
# BUG FIX: the right panel shows the TEST set but was titled "训练集散点图"
# (training set) — a copy-paste mistake.
plt.title("测试集散点图")
# (pasted notebook output, not code) Text(0.5, 1.0, '训练集散点图')
# 3. Fit the line y = w*x + b by batch gradient descent.
# 3.1 Parameter initialisation.
w = 1       # slope (starts as a scalar; becomes a 1-element array after the first update)
b = -100    # intercept
lr = 0.001  # learning rate

# 3.2 Accumulate, over the WHOLE training set, the two gradient sums
#     (negative partial derivatives of the squared-error loss, up to a
#     constant factor): sum_w for w and sum_b for b.
sum_w = 0
sum_b = 0
for x_i, y_i in zip(X_train, y_train):
    y_hat = w * x_i + b
    sum_w += (y_i - y_hat) * x_i
    sum_b += y_i - y_hat
# 3.3 One batch update of w and b.
w += lr * sum_w
b += lr * sum_b
# Parameters after a single step (values from the original notebook run):
# w -> array([8.47987404])
# b -> array([-92.20644646])
# BUG FIX: the bare `array([...])` output lines pasted from the notebook
# would raise NameError when this file is run as a script; kept as comments.
# 3.4 Repeat the gradient-sum + update cycle (steps 3.2 and 3.3) for a
#     fixed number of epochs; each epoch uses the full training batch.
epochs = 1000
for _ in range(epochs):
    sum_w = 0
    sum_b = 0
    for x_i, y_i in zip(X_train, y_train):
        prediction = w * x_i + b
        residual = y_i - prediction
        sum_w += residual * x_i
        sum_b += residual
    # One batch update per epoch.
    w += lr * sum_w
    b += lr * sum_b
# 3.5 Visualise the fitted line on top of the training scatter.
xx = np.linspace(-4, 4, 100)  # dense grid spanning the feature range
yy = w * xx + b
plt.scatter(X_train, y_train, c='g')
plt.plot(xx, yy)
plt.title("线性回归拟合图")
# BUG FIX: legend labels attach to artists in draw order (scatter first,
# then the line), so the original tuple labelled the scatter "拟合直线".
plt.legend(('训练集散点', "拟合直线"))
# (pasted notebook output, not code) <matplotlib.legend.Legend at 0x18221f4cb38>
# 3.6 Mean squared error on the training and test sets.
total_loss_train = 0
for x_i, y_i in zip(X_train, y_train):
    y_hat = w * x_i + b          # (original had a duplicated `y_hat =y_hat=`)
    total_loss_train += (y_hat - y_i) ** 2

total_loss_test = 0
for x_i, y_i in zip(X_test, y_test):
    y_hat = w * x_i + b
    # BUG FIX: the original accumulated the test error into
    # total_loss_train, leaving total_loss_test stuck at 0 (see the
    # stale "... 0.0" output below) and inflating the reported train MSE.
    total_loss_test += (y_hat - y_i) ** 2

print(total_loss_train / len(X_train), total_loss_test / len(X_test))
# (stale notebook output from the buggy version) [3859.46923139] 0.0