# Import modules.  NOTE(review): this file is an exported Jupyter notebook —
# the `%matplotlib inline` magic below only runs inside IPython/Jupyter and
# is a syntax error in a plain .py script.
import numpy as np
import os
import matplotlib.pyplot as plt
%matplotlib inline
# Seed NumPy's global RNG so the generated data and all descent runs below
# are reproducible.
np.random.seed(42)
# NOTE(review): "DTR" looks like a typo for "DIR"; kept as-is because
# save_fig() below references this exact name.
PROJECT_ROOT_DTR="."
MODEL_ID="linear_models"
# Helper that saves the current matplotlib figure as a PNG.
def save_fig(fig_id,tight_layout=True):
    """Save the current figure to ./images/<MODEL_ID>/<fig_id>.png.

    Parameters
    ----------
    fig_id : str
        Base name of the output file (without extension).
    tight_layout : bool
        When True, call ``plt.tight_layout()`` before saving so axis
        labels are not clipped.  (Bug fix: the original accepted this
        flag but never used it.)
    """
    # Target path: ./images/<MODEL_ID>/<fig_id>.png
    path=os.path.join(PROJECT_ROOT_DTR,"images",MODEL_ID,fig_id+".png")
    # Bug fix: create the output directory if missing — plt.savefig raises
    # FileNotFoundError on a fresh checkout otherwise.
    os.makedirs(os.path.dirname(path),exist_ok=True)
    print("Saving figure",fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path,format="png",dpi=300)
# Silence the noisy "internal gelsd" LAPACK warning that scipy/sklearn can
# emit on some platforms when solving least squares.
import warnings
warnings.filterwarnings(action="ignore",message="internal gelsd")
# numpy is already imported above; this re-import is a harmless leftover of
# the notebook cell structure.
import numpy as np
# Generate 100 noisy samples from the line y = 4 + 3x, with x uniform on [0, 2).
x=2*np.random.rand(100,1)
y=4+3*x+np.random.randn(100,1)
# Scatter-plot the raw data.
plt.plot(x,y,"b.")
plt.xlabel("$x_1$",fontsize=18)
plt.ylabel("$y$",rotation=0,fontsize=18)
plt.axis([0,2,0,15])
# Save the figure
save_fig("generated_data_plot")
plt.show()
# Prepend a bias column of ones so theta_0 is learned as an ordinary weight.
x_b=np.c_[np.ones((100,1)),x]
x_b  # bare expression: displays the array when run as a notebook cell
# Two probe points (x = 0 and x = 2) used below to draw fitted lines.
x_new=np.array([[0],[2]])
x_new_b=np.c_[np.ones((2,1)),x_new]
# Fit scikit-learn's closed-form linear regression for comparison.
from sklearn.linear_model import LinearRegression
lin_reg=LinearRegression()
lin_reg.fit(x,y)
lin_reg.intercept_,lin_reg.coef_  # notebook display; should be near (4, 3) up to noise
# Batch gradient descent: one full-dataset gradient step per iteration.
eta=0.1            # learning rate
n_iterations=1000
m=100              # number of training samples
theta=np.random.randn(2,1)  # random initialization of [theta_0, theta_1]
for iteration in range(n_iterations):
    # MSE gradient over the whole training set: (2/m) * X^T (X·theta - y)
    gradients=2/m*x_b.T.dot(x_b.dot(theta)-y)
    theta=theta-eta*gradients
x_new_b.dot(theta)  # notebook display: predictions at x = 0 and x = 2
theta_path_bgd=[]  # filled by the eta=0.1 run below; records theta after each step

def plot_gradient_descent(theta,eta,theta_path=None):
    """Run 1000 batch-gradient-descent steps from ``theta`` and plot progress.

    Reads the module-level training data ``x``, ``y``, ``x_b`` and probe
    points ``x_new``/``x_new_b``.  The fitted line is drawn for the first
    10 steps; when ``theta_path`` is a list, every intermediate ``theta``
    is appended to it.
    """
    n_samples=len(x_b)
    plt.plot(x,y,"b.")
    total_steps=1000
    for step in range(total_steps):
        # Show how the fit evolves: draw the line for the first 10 steps.
        if step<10:
            plt.plot(x_new,x_new_b.dot(theta),"b-")
        grad=2/n_samples*x_b.T.dot(x_b.dot(theta)-y)
        theta=theta-eta*grad
        if theta_path is not None:
            theta_path.append(theta)
    plt.xlabel("$x_1$",fontsize=18)
    plt.axis([0,2,0,15])
    plt.title(r"$\eta={}$".format(eta),fontsize=16)
# Compare three learning rates side by side; reseed first so the random
# initial theta is the same for all three panels.
np.random.seed(42)
theta=np.random.randn(2,1)
plt.figure(figsize=(10,4))
plt.subplot(131);plot_gradient_descent(theta,eta=0.02)
plt.ylabel("$y$",rotation=0,fontsize=18)
# Only the middle run records its parameter trajectory, for the path plot below.
plt.subplot(132);plot_gradient_descent(theta,eta=0.1,theta_path=theta_path_bgd)
plt.subplot(133);plot_gradient_descent(theta,eta=0.5)
save_fig("gradient_descent_plot")
plt.show()
# Stochastic gradient descent 随机梯度下降
# (NOTE: this stray markdown-cell text said "Mini-batch gradint descent",
#  but the section below implements *stochastic* GD; mini-batch GD follows it.)
# Stochastic gradient descent: update theta from one random sample at a time.
theta_path_sgd=[]           # records theta after every single-sample update
m=len(x_b)
np.random.seed(42)          # reproducible sample picks and initialization
n_epochs=50
eta=0.1                     # learning rate — hoisted out of the inner loop
                            # (it was pointlessly re-assigned every iteration)
theta=np.random.randn(2,1)  # random initialization
for epoch in range(n_epochs):
    for i in range(m):
        # Visualize progress: plot the first 20 fitted lines of epoch 0.
        if epoch==0 and i<20:
            y_predict=x_new_b.dot(theta)
            style="b-"
            plt.plot(x_new,y_predict,style)
        # Pick one training sample at random (with replacement).
        random_index=np.random.randint(m)
        xi=x_b[random_index:random_index+1]
        yi=y[random_index:random_index+1]
        # Single-sample MSE gradient (batch size 1, so the 2/m factor is 2).
        gradients=2*xi.T.dot(xi.dot(theta)-yi)
        theta=theta-eta*gradients
        theta_path_sgd.append(theta)
plt.plot(x,y,'b.')
plt.xlabel("$x_1$",fontsize=18)
plt.ylabel("$y$",rotation=0,fontsize=18)
plt.axis([0,2,0,15])
save_fig("sgd_plot")
plt.show()
# Mini-batch gradient descent: update theta from chunks of 20 samples.
theta_path_mgd=[]       # records theta after every mini-batch update
n_iterations=50         # number of epochs
minibatch_size=20
np.random.seed(42)      # reproducible shuffling and initialization
eta=0.1                 # learning rate — hoisted out of the inner loop
                        # (it was pointlessly re-assigned every iteration)
theta=np.random.randn(2,1)
for epoch in range(n_iterations):
    # Reshuffle the training set at the start of each epoch.
    shuffled_indices=np.random.permutation(m)
    x_b_shuffled=x_b[shuffled_indices]
    y_shuffled=y[shuffled_indices]
    # Walk through the shuffled data in chunks of `minibatch_size`.
    for i in range(0,m,minibatch_size):
        xi=x_b_shuffled[i:i+minibatch_size]
        yi=y_shuffled[i:i+minibatch_size]
        gradients=2/minibatch_size*xi.T.dot(xi.dot(theta)-yi)
        theta=theta-eta*gradients
        theta_path_mgd.append(theta)
# Convert the recorded parameter trajectories to arrays and plot all three
# descent paths in (theta_0, theta_1) space.
theta_path_bgd=np.array(theta_path_bgd)
theta_path_sgd=np.array(theta_path_sgd)
theta_path_mgd=np.array(theta_path_mgd)
plt.figure(figsize=(7,4))
plt.plot(theta_path_sgd[:,0],theta_path_sgd[:,1],"r-s",linewidth=1,label='Stochastic')
plt.plot(theta_path_mgd[:,0],theta_path_mgd[:,1],"g-+",linewidth=2,label='Mini-batch')
plt.plot(theta_path_bgd[:,0],theta_path_bgd[:,1],"b-o",linewidth=3,label='Batch')
plt.legend(loc='upper left',fontsize=16)
plt.xlabel(r"$\theta_0$",fontsize=20)
plt.ylabel(r'$\theta_1$ ',fontsize=20,rotation=0)
# Bug fix: the original wrote `plt.axis=([...])`, which ASSIGNED the list to
# the name `plt.axis` (clobbering the function module-wide) instead of
# calling it — the axis limits were never applied.
plt.axis([2.5,4.5,2.3,3.9])
save_fig("gradient_descent_paths_plot")
plt.show()