I. Fundamentals
1. Learning rate
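The learning rate α sets the step size of each gradient descent update; the experiments in the code below try several values of α on the raw (unnormalized) features. As a reminder, each iteration updates the parameters as

$$ w_j \leftarrow w_j - \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j}, \qquad b \leftarrow b - \alpha \frac{\partial J(\mathbf{w},b)}{\partial b} $$

so an α that is too large makes the cost oscillate or diverge, while an α that is too small makes convergence needlessly slow.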
2. Normalization
Use the functions in lab_utils_multi.py to normalize the data; see the code below for details.
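The normalization used here is z-score normalization: each feature column is shifted by its mean and divided by its standard deviation,

$$ x_j \leftarrow \frac{x_j - \mu_j}{\sigma_j} $$

where μ_j and σ_j are the mean and standard deviation of feature j over the training set. Afterwards every feature has mean 0 and standard deviation 1, which lets gradient descent use a much larger learning rate.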
3. Code
import numpy as np
import matplotlib.pyplot as plt
from lab_utils_multi import (load_house_data,
run_gradient_descent,norm_plot,
plt_equal_scale,plot_cost_i_w)
from lab_utils_common import dlc
np.set_printoptions(precision=2)
plt.style.use('deeplearning.mplstyle')
def zscore_norm_features(x):
    """Z-score normalize x column by column.

    The axis argument selects the axis along which a NumPy reduction
    operates; for a 2-D array, axis=0 reduces over the rows, producing
    one value per feature column.

    Returns:
        x_norm: x normalized column by column
        mu: shape (n,), the mean of each feature
        sigma: shape (n,), the standard deviation of each feature
    """
    mu = np.mean(x, axis=0)       # per-feature mean
    sigma = np.std(x, axis=0)     # per-feature standard deviation
    x_norm = (x - mu) / sigma
    return x_norm, mu, sigma
if __name__ == '__main__':
    x_train, y_train = load_house_data()
    x_features = ["size(sqft)", "bedrooms", "floors", "age"]

    # Show how each feature relates to the house price
    # fig, ax = plt.subplots(1, 4, figsize=(12, 3), sharey=True)
    # for i in range(len(ax)):
    #     ax[i].scatter(x_train[:, i], y_train)
    #     ax[i].set_xlabel(x_features[i])
    # ax[0].set_ylabel("price (1000's)")
    # plt.title('features vs. price')
    # plt.show()
    # Set alpha to 9.9e-7
    # _, _, hist = run_gradient_descent(x_train, y_train, 10, alpha=9.9e-7)
    # plot_cost_i_w(x_train, y_train, hist)
    # Use a slightly smaller alpha: 9e-7
    # _, _, hist = run_gradient_descent(x_train, y_train, 10, alpha=9e-7)
    # plot_cost_i_w(x_train, y_train, hist)
    # Use an even smaller alpha: 1e-7
    # _, _, hist = run_gradient_descent(x_train, y_train, 10, alpha=1e-7)
    # plot_cost_i_w(x_train, y_train, hist)
    # Visualize the data distribution before, during, and after normalization
    x_norm, mu, sigma = zscore_norm_features(x_train)
    # fig, ax = plt.subplots(1, 3, figsize=(12, 3))
    # ax[0].scatter(x_train[:, 0], x_train[:, 3])
    # ax[0].set_xlabel(x_features[0])
    # ax[0].set_ylabel(x_features[3])
    # ax[0].set_title("unnormalized")
    # ax[0].axis('equal')
    # x_mean = x_train - mu
    # ax[1].scatter(x_mean[:, 0], x_mean[:, 3])
    # ax[1].set_xlabel(x_features[0])
    # ax[1].set_ylabel(x_features[3])
    # ax[1].set_title(r'x-$\mu$')
    # ax[1].axis("equal")
    # ax[2].scatter(x_norm[:, 0], x_norm[:, 3])
    # ax[2].set_xlabel(x_features[0])
    # ax[2].set_ylabel(x_features[3])
    # ax[2].set_title("z-score normalized")
    # ax[2].axis('equal')
    # plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    # fig.suptitle("distribution of features before, during, after normalization")
    # plt.show()
    # print(f'peak to peak range by column in raw x: {np.ptp(x_train, axis=0)}')
    # print(f'peak to peak range by column in normalized x: {np.ptp(x_norm, axis=0)}')
    # Compare each feature's distribution before and after normalization
    # fig, ax = plt.subplots(1, 4, figsize=(12, 3))
    # for i in range(len(ax)):
    #     norm_plot(ax[i], x_train[:, i])
    #     ax[i].set_xlabel(x_features[i])
    # ax[0].set_ylabel("count")
    # fig.suptitle("distribution of features before normalization")
    # plt.show()
    # fig, ax = plt.subplots(1, 4, figsize=(12, 3))
    # for i in range(len(ax)):
    #     norm_plot(ax[i], x_norm[:, i])
    #     ax[i].set_xlabel(x_features[i])
    # ax[0].set_ylabel("count")
    # fig.suptitle("distribution of features after normalization")
    # plt.show()
    # Now rerun gradient descent on the normalized data with a much larger learning rate
    w_norm, b_norm, hist = run_gradient_descent(x_norm, y_train, 1000, 1.0e-1)
    # Compare the model's predictions with the targets
    # m = x_norm.shape[0]
    # yp = np.zeros(m)
    # for i in range(m):
    #     yp[i] = np.dot(x_norm[i], w_norm) + b_norm
    # fig, ax = plt.subplots(1, 4, figsize=(12, 3), sharey=True)
    # for i in range(len(ax)):
    #     ax[i].scatter(x_train[:, i], y_train, label='target')
    #     ax[i].set_xlabel(x_features[i])
    #     ax[i].scatter(x_train[:, i], yp, color=dlc['dlorange'], label='predict')
    # ax[0].set_ylabel("price")
    # ax[0].legend()
    # fig.suptitle("target versus prediction using z-score normalized model")
    # plt.show()
    # Predict the price of a new house: normalize it with the training-set mu and sigma first
    # x_house = np.array([1200, 3, 1, 40])
    # x_house_norm = (x_house - mu) / sigma
    # print(x_house_norm)
    # x_house_predict = np.dot(x_house_norm, w_norm) + b_norm
    # print(f'predicted price of a house with 1200 sqft, '
    #       f'3 bedrooms, 1 floor, 40 years old = ${x_house_predict*1000:.0f}')
    plt_equal_scale(x_train, x_norm, y_train)
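Since load_house_data and the plotting helpers require the course's lab files, here is a minimal self-contained sketch (with made-up synthetic data, not the course dataset) that checks what z-score normalization should produce: every column of the output should have mean ≈ 0 and standard deviation ≈ 1.

import numpy as np

def zscore_norm_features(x):
    mu = np.mean(x, axis=0)
    sigma = np.std(x, axis=0)
    return (x - mu) / sigma, mu, sigma

rng = np.random.default_rng(0)
# two fake features on very different scales, e.g. size(sqft) and bedrooms
x = rng.normal(loc=[1500.0, 3.0], scale=[600.0, 1.2], size=(100, 2))
x_norm, mu, sigma = zscore_norm_features(x)
print(np.mean(x_norm, axis=0))  # ~[0. 0.]
print(np.std(x_norm, axis=0))   # ~[1. 1.]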
4. Normal equation
The normal equation is a closed-form method for computing the parameters of a linear regression model directly, with no need for iterative optimization like gradient descent.
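Setting the gradient of the mean-squared-error cost to zero and solving gives

$$ \theta = (X^{\top} X)^{-1} X^{\top} y $$

where X is the design matrix (one row per example, with a leading column of ones for the intercept term) and y is the vector of targets. This is exactly what the function below computes.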
def normalEqn(X, y):
    # Closed-form least squares: theta = (X^T X)^(-1) X^T y
    theta = np.linalg.inv(X.T @ X) @ X.T @ y
    return theta
The whole computation can be read as minimizing the linear regression cost function (mean squared error): solving the matrix equation directly yields the parameters theta that minimize the cost. Unlike gradient descent, the normal equation is an analytical solution that produces the optimal parameters in a single computation; it suits smaller training sets with a modest number of features where X^T X is invertible (positive definite), since inverting that matrix becomes expensive as the feature count grows.
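A short usage sketch (the data here is made up for illustration): build X with a leading column of ones for the intercept, then recover known coefficients from noisy targets. If X^T X is singular or ill-conditioned, np.linalg.pinv is a more robust drop-in replacement for np.linalg.inv.

import numpy as np

def normalEqn(X, y):
    # Closed-form least squares: theta = (X^T X)^(-1) X^T y
    return np.linalg.inv(X.T @ X) @ X.T @ y

rng = np.random.default_rng(1)
m = 50
x1 = rng.uniform(0.0, 10.0, size=m)
X = np.column_stack([np.ones(m), x1])         # column of ones models the intercept
y = 4.0 + 2.5 * x1 + rng.normal(0.0, 0.1, m)  # targets from y = 4 + 2.5*x1 + noise
print(normalEqn(X, y))                        # ~[4.  2.5]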