Feature Scaling and Learning Rate

import numpy as np
np.set_printoptions(precision=2)
import matplotlib.pyplot as plt
dlblue = '#0096ff'; dlorange = '#FF9300'; dldarkred='#C00000'; dlmagenta='#FF40FF'; dlpurple='#7030A0';
#plt.style.use('./deeplearning.mplstyle')
from lab_utils_multi import  load_house_data, compute_cost, run_gradient_descent
from lab_utils_multi import  norm_plot, plt_contour_multi, plt_equal_scale, plot_cost_i_w

# load the dataset
X_train, y_train = load_house_data()
X_features = ['size(sqft)','bedrooms','floors','age']   # names of the feature columns of X

fig,ax=plt.subplots(1, 4, figsize=(12, 3), sharey=True)  # one subplot per feature
for i in range(len(ax)):
    ax[i].scatter(X_train[:,i],y_train)  # X_train[:, i] selects all rows of column i
    ax[i].set_xlabel(X_features[i])
ax[0].set_ylabel("Price (1000's)")
plt.show()

# Try several learning rates and inspect the resulting cost curves to find a suitable one.

#set alpha to 9.9e-7
_, _, hist = run_gradient_descent(X_train, y_train, 10, alpha = 9.9e-7)
# `_` is a placeholder for return values we don't need; only hist is used here
plot_cost_i_w(X_train, y_train, hist)

#set alpha to 9e-7
_,_,hist = run_gradient_descent(X_train, y_train, 10, alpha = 9e-7)
plot_cost_i_w(X_train, y_train, hist)

#set alpha to 1e-7
_,_,hist = run_gradient_descent(X_train, y_train, 10, alpha = 1e-7)
plot_cost_i_w(X_train,y_train,hist)
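
# run_gradient_descent is a helper from lab_utils_multi. A minimal sketch of
# what it does (assumptions: squared-error cost, batch updates, zero
# initialization; the lab's actual helper also records and prints a progress table):
def gradient_descent_sketch(X, y, iters, alpha):
    """Batch gradient descent for linear regression (illustrative only)."""
    m, n = X.shape
    w, b = np.zeros(n), 0.0
    cost_hist = []
    for _ in range(iters):
        err = X @ w + b - y                # residuals, shape (m,)
        w = w - alpha * (X.T @ err) / m    # gradient step for the weights
        b = b - alpha * err.sum() / m      # gradient step for the bias
        cost_hist.append((err @ err) / (2 * m))
    return w, b, cost_hist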

#Feature Scaling
# Z-score normalization:
# subtract each feature's mean, then divide by its standard deviation
def zscore_normalize_features(X):
    """
    computes X, z-score normalized by column

    Args:
      X (ndarray): Shape (m,n) input data, m examples, n features

    Returns:
      X_norm (ndarray): Shape (m,n)  input normalized by column
      mu (ndarray):     Shape (n,)   mean of each feature
      sigma (ndarray):  Shape (n,)   standard deviation of each feature
    """
    # find the mean of each column/feature
    mu = np.mean(X, axis=0)      # mu will have shape (n,)
    # find the standard deviation of each column/feature
    sigma = np.std(X, axis=0)    # sigma will have shape (n,)
    # element-wise, subtract mu for that column from each example, divide by std for that column
    X_norm = (X - mu) / sigma

    return (X_norm, mu, sigma)

# check our work
# from sklearn.preprocessing import scale
# scale(X_train, axis=0, with_mean=True, with_std=True, copy=True)
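
# Executable version of the check above (assumes scikit-learn is installed;
# it is not otherwise used in this lab):
try:
    from sklearn.preprocessing import scale
    print("matches sklearn:", np.allclose(scale(X_train), zscore_normalize_features(X_train)[0]))
except ImportError:
    pass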

mu     = np.mean(X_train,axis=0)
sigma  = np.std(X_train,axis=0)
X_mean = (X_train - mu)
X_norm = (X_train - mu)/sigma

fig,ax=plt.subplots(1, 3, figsize=(12, 3))
ax[0].scatter(X_train[:,0], X_train[:,3])
ax[0].set_xlabel(X_features[0]); ax[0].set_ylabel(X_features[3]);
ax[0].set_title("unnormalized")
ax[0].axis('equal')

ax[1].scatter(X_mean[:,0], X_mean[:,3])
ax[1].set_xlabel(X_features[0]); ax[1].set_ylabel(X_features[3]);
ax[1].set_title(r"X - $\mu$")
ax[1].axis('equal')

ax[2].scatter(X_norm[:,0], X_norm[:,3])
ax[2].set_xlabel(X_features[0]); ax[2].set_ylabel(X_features[3]);
ax[2].set_title(r"Z-score normalized")
ax[2].axis('equal')
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
fig.suptitle("distribution of features before, during, after normalization")
plt.show()

# normalize the original features
X_norm, X_mu, X_sigma = zscore_normalize_features(X_train)
print(f"X_mu = {X_mu}, \nX_sigma = {X_sigma}")
print(f"Peak to Peak range by column in Raw        X:{np.ptp(X_train,axis=0)}")
print(f"Peak to Peak range by column in Normalized X:{np.ptp(X_norm,axis=0)}")

# compare each feature's distribution before and after normalization
fig,ax=plt.subplots(1, 4, figsize=(12, 3))
for i in range(len(ax)):
    norm_plot(ax[i],X_train[:,i],)
    ax[i].set_xlabel(X_features[i])
ax[0].set_ylabel("count");
fig.suptitle("distribution of features before normalization")
plt.show()
fig,ax=plt.subplots(1,4,figsize=(12,3))
for i in range(len(ax)):
    norm_plot(ax[i],X_norm[:,i],)
    ax[i].set_xlabel(X_features[i])
ax[0].set_ylabel("count");
fig.suptitle(f"distribution of features after normalization")

plt.show()

w_norm, b_norm, hist = run_gradient_descent(X_norm, y_train, 1000, 1.0e-1, )

# predict targets using the normalized features,
# then plot the predictions against the original (unnormalized) features
m = X_norm.shape[0]
yp = np.zeros(m)
for i in range(m):
    yp[i] = np.dot(X_norm[i], w_norm) + b_norm  # predicted price for example i
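
# The loop above can equivalently be written as one vectorized expression:
yp_vec = X_norm @ w_norm + b_norm
assert np.allclose(yp, yp_vec)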

# plot predictions and targets versus original features
fig,ax=plt.subplots(1,4,figsize=(12, 3),sharey=True)
for i in range(len(ax)):
    ax[i].scatter(X_train[:,i],y_train, label = 'target')   # targets vs. the original features
    ax[i].set_xlabel(X_features[i])
    ax[i].scatter(X_train[:,i],yp,color=dlorange, label = 'predict')    # predictions from the normalized model
ax[0].set_ylabel("Price"); ax[0].legend();
fig.suptitle("target versus prediction using z-score normalized model")
plt.show()

# First, normalize our example.
x_house = np.array([1200, 3, 1, 40])
x_house_norm = (x_house - X_mu) / X_sigma
print(x_house_norm)
x_house_predict = np.dot(x_house_norm, w_norm) + b_norm
print(f" predicted price of a house with 1200 sqft, 3 bedrooms, 1 floor, 40 years old = ${x_house_predict*1000:0.0f}")

plt_equal_scale(X_train, X_norm, y_train)


'''
Feature scaling and learning rate — summary:

1. Load the dataset and plot each feature against the target, then run
   gradient descent to obtain w, b and the cost history, reported as a table
   of how w, b and the cost evolve.

2. Vary the learning rate alpha (and the number of iterations) to find a
   suitable value: 9.9e-7 diverges, 9e-7 oscillates, and 1e-7 converges but
   slowly.

3. Run gradient descent with the chosen learning rate to obtain the final
   w and b.

4. Next, apply feature scaling. Plot the raw data, the mean-subtracted data,
   and the fully normalized data side by side to see the two steps of z-score
   normalization (two features are used as x and y purely for visualization).

5. Normalize all of the original features and compare each feature's
   histogram and distribution before and after normalization.

6. With normalized features a much larger alpha can be used, which speeds
   things up considerably; rerun gradient descent to obtain w, b and the cost
   history. The scaled features give accurate results much faster. Note that
   at the end of this fairly short run the gradient of every parameter is
   already small; alpha = 0.1 is a good starting point for regression with
   normalized features.

7. Plot the predictions against the target values. Note that the predictions
   are computed with the normalized features while the plots use the original
   feature values.

8. Finally, verify with a single example (the x_house prediction above): a
   new example must be normalized with the training-set mu and sigma before
   applying w_norm and b_norm.
'''
