import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the advertising dataset from a CSV file (path is relative to the
# current working directory).
for_mat = "Advertising.csv"
advertising = pd.read_csv(for_mat)
advertising.head()  # notebook-style preview; the result is not used here

# Use TV as the independent variable and sales as the dependent variable:
# standardize TV (subtract the mean, divide by the standard deviation) and
# center sales (subtract the mean).
TV = advertising["TV"].values
sales = advertising["sales"].values
scaled_TV = (TV - TV.mean()) / TV.std()
centered_sales = sales - sales.mean()
n = len(scaled_TV)  # number of samples
"""
画出RSS随着w变化的曲线
画出RSS在w=0处的切线;画出RSS的最小值点
用函数plot_rss_w实现
"""
def plot_rss_w():
w_vec = np.linspace(-2, 10, 100) # 权重向量
rss_vec = [] # 记录不同权重下的RSS值
for w in w_vec:
rss = np.sum((centered_sales - scaled_TV * w) ** 2) / 2 / n
rss_vec.append(rss)
w_0 = 0
rss_0 = np.sum((centered_sales - scaled_TV * w_0) ** 2) / 2 / n
w_min = np.sum(scaled_TV * centered_sales) / np.sum(scaled_TV ** 2)
rss_min = np.sum((centered_sales - scaled_TV * w_min) ** 2) / 2 / n
plt.plot(w_vec, rss_vec)
plt.scatter(w_0, rss_0, s=100, c="y", marker="o")
plt.scatter(w_min, rss_min, s=100, c="r", marker="D")
plt.plot(np.linspace(-1, 1, 50),
np.linspace(-1, 1, 50) * np.mean(-scaled_TV * centered_sales) + np.sum(centered_sales ** 2) / 2 / n, '--',
linewidth=2.0)
plt.xlabel("w", fontsize=16)
plt.ylabel("RSS", fontsize=16)
plt.show()
# plot_rss_w()
"""
使用全数据梯度下降法迭代更新w
"""
w, lr = 0, 0.1
input_i, target = scaled_TV, centered_sales
w_record = []
loss_record = []
for iterm in range(20):
pred = input_i * w
loss = np.sum((pred - target) ** 2) / 2 / n
w_record.append(w)
loss_record.append(loss)
delta = pred - target
w = w - lr - np.sum(input_i * delta) / n
if iterm % 5 == 0 or iterm == 19:
print("iterm: %3d;Loss: %0.3f" % (iterm, loss))
w_record.append(w)
loss_record.append(np.sum((pred - target)**2)/2/n)
# 3.1.3 Full-batch gradient descent
# (Scraped page footer: latest recommended article published 2024-09-26 11:55:47)