# SGD here: update w using the gradient of a single random sample, which helps escape saddle points.
# (In practice, mini-batch SGD is more commonly used nowadays.)
import numpy as np
import matplotlib.pyplot as plt
# Build a tiny toy dataset: three random integer inputs in [1, 10),
# with targets following the ground-truth rule y = 2x.
x_data = np.random.randint(1, 10, size=(3))
y_data = x_data * 2

# Initial guess for the weight SGD should learn (true value is 2.0).
w = 1.0
def forward(x):
    """Linear model without bias: predict y_hat = w * x using the global weight w."""
    return w * x
# 对某个随机样本求loss
def loss(x, y):
    """Squared error of the model's prediction for a single sample (x, y)."""
    residual = forward(x) - y
    return residual ** 2
# 对某个随机样本求gradient
def gradient(x, y):
    """Analytic per-sample gradient d(loss)/dw = 2 * x * (w*x - y) for the global weight w."""
    return 2 * x * (w * x - y)
# Training loop: plain SGD — update w after every single (shuffled) sample.
EPOCHS = 50          # single source of truth for the epoch count (was hard-coded twice)
LEARNING_RATE = 0.005

loss_list = []       # average loss per epoch, for plotting
for epoch in range(EPOCHS):
    l_sum = 0.0
    # Shuffle x_data and y_data with the SAME permutation so pairs stay aligned.
    # np.random.permutation accepts an int directly; np.arange was redundant.
    shuffle_ix = np.random.permutation(x_data.shape[0])
    x_data = x_data[shuffle_ix]
    y_data = y_data[shuffle_ix]
    for x, y in zip(x_data, y_data):
        grad = gradient(x, y)           # gradient of this single sample
        w = w - LEARNING_RATE * grad    # SGD step
        print("grad: ", grad)
        l_sum += loss(x, y)             # loss measured after the update
    avg_loss = l_sum / x_data.shape[0]
    loss_list.append(avg_loss)
    # Bug fix: report the epoch-average loss (what loss_list stores and what is
    # plotted below), not the loss of the last sample only.
    print("Epoch: ", epoch, "\t", "w: ", w, "\t", "loss: ", avg_loss)

# Plot the learning curve; x-axis length follows loss_list so it cannot
# drift out of sync with the epoch count.
plt.plot(range(len(loss_list)), loss_list)
plt.xlabel("epoch")
plt.ylabel("loss")
plt.show()
plt.close()