- 梯度下降体现的是贪心思想(每一步只沿当前下降最快的方向更新 w),因此只能保证找到局部最优,不一定是全局最优
- 上一讲用穷举法来找 w;但如果权重不止一个(例如有两个 w),或者模型不是线性的,穷举法就很难找到合适的 w
- 目标:找到使 cost(w) 最小的 w,即 $w^{*} = \arg\min_{w} \mathrm{cost}(w)$
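- 对 cost(w) 求导可得 w 的更新公式:$w \leftarrow w - \alpha \dfrac{\partial\,\mathrm{cost}}{\partial w}$,其中 $\dfrac{\partial\,\mathrm{cost}}{\partial w} = \dfrac{1}{N}\sum_{n=1}^{N} 2\,x_n\,(x_n w - y_n)$,$\alpha$ 为学习率(下面的代码中取 0.01)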
- 代码:
-
# Batch gradient descent for the linear model y = w * x.

# Training set: the targets follow y = 2 * x.
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

# Initial guess for the weight; rebound by the training loop below.
w = 1.0


def forward(x):
    """Return the model prediction ``x * w`` using the module-level ``w``."""
    return x * w


def cost(xs, ys):
    """Return the mean squared error of the model over paired samples.

    Args:
        xs: Sized sequence of inputs.
        ys: Targets, paired with ``xs`` position by position.

    Raises:
        ZeroDivisionError: If ``xs`` is empty.
    """
    return sum((forward(x) - y) ** 2 for x, y in zip(xs, ys)) / len(xs)


def gradient(xs, ys):
    """Return the derivative of ``cost`` with respect to ``w``.

    Each sample contributes ``2 * x * (x * w - y)``; the contributions are
    averaged over ``len(xs)``.

    Args:
        xs: Sized sequence of inputs.
        ys: Targets, paired with ``xs`` position by position.

    Raises:
        ZeroDivisionError: If ``xs`` is empty.
    """
    return sum(2 * x * (x * w - y) for x, y in zip(xs, ys)) / len(xs)


if __name__ == "__main__":
    # Imported here so that the model functions above stay importable
    # without matplotlib; only the training run needs it.
    import matplotlib.pyplot as plt

    learning_rate = 0.01
    epoch_list = []
    cost_list = []

    print('predict (before training)', 4, forward(4))
    for epoch in range(100):
        # The cost is measured before the update, so the value logged for
        # an epoch belongs to the weight the epoch started with.
        cost_val = cost(x_data, y_data)
        grad_val = gradient(x_data, y_data)
        w -= learning_rate * grad_val
        print('epoch:', epoch, 'w=', w, 'loss=', cost_val)
        epoch_list.append(epoch)
        cost_list.append(cost_val)
    print('predict (after training)', 4, forward(4))

    plt.plot(epoch_list, cost_list)
    plt.ylabel('cost')
    plt.xlabel('epoch')
    plt.show()
Stochastic Gradient Descent:随机梯度下降
- 不同之处:梯度下降用全部样本的平均损失 cost 求梯度;随机梯度下降每次只用单个样本的 loss 求梯度,并且每看一个样本就立即更新一次 w
代码:
import matplotlib.pyplot as plt
# Training set: inputs and their targets (the targets equal 2 * x).
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]
# Initial guess for the weight; the training loop rebinds it on every step.
w = 1.0
# Linear model: y = w * x.
def forward(x):
    """Return the prediction ``x * w`` using the module-level weight ``w``."""
    return x * w
# Per-sample loss: the squared error of one prediction (not averaged).
def loss(x, y):
    """Return the squared error ``(forward(x) - y) ** 2`` for one sample.

    Args:
        x: Input value.
        y: Target value for ``x``.
    """
    y_pred = forward(x)
    return (y_pred - y) ** 2
# Gradient of the per-sample loss with respect to w.
def gradient(x, y):
    """Return the derivative of ``(x * w - y) ** 2`` with respect to ``w``.

    By the chain rule this is ``2 * x * (x * w - y)``. The factor is
    ``2 * x``, not ``x * x``.

    Args:
        x: Input value.
        y: Target value for ``x``.
    """
    return 2 * x * (x * w - y)
# Per-epoch history used for the plot at the end.
epoch_list = []
cost_list = []
print('predict (before training)', 4, forward(4))
for epoch in range(100):
    for x, y in zip(x_data, y_data):
        # Stochastic update: step on this one sample's gradient right away
        # instead of averaging over the whole training set first.
        grad = gradient(x, y)
        w = w - 0.01 * grad  # 0.01 is the learning rate
        print("\tgrad: ", x, y, grad)
        loss_val = loss(x, y)
    # loss_val is the loss of the epoch's last sample, taken after its update.
    print("progress:", epoch, "w=", w, "loss=", loss_val)
    epoch_list.append(epoch)
    cost_list.append(loss_val)
print('predict (after training)', 4, forward(4))
plt.plot(epoch_list, cost_list)
plt.ylabel('cost')
plt.xlabel('epoch')
plt.show()
progress: 97 w= 1.9999999999999596 loss= 1.4590771302967834e-26
grad: 3.0 6.0 -1.0871303857129533e-12
progress: 98 w= 1.9999999999999705 loss= 7.888609052210118e-27
grad: 3.0 6.0 -7.993605777301127e-13
progress: 99 w= 1.9999999999999785 loss= 4.203839763922772e-27
predict (after training) 4 7.999999999999914