PyTorch基础练习-task6
一、常用优化器简介
了解不同优化器
书写优化器代码
1.1 梯度下降法
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Gradient descent: define a noisy 1-D objective and inspect its gradient.
# Objective: J(w) = 1.5 w^4 - 15 w^3 + 3 w^2.
# FIX: the original wrote `3 * w * 2` (i.e. 6w), but J_prime below contains
# the `+ 6 * w` term, which is the derivative of 3 * w ** 2 — so the
# intended term is `3 * w ** 2`.
J = lambda w: 1.5 * w ** 4 - 15 * w ** 3 + 3 * w ** 2
# Noisy gradient: the exact derivative scaled by a random factor in [1, 2)
# to simulate stochastic (minibatch) gradients.
J_prime = lambda w: (6 * w ** 3 - 45 * w ** 2 + 6 * w) * (1 + np.random.random())
w = np.linspace(-10, 20, 100)
plt.plot(w, J(w))
# Average many noisy gradient samples at w = 1 (the noise factor has mean
# 1.5, so the average approaches 1.5x the true gradient).
# NOTE: `J` is deliberately reused here as a plain accumulator, shadowing
# the lambda above — preserved notebook behavior.
J = 0
for i in range(100):
    J += J_prime(1)
J / 100
# Plain stochastic gradient descent on J, starting from w = 1.
w = 1
epoch = 100
lr = 0.001
Loss = []   # loss history per step
W = []      # parameter trajectory per step
# FIX: `3 * w * 2` -> `3 * w ** 2`, so J_prime really is dJ/dw
# (up to the random noise factor).
J = lambda w: 1.5 * w ** 4 - 15 * w ** 3 + 3 * w ** 2
J_prime = lambda w: (6 * w ** 3 - 45 * w ** 2 + 6 * w) * (1 + np.random.random())
for i in range(epoch):
    # Step downhill along the (noisy) gradient.
    w = w - lr * J_prime(w)
    Loss.append(J(w))
    W.append(w)
plt.plot(Loss)
plt.figure()
plt.plot(W)
print(w)
1.2 Momentum
# Momentum idea: an exponential moving average (EMA) of noisy gradients
# sampled at a fixed point w = 1; the EMA converges toward the mean gradient.
# FIX: `3 * w * 2` -> `3 * w ** 2` (consistent with J_prime).
J = lambda w: 1.5 * w ** 4 - 15 * w ** 3 + 3 * w ** 2
J_prime = lambda w: (6 * w ** 3 - 45 * w ** 2 + 6 * w) * (1 + np.random.random())
# NOTE: `J` is reused as the EMA accumulator, shadowing the lambda above —
# preserved notebook behavior.
J = 0
JJ = []
for i in range(1000):
    # EMA with decay 0.9: J_t = 0.9 * J_{t-1} + 0.1 * g_t
    J = 0.9 * J + 0.1 * J_prime(1)
    JJ.append(J)
plt.plot(JJ)
# Gradient descent with momentum (EMA of gradients), beta = 0.5.
w = 1
epoch = 100
lr = 0.001
beta = 0.5
y = []
v = 0       # velocity: EMA of gradients
Loss = []
W = []
# FIX: `3 * w * 2` -> `3 * w ** 2` (consistent with J_prime).
J = lambda w: 1.5 * w ** 4 - 15 * w ** 3 + 3 * w ** 2
J_prime = lambda w: (6 * w ** 3 - 45 * w ** 2 + 6 * w) * (1 + np.random.random())
for i in range(epoch):
    # Update the velocity, then step along it.
    v = beta * v + (1 - beta) * J_prime(w)
    w = w - lr * v
    Loss.append(J(w))
    W.append(w)
plt.plot(Loss)
plt.figure()
plt.plot(W)
w
1.2.1 二维优化
# Two-dimensional gradient descent on an anisotropic quadratic bowl.
# The w2 axis has 10x the curvature of w1, so w2 converges much faster.
def J(w1, w2):
    return w1 ** 2 + 10 * w2 ** 2

def J_prime1(w1):
    return 2 * w1

def J_prime2(w2):
    return 20 * w2

w1, w2 = 1, -1
epoch = 200
lr = 0.01
y = []
v = 0
s = 0
Loss, W1, W2 = [], [], []
for _ in range(epoch):
    # Independent descent step along each coordinate.
    w1 -= lr * J_prime1(w1)
    w2 -= lr * J_prime2(w2)
    W1.append(w1)
    W2.append(w2)
    Loss.append(J(w1, w2))
plt.plot(Loss)
plt.figure()
plt.plot(W1)
plt.plot(W2)
w1, w2
1.3 Ada自适应梯度调节法
# AdaGrad: scale each coordinate's step by the square root of its
# accumulated squared-gradient history. Steep directions are damped;
# because the accumulator only grows, effective steps shrink over time.
def J(w1, w2):
    return w1 ** 2 + 10 * w2 ** 2

def J_prime1(w1):
    return 2 * w1

def J_prime2(w2):
    return 20 * w2

w1, w2 = 1, -1
epoch = 200
lr = 0.1
y = []
v = 0
s = 0
Loss, W1, W2 = [], [], []
s1 = s2 = 0   # per-coordinate squared-gradient accumulators
for _ in range(epoch):
    # Accumulate squared gradients, then step scaled by 1/sqrt(history).
    s1 = s1 + J_prime1(w1) ** 2
    s2 = s2 + J_prime2(w2) ** 2
    w1 -= lr * J_prime1(w1) / np.sqrt(s1)
    w2 -= lr * J_prime2(w2) / np.sqrt(s2)
    W1.append(w1)
    W2.append(w2)
    Loss.append(J(w1, w2))
plt.plot(Loss)
plt.figure()
plt.plot(W1)
plt.plot(W2)
w1, w2
# Momentum idea applied to the squared gradient (the RMSProp ingredient):
# an EMA of grad**2 sampled at the current w1.
# NOTE(review): `beta` is set to 0.8 but the loop hardcodes 0.2 / 0.9 —
# preserved as-is; presumably `s = beta*s + (1-beta)*grad**2` was intended.
s = 0
S = []
beta = 0.8
for _ in range(100):
    s = 0.2 * s + 0.9 * (J_prime1(w1) ** 2)
    S.append(np.sqrt(s))
plt.plot(S)
1.4 RMSProp
# RMSProp: per-direction scaling by an EMA of squared gradients.
# Unlike AdaGrad's ever-growing sum, the EMA forgets old history, so the
# effective step does not decay to zero over a long optimization.
J = lambda w1, w2: w1 ** 2 + 10 * w2 ** 2
J_prime1 = lambda w1: 2 * w1
J_prime2 = lambda w2: 20 * w2
w1 = 1
w2 = -1
epoch = 200
lr = 0.01
beta2 = 0.5
y = []
v = 0
s = 0
Loss = []
W1 = []
W2 = []
s1 = s2 = 0   # EMAs of squared gradients, one per coordinate
for i in range(epoch):
    # Gradients are deterministic here, so sampling once per step is safe.
    g1 = J_prime1(w1)
    s1 = beta2 * s1 + (1 - beta2) * g1 ** 2
    # FIX: the bias-corrected estimate was computed but never used in the
    # update; divide by sqrt of the corrected value as intended.
    s1_correct = s1 / (1 - beta2 ** (i + 1))
    w1 = w1 - lr * g1 / np.sqrt(s1_correct)
    g2 = J_prime2(w2)
    s2 = beta2 * s2 + (1 - beta2) * g2 ** 2
    s2_correct = s2 / (1 - beta2 ** (i + 1))
    w2 = w2 - lr * g2 / np.sqrt(s2_correct)
    W1.append(w1)
    W2.append(w2)
    Loss.append(J(w1, w2))
plt.plot(Loss)
plt.figure()
plt.plot(W1)
plt.plot(W2)
w1, w2
1.5 Adam
# 1-D Adam: momentum (first moment) + RMS scaling (second moment),
# both with bias correction.
w = 1
epoch = 200
lr = 0.1
beta1 = 0.9    # first-moment decay
beta2 = 0.99   # second-moment decay
y = []
v = 0
s = 0
Loss = []
W = []
# FIX: `3 * w * 2` -> `3 * w ** 2` (J_prime is the derivative of this).
J = lambda w: 1.5 * w ** 4 - 15 * w ** 3 + 3 * w ** 2
J_prime = lambda w: (6 * w ** 3 - 45 * w ** 2 + 6 * w) * (1 + np.random.random())
for i in range(epoch):
    # FIX: sample the noisy gradient ONCE per step. The original called
    # J_prime(w) separately for v and s, feeding two different random
    # gradients into the two moment estimates.
    g = J_prime(w)
    v = beta1 * v + (1 - beta1) * g
    v_correct = v / (1 - beta1 ** (i + 1))
    s = beta2 * s + (1 - beta2) * g ** 2
    s_correct = s / (1 - beta2 ** (i + 1))
    # FIX: step with the bias-corrected moments (they were computed but
    # unused before).
    w = w - lr * v_correct / np.sqrt(s_correct)
    W.append(w)
    Loss.append(J(w))
plt.plot(Loss)
plt.figure()
plt.plot(W)
w
# 2-D Adam optimization on the anisotropic quadratic bowl.
w1 = 1
w2 = -1
epoch = 200
lr = 0.01
beta1 = 0.9
beta2 = 0.99
y = []
v1 = v2 = 0   # first moments
s1 = s2 = 0   # second moments
Loss = []
W1 = []
W2 = []
# FIX: the original objective read `w ** 2` — a stale global left over from
# the 1-D cell — instead of its own parameter `w1 ** 2`.
J = lambda w1, w2: w1 ** 2 + 10 * w2 ** 2
J_prime1 = lambda w1: 2 * w1
J_prime2 = lambda w2: 20 * w2
for i in range(epoch):
    g1 = J_prime1(w1)
    v1 = beta1 * v1 + (1 - beta1) * g1
    v1_correct = v1 / (1 - beta1 ** (i + 1))
    s1 = beta2 * s1 + (1 - beta2) * g1 ** 2
    s1_correct = s1 / (1 - beta2 ** (i + 1))
    # FIX: step with the bias-corrected moments (computed but unused before).
    w1 = w1 - lr * v1_correct / np.sqrt(s1_correct)
    g2 = J_prime2(w2)
    v2 = beta1 * v2 + (1 - beta1) * g2
    v2_correct = v2 / (1 - beta1 ** (i + 1))
    s2 = beta2 * s2 + (1 - beta2) * g2 ** 2
    s2_correct = s2 / (1 - beta2 ** (i + 1))
    w2 = w2 - lr * v2_correct / np.sqrt(s2_correct)
    W1.append(w1)
    W2.append(w2)
    Loss.append(J(w1, w2))
plt.plot(Loss)
plt.figure()
plt.plot(W1)
plt.plot(W2)
w1, w2