PyTorch Task 6: Understanding More Neural Network Optimization Methods

1. Understanding the different optimizers

a. Stochastic gradient descent and 2D optimization

import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

# Gradient descent
# Define a relatively complicated objective function
J = lambda w: 1.5 * w ** 4 - 15 * w ** 3 + 3 * w ** 2
# A noise factor is multiplied onto the derivative of J to simulate stochastic gradient descent, where each step's gradient is computed from a randomly chosen subset of the data
J_prime = lambda w: (6 * w ** 3 - 45 * w ** 2 + 6 * w) * (1 + 10 * np.random.random())

# w = np.linspace(-10, 20, 100)

# print(w)
# plt.plot(w, J(w))
# plt.show()

# Accumulating several gradient samples and averaging them reduces the effect of the noise, but a plain average is not the best approach
# J = 0
# for i in range(100):
#     J += J_prime(1)
# J / 100

# # Momentum: we want the accumulated quantity to put more weight on the current gradient, so we keep a time-varying exponentially weighted moving average
# J = 0
# # Record the smoothed gradient values
# JJ = []
#
# for i in range(1000):
#     # Weight the current gradient against the running average; J_prime is the current gradient
#     J = 0.9 * J + 0.1 * J_prime(1)
#     JJ.append(J)

# plt.plot(JJ)
# plt.show()

# # Gradient descent
# w2 = 1
# epoch = 100
# lr = 0.01
# Loss = []
# W = []
#
# for i in range(epoch):
#     w2 = w2 - lr * (J_prime(w2))  # update the parameter using the gradient
#     Loss.append(J(w2))
#     W.append(w2)
#
# plt.plot(Loss)
# plt.figure()
# plt.plot(W)
# print(w2)

# # Momentum gradient descent; the velocity must be initialized
# w = 1
# epoch = 100
# lr = 0.001
# beta = 0.5
# y = []
# v = 0
# Loss = []
# W = []
# for i in range(epoch):
#     # Maintain a velocity v; J_prime is the current gradient and beta is the weight
#     v = beta * v + (1 - beta) * J_prime(w)
#     w = w - lr * v
#     Loss.append(J(w))
#     W.append(w)
#
# plt.plot(Loss)
# plt.figure()
# plt.plot(W)

# A 2D problem
# A simple 2D optimization implemented with gradient descent (here the gradients are exact, without the noise term)
J = lambda w1, w2: w1 ** 2 + 10 * w2 ** 2
J_prime1 = lambda w1: 2 * w1
J_prime2 = lambda w2: 20 * w2

w1 = 1
w2 = -1
epoch = 200
lr = 0.01
y = []
v = 0
s = 0
Loss = []
W1 = []
W2 = []
for i in range(epoch):
    w1 = w1 - lr * (J_prime1(w1))
    w2 = w2 - lr * (J_prime2(w2))
    W1.append(w1)
    W2.append(w2)
    Loss.append(J(w1, w2))
plt.plot(Loss)
plt.figure()
plt.plot(W1)
plt.plot(W2)
plt.show()
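
Before moving on to AdaGrad, here is a minimal sketch of momentum applied to the same 2D problem (this block is my own addition rather than part of the original notes; the values of lr and beta are illustrative choices). The velocity term is an exponentially weighted average of past gradients, which damps movement along the steep w2 direction:

import numpy as np
import matplotlib.pyplot as plt

J = lambda w1, w2: w1 ** 2 + 10 * w2 ** 2
J_prime1 = lambda w1: 2 * w1
J_prime2 = lambda w2: 20 * w2

w1, w2 = 1, -1
epoch = 200
lr = 0.01
beta = 0.9  # weight on the accumulated velocity (assumed value)
v1 = v2 = 0
Loss, W1, W2 = [], [], []
for i in range(epoch):
    # velocity = exponentially weighted average of the gradients
    v1 = beta * v1 + (1 - beta) * J_prime1(w1)
    v2 = beta * v2 + (1 - beta) * J_prime2(w2)
    w1 = w1 - lr * v1
    w2 = w2 - lr * v2
    W1.append(w1)
    W2.append(w2)
    Loss.append(J(w1, w2))
plt.plot(Loss)
plt.figure()
plt.plot(W1)
plt.plot(W2)
plt.show()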

b. AdaGrad: the adaptive gradient method

import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

# AdaGrad
J = lambda w1, w2: w1 ** 2 + 10 * w2 ** 2
J_prime1 = lambda w1: 2 * w1
J_prime2 = lambda w2: 20 * w2

w1 = 1
w2 = -1
epoch = 200
lr = 0.1
y = []
v = 0
s = 0
Loss = []
W1 = []
W2 = []
s1 = s2 = 0
for i in range(epoch):
    s1 += J_prime1(w1) ** 2
    w1 = w1 - lr * (J_prime1(w1) / np.sqrt(s1))
    s2 += J_prime2(w2) ** 2
    w2 = w2 - lr * (J_prime2(w2) / np.sqrt(s2))
    W1.append(w1)
    W2.append(w2)
    Loss.append(J(w1, w2))

plt.plot(Loss)
plt.figure()
plt.plot(W1)
plt.plot(W2)
print(w1, w2)

# Introduce the momentum idea: replace the running sum with an exponentially weighted average of the squared gradient
s = 0
S = []
beta = 0.8
for i in range(100):
    s = beta * s + (1 - beta) * J_prime1(w1) ** 2
    S.append(np.sqrt(s))
plt.plot(S)

c. RMSProp

import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

# RMSProp

# Scale each direction separately; using an exponentially weighted average instead of AdaGrad's running sum keeps the denominator from growing without bound, which alleviates the problem of the steps shrinking too much after long optimization

J = lambda w1, w2: w1 ** 2 + 10 * w2 ** 2
J_prime1 = lambda w1: 2 * w1
J_prime2 = lambda w2: 20 * w2

w1 = 1
w2 = -1
epoch = 200
lr = 0.01
beta2 = 0.5
y = []
v = 0
s = 0
Loss = []
W1 = []
W2 = []
s1 = s2 = 0
for i in range(epoch):
    s1 = beta2 * s1 + (1 - beta2) * (J_prime1(w1) ** 2)
    s1_correct = s1 / (1 - beta2 ** (i + 1))  # bias-corrected estimate (computed but not used in the update below)
    w1 = w1 - lr * (J_prime1(w1) / np.sqrt(s1))
    s2 = beta2 * s2 + (1 - beta2) * (J_prime2(w2) ** 2)
    s2_correct = s2 / (1 - beta2 ** (i + 1))  # bias-corrected estimate (computed but not used in the update below)
    w2 = w2 - lr * (J_prime2(w2) / np.sqrt(s2))
    W1.append(w1)
    W2.append(w2)
    Loss.append(J(w1, w2))

plt.plot(Loss)
plt.figure()
plt.plot(W1)
plt.plot(W2)
print(w1, w2)

d. Adam

import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

# Adam combines the strengths of several optimizers: momentum helps it move through saddle points, the AdaGrad/RMSProp-style
# scaling corrects the step size in directions with weak gradients, and bias correction compensates for the moving averages being biased toward zero in the first iterations
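# For reference (added here, not in the original notes), the standard Adam update at step t with gradient g_t is:
#   v_t = beta1 * v_{t-1} + (1 - beta1) * g_t                          (first moment)
#   s_t = beta2 * s_{t-1} + (1 - beta2) * g_t ** 2                     (second moment)
#   v_hat = v_t / (1 - beta1 ** t),  s_hat = s_t / (1 - beta2 ** t)    (bias correction)
#   w_t = w_{t-1} - lr * v_hat / (sqrt(s_hat) + eps)
# The loop below follows this scheme, omitting the small eps term.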

# 1D optimization implemented with Adam
J = lambda w: 1.5 * w ** 4 - 15 * w ** 3 + 3 * w ** 2
# A noise factor is multiplied onto the derivative of J to simulate stochastic gradient descent, where each step's gradient is computed from a randomly chosen subset of the data
J_prime = lambda w: (6 * w ** 3 - 45 * w ** 2 + 6 * w) * (1 + 10 * np.random.random())

w = 1
epoch = 200
lr = 0.01
beta1 = 0.9
beta2 = 0.99
y = []
v = 0
s = 0
Loss = []
W = []
for i in range(epoch):
    g = J_prime(w)  # draw one noisy gradient sample and reuse it for both moments
    v = beta1 * v + (1 - beta1) * g
    v_correct = v / (1 - beta1 ** (i + 1))  # bias-corrected first moment
    s = beta2 * s + (1 - beta2) * (g ** 2)
    s_correct = s / (1 - beta2 ** (i + 1))  # bias-corrected second moment
    w = w - lr * (v_correct / np.sqrt(s_correct))
    W.append(w)
    Loss.append(J(w))

plt.plot(Loss)
plt.figure()
plt.plot(W)

e. Adam for 2D optimization

import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

# Adam combines the strengths of several optimizers: momentum helps it move through saddle points, the AdaGrad/RMSProp-style
# scaling corrects the step size in directions with weak gradients, and bias correction compensates for the moving averages being biased toward zero in the first iterations

J = lambda w1, w2: w1 ** 2 + 10 * w2 ** 2
J_prime1 = lambda w1: 2 * w1
J_prime2 = lambda w2: 20 * w2

w1 = 1
w2 = -1
epoch = 200
lr = 0.01
beta1 = 0.9
beta2 = 0.99
y = []
v1 = v2 = 0
s1 = s2 = 0
Loss = []
W1 = []
W2 = []
for i in range(epoch):
    v1 = beta1 * v1 + (1 - beta1) * J_prime1(w1)
    v1_correct = v1 / (1 - beta1 ** (i + 1))  # bias-corrected first moment
    s1 = beta2 * s1 + (1 - beta2) * (J_prime1(w1) ** 2)
    s1_correct = s1 / (1 - beta2 ** (i + 1))  # bias-corrected second moment
    w1 = w1 - lr * (v1_correct / np.sqrt(s1_correct))
    v2 = beta1 * v2 + (1 - beta1) * J_prime2(w2)
    v2_correct = v2 / (1 - beta1 ** (i + 1))
    s2 = beta2 * s2 + (1 - beta2) * (J_prime2(w2) ** 2)
    s2_correct = s2 / (1 - beta2 ** (i + 1))
    w2 = w2 - lr * (v2_correct / np.sqrt(s2_correct))
    W1.append(w1)
    W2.append(w2)
    Loss.append(J(w1, w2))

plt.plot(Loss)
plt.figure()
plt.plot(W1)
plt.plot(W2)
plt.show()

2. Choosing an optimizer in PyTorch

from torch.autograd import Variable
import torch
from torch import nn
from torch import optim
import numpy as np
import matplotlib.pyplot as plt

xy = np.loadtxt('./data/diabetes.csv.gz', delimiter=',', dtype=np.float32)
x_data = Variable(torch.from_numpy(xy[:, 0:-1]))
y_data = Variable(torch.from_numpy(xy[:, [-1]]))

print(x_data.data.shape)
print(y_data.data.shape)


class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.l1 = nn.Linear(8, 6)
        self.l2 = nn.Linear(6, 4)
        self.l3 = nn.Linear(4, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out1 = self.sigmoid(self.l1(x))
        out2 = self.sigmoid(self.l2(out1))
        y_pred = self.sigmoid(self.l3(out2))
        return y_pred


model = Model()

criterion = nn.BCELoss()  # mean reduction (the old size_average=True behavior)
optimizer = optim.Adam(model.parameters(), lr=0.05, betas=(0.9, 0.999), weight_decay=0.001)

Loss = []
for epoch in range(200):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    if epoch % 20 == 0:
        print("epoch =", epoch, "loss =", loss.data[0])
        Loss.append(loss.data[0])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
plt.plot(Loss)
hour_var = Variable(torch.randn(1, 8))
print("predict", model(hour_var).data[0] > 0.5)
plt.show()
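
The training loop above uses Adam only. To actually compare optimizers, any of PyTorch's built-in optimizers can be dropped in by changing only the constructor line; the rest of the loop (zero_grad, backward, step) stays the same. A sketch reusing the model defined above (the learning rates here are illustrative values, not tuned):

# SGD with momentum
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9)
# AdaGrad
# optimizer = optim.Adagrad(model.parameters(), lr=0.05)
# RMSProp
# optimizer = optim.RMSprop(model.parameters(), lr=0.01, alpha=0.99)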

 
