pytorch task6: Understanding more neural network optimization methods

1. Getting to know different optimizers

a. Stochastic gradient descent and 2D optimization

import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

# Gradient descent
# Define a fairly complex objective function
J = lambda w: 1.5 * w ** 4 - 15 * w ** 3 + 3 * w ** 2
# A noise factor is added to the derivative of J to simulate stochastic gradient descent,
# which at each step estimates the gradient from a randomly drawn subset of the data
J_prime = lambda w: (6 * w ** 3 - 45 * w ** 2 + 6 * w) * (1 + 10 * np.random.random())

# w = np.linspace(-10, 20, 100)

# print(w)
# plt.plot(w, J(w))
# plt.show()

# Accumulating gradients and averaging them reduces the influence of the noise,
# but a plain average over all past gradients is not the best approach
# J = 0
# for i in range(100):
#     J += J_prime(1)
# J / 100
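# A runnable version of this averaging check might look like the following (a minimal
# sketch; the evaluation point w = 1 and the sample count 100 are arbitrary choices).
# Because the noise here is multiplicative, the average converges to the mean of the
# noisy gradient rather than to the noise-free derivative, but its variance shrinks
# as more samples are averaged.
# samples = [J_prime(1) for _ in range(100)]
# print(np.mean(samples), np.std(samples))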

# # Momentum idea: we want the running accumulation to put more emphasis on the current
# # gradient, so we use a weighted (exponentially decaying) average over time
# J = 0
# # used to record the smoothed gradient values
# JJ = []
#
# for i in range(1000):
#     # give the current gradient a weight; J_prime is the current gradient
#     J = 0.9 * J + 0.1 * J_prime(1)
#     JJ.append(J)

# plt.plot(JJ)
# plt.show()

# # Gradient descent
# w2 = 1
# epoch = 100
# lr = 0.01
# Loss = []
# W = []
#
# for i in range(epoch):
#     w2 = w2 - lr * (J_prime(w2))  # update the parameter
#     Loss.append(J(w2))
#     W.append(w2)
#
# plt.plot(Loss)
# plt.figure()
# plt.plot(W)
# print(w2)

# # Momentum gradient descent; be sure to initialize the velocity
# w = 1
# epoch = 100
# lr = 0.001
# beta = 0.5
# y = []
# v = 0
# Loss = []
# W = []
# for i in range(epoch):
#     # maintain a velocity v; J_prime is the current gradient, beta is the weight
#     v = beta * v + (1 - beta) * J_prime(w)
#     w = w - lr * v
#     Loss.append(J(w))
#     W.append(w)
#
# plt.plot(Loss)
# plt.figure()
# plt.plot(W)

# Two-dimensional problem
# A simple 2D problem optimized with plain gradient descent
J = lambda w1, w2: w1 ** 2 + 10 * w2 ** 2
J_prime1 = lambda w1: 2 * w1
J_prime2 = lambda w2: 20 * w2

w1 = 1
w2 = -1
epoch = 200
lr = 0.01
y = []
v = 0
s = 0
Loss = []
W1 = []
W2 = []
for i in range(epoch):
    w1 = w1 - lr * (J_prime1(w1))
    w2 = w2 - lr * (J_prime2(w2))
    W1.append(w1)
    W2.append(w2)
    Loss.append(J(w1, w2))
plt.plot(Loss)
plt.figure()
plt.plot(W1)
plt.plot(W2)
plt.show()
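The momentum update above was only sketched in one dimension (in the commented-out block). For comparison on the same 2D bowl, here is a minimal self-contained sketch of momentum gradient descent; lr = 0.01 and beta = 0.9 are illustrative choices, not tuned values:

import numpy as np
import matplotlib.pyplot as plt

# same anisotropic bowl as above
J = lambda w1, w2: w1 ** 2 + 10 * w2 ** 2
J_prime1 = lambda w1: 2 * w1
J_prime2 = lambda w2: 20 * w2

w1, w2 = 1, -1
lr, beta = 0.01, 0.9
v1 = v2 = 0
Loss = []
for i in range(200):
    # exponentially weighted average of past gradients (the "velocity")
    v1 = beta * v1 + (1 - beta) * J_prime1(w1)
    v2 = beta * v2 + (1 - beta) * J_prime2(w2)
    w1 = w1 - lr * v1
    w2 = w2 - lr * v2
    Loss.append(J(w1, w2))

plt.plot(Loss)
plt.show()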

b. Ada (AdaGrad): adaptive gradient scaling

import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

# Ada
J = lambda w1, w2: w1 ** 2 + 10 * w2 ** 2
J_prime1 = lambda w1: 2 * w1
J_prime2 = lambda w2: 20 * w2

w1 = 1
w2 = -1
epoch = 200
lr = 0.1
y = []
v = 0
s = 0
Loss = []
W1 = []
W2 = []
s1 = s2 = 0
for i in range(epoch):
    s1 += J_prime1(w1) ** 2
    w1 = w1 - lr * (J_prime1(w1) / np.sqrt(s1))
    s2 += J_prime2(w2) ** 2
    w2 = w2 - lr * (J_prime2(w2) / np.sqrt(s2))
    W1.append(w1)
    W2.append(w2)
    Loss.append(J(w1, w2))

plt.plot(Loss)
plt.figure()
plt.plot(W1)
plt.plot(W2)
print(w1, w2)

# Introduce the momentum idea: an exponentially weighted average of the squared gradients
s = 0
S = []
beta = 0.8
for i in range(100):
    s = (1 - beta) * s + beta * (J_prime1(w1) ** 2)
    S.append(np.sqrt(s))
plt.plot(S)
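For reference, the difference between AdaGrad's accumulator and the exponentially weighted one used by RMSProp in the next subsection can be written as follows (standard formulations; g_t is the gradient at step t, eta the learning rate, and in practice a small epsilon is added inside the square root for numerical stability):

AdaGrad:  $s_t = s_{t-1} + g_t^2$,  update  $w_{t+1} = w_t - \frac{\eta}{\sqrt{s_t}}\, g_t$

RMSProp:  $s_t = \beta_2\, s_{t-1} + (1 - \beta_2)\, g_t^2$,  update  $w_{t+1} = w_t - \frac{\eta}{\sqrt{s_t}}\, g_t$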

c. RMSProp

import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

# RMSProp

# Scale each direction separately; the exponential moving average keeps the effective step
# size from shrinking away over long optimization runs (AdaGrad's weakness)

J = lambda w1, w2: w1 ** 2 + 10 * w2 ** 2
J_prime1 = lambda w1: 2 * w1
J_prime2 = lambda w2: 20 * w2

w1 = 1
w2 = -1
epoch = 200
lr = 0.01
beta2 = 0.5
y = []
v = 0
s = 0
Loss = []
W1 = []
W2 = []
s1 = s2 = 0
for i in range(epoch):
    s1 = beta2 * s1 + (1 - beta2) * (J_prime1(w1) ** 2)
    # bias-corrected estimate (computed for reference; the update below uses the uncorrected s)
    s1_correct = s1 / (1 - beta2 ** (i + 1))
    w1 = w1 - lr * (J_prime1(w1) / np.sqrt(s1))
    s2 = beta2 * s2 + (1 - beta2) * (J_prime2(w2) ** 2)
    s2_correct = s2 / (1 - beta2 ** (i + 1))
    w2 = w2 - lr * (J_prime2(w2) / np.sqrt(s2))
    W1.append(w1)
    W2.append(w2)
    Loss.append(J(w1, w2))

plt.plot(Loss)
plt.figure()
plt.plot(W1)
plt.plot(W2)
print(w1, w2)

d. Adam

import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

# Adam combines the strengths of several optimizers: momentum helps it move through saddle
# points and noisy gradients, the Ada-style scaling rescales the step in directions where the
# gradient is weak, and bias correction compensates for the moving-average estimates being
# biased toward zero in the first few steps

# 1D example optimized with Adam
J = lambda w: 1.5 * w ** 4 - 15 * w ** 3 + 3 * w ** 2
# A noise factor is added to the derivative of J to simulate stochastic gradient descent,
# which at each step estimates the gradient from a randomly drawn subset of the data
J_prime = lambda w: (6 * w ** 3 - 45 * w ** 2 + 6 * w) * (1 + 10 * np.random.random())

w = 1
epoch = 200
lr = 0.01
beta1 = 0.9
beta2 = 0.99
y = []
v = 0
s = 0
Loss = []
W = []
for i in range(epoch):
    v = beta1 * v + (1 - beta1) * J_prime(w)
    v_correct = v / (1 - beta1 ** (i + 1))
    s = beta2 * s + (1 - beta2) * (J_prime(w) ** 2)
    s_correct = s / (1 - beta2 ** (i + 1))
    w = w - lr * (v / np.sqrt(s))
    W.append(w)
    Loss.append(J(w))

plt.plot(Loss)
plt.figure()
plt.plot(W)
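For reference, the standard Adam update with bias correction (the code above computes the corrected estimates v_correct and s_correct but, like the RMSProp example, applies the uncorrected ones; epsilon is a small constant for numerical stability):

$v_t = \beta_1 v_{t-1} + (1 - \beta_1)\, g_t, \qquad s_t = \beta_2 s_{t-1} + (1 - \beta_2)\, g_t^2$

$\hat{v}_t = \frac{v_t}{1 - \beta_1^t}, \qquad \hat{s}_t = \frac{s_t}{1 - \beta_2^t}, \qquad w_{t+1} = w_t - \frac{\eta}{\sqrt{\hat{s}_t} + \epsilon}\, \hat{v}_t$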

e. Adam for 2D optimization

import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

# Adam combines the strengths of several optimizers: momentum helps it move through saddle
# points and noisy gradients, the Ada-style scaling rescales the step in directions where the
# gradient is weak, and bias correction compensates for the moving-average estimates being
# biased toward zero in the first few steps

J = lambda w1, w2: w1 ** 2 + 10 * w2 ** 2
J_prime1 = lambda w1: 2 * w1
J_prime2 = lambda w2: 20 * w2

w1 = 1
w2 = -1
epoch = 200
lr = 0.01
beta1 = 0.9
beta2 = 0.99
y = []
v1 = v2 = 0
s1 = s2 = 0
Loss = []
W1 = []
W2 = []
for i in range(epoch):
    v1 = beta1 * v1 + (1 - beta1) * J_prime1(w1)
    v1_correct = v1 / (1 - beta1 ** (i + 1))
    s1 = beta2 * s1 + (1 - beta2) * (J_prime1(w1) ** 2)
    s1_correct = s1 / (1 - beta2 ** (i + 1))
    w1 = w1 - lr * (v1 / np.sqrt(s1))
    v2 = beta1 * v2 + (1 - beta1) * J_prime2(w2)
    v2_correct = v2 / (1 - beta1 ** (i + 1))
    s2 = beta2 * s2 + (1 - beta2) * (J_prime2(w2) ** 2)
    s2_correct = s2 / (1 - beta2 ** (i + 1))
    w2 = w2 - lr * (v2 / np.sqrt(s2))
    W1.append(w1)
    W2.append(w2)
    Loss.append(J(w1, w2))

plt.plot(Loss)
plt.figure()
plt.plot(W1)
plt.plot(W2)
plt.show()
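As a point of comparison, here is a minimal self-contained sketch in which the update actually applies the bias-corrected estimates; the constant 1e-8 is an illustrative epsilon for numerical stability, not taken from the original code:

import numpy as np

J = lambda w1, w2: w1 ** 2 + 10 * w2 ** 2
J_prime1 = lambda w1: 2 * w1
J_prime2 = lambda w2: 20 * w2

w1, w2 = 1, -1
lr, beta1, beta2 = 0.01, 0.9, 0.99
eps = 1e-8  # illustrative small constant to avoid division by zero
v1 = v2 = s1 = s2 = 0
for i in range(200):
    v1 = beta1 * v1 + (1 - beta1) * J_prime1(w1)
    s1 = beta2 * s1 + (1 - beta2) * (J_prime1(w1) ** 2)
    # apply the bias-corrected first- and second-moment estimates
    w1 = w1 - lr * (v1 / (1 - beta1 ** (i + 1))) / (np.sqrt(s1 / (1 - beta2 ** (i + 1))) + eps)
    v2 = beta1 * v2 + (1 - beta1) * J_prime2(w2)
    s2 = beta2 * s2 + (1 - beta2) * (J_prime2(w2) ** 2)
    w2 = w2 - lr * (v2 / (1 - beta1 ** (i + 1))) / (np.sqrt(s2 / (1 - beta2 ** (i + 1))) + eps)
print(w1, w2, J(w1, w2))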

2. Choosing an optimizer in PyTorch

from torch.autograd import Variable
import torch
from torch import nn
from torch import optim
import numpy as np
import matplotlib.pyplot as  plt

xy = np.loadtxt('./data/diabetes.csv.gz', delimiter=',', dtype=np.float32)
x_data = Variable(torch.from_numpy(xy[:, 0:-1]))
y_data = Variable(torch.from_numpy(xy[:, [-1]]))

print(x_data.data.shape)
print(y_data.data.shape)


class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.l1 = nn.Linear(8, 6)
        self.l2 = nn.Linear(6, 4)
        self.l3 = nn.Linear(4, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out1 = self.sigmoid(self.l1(x))
        out2 = self.sigmoid(self.l2(out1))
        y_pred = self.sigmoid(self.l3(out2))
        return y_pred


model = Model()

criterion = nn.BCELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=0.05, betas=(0.9, 0.999), weight_decay=0.001)

Loss = []
for epoch in range(200):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    if epoch % 20 == 0:
        print("epoch =", epoch, "loss =", loss.item())
        Loss.append(loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
plt.plot(Loss)
hour_var = Variable(torch.randn(1, 8))
print("predict", model(hour_var).item() > 0.5)
plt.show()
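The hand-written updates from section 1 all have counterparts in torch.optim, so trying a different optimizer only means changing one line of the script above; the commented alternatives below use illustrative hyperparameters, not values tuned for this dataset:

from torch import optim

# Any of the methods implemented by hand above is available in torch.optim;
# the training loop stays the same, only this line changes:
# optimizer = optim.SGD(model.parameters(), lr=0.05)                    # plain gradient descent
# optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9)      # momentum
# optimizer = optim.Adagrad(model.parameters(), lr=0.05)                # AdaGrad
# optimizer = optim.RMSprop(model.parameters(), lr=0.05, alpha=0.99)    # RMSProp
optimizer = optim.Adam(model.parameters(), lr=0.05, betas=(0.9, 0.999), weight_decay=0.001)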

 
