- 【写在前面】没有写博客好久了,好的习惯总是很难坚持啊!
1.什么是PyTorch,为什么选择PyTorch?
这里我不想去贴别人的标准答案,以我自身的感觉就是,pytorch是框架里最简单的一个,用户友好型,说别的没用。它和tensorflow一样,是深度学习的一个框架。
2.PyTorch的安装
上次接触pytorch的时候好像还不能支持windows,这次看看行不行。
好吧,搞开发还是老老实实用linux。。
打开pytorch官网(https://pytorch.org/get-started/locally/),用命令行安装,注意电脑没有GPU的话就选择no CUDA版本的,不要搞错了。
conda install pytorch-cpu torchvision-cpu -c pytorch
下载的速度比较慢,昨天提前走了,让电脑自己运行;今天来了发现还有几个文件没有安装好,又重新执行了一次上面的安装命令,就ok了。
下面来验证一下:
如上,先进入Python环境(我的环境是3.7.2),然后执行torch模块的验证语句(例如 import torch; print(torch.__version__))即可。
不使用Windows的原因:昨天执行安装下载命令的时候,配置的清华下载源都报错,那就算了吧。
3.通用代码实现流程(实现一个深度学习的代码流程)
使用CIFAR-10数据集实践pytorch的代码流程:
- 使用torchvision加载并预处理数据集
- 定义网络
- 定义损失函数和优化器
- 训练网络并更新网络参数
- 测试网络
数据加载及预处理
import torch as t
import torchvision as tv
import torchvision.transforms as transforms
from torchvision.transforms import ToPILImage
# Converter from tensors to PIL images; it is not used further in this snippet.
show = ToPILImage()
# The first run of torchvision downloads the dataset automatically.
# Preprocessing applied to every image: convert to a tensor, then normalize
# each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), ])
# Training set
trainset = tv.datasets.CIFAR10(root='/home/cy/tmp/data', train=True, download=True, transform=transform)
trainloader = t.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)
# Test set (same preprocessing, but iterated in a fixed order)
testset = tv.datasets.CIFAR10('/home/cy/tmp/data/', train=False, download=True, transform=transform)
testloader = t.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)
# Human-readable names for the ten class labels.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
定义网络
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Small LeNet-style CNN: two conv layers followed by three linear layers.

    ``fc1`` expects 16*5*5 features, which is what two 5x5 convolutions, each
    followed by 2x2 max-pooling, produce for 3-channel 32x32 inputs.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 raw class scores for each image in the batch ``x``."""
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten everything except the batch dimension. ``view`` is a tensor
        # method; ``torch.nn.functional`` has no ``view`` function.
        x = x.view(x.size()[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()
# print(net)
定义损失函数和优化器
from torch import optim
# Cross-entropy loss, applied later to the network outputs and the labels.
criterion = nn.CrossEntropyLoss()
# SGD with momentum over all parameters of `net`.
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
训练网络
所有网络的训练流程都是类似的,不断地执行以下流程:
- 输入数据
- 前向传播 + 反向传播
- 更新参数
t.set_num_threads(8)
for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # Each batch is an (inputs, labels) pair.
        inputs, labels = data
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        # Forward pass, loss, backward pass, parameter update.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Report the mean loss once every 2000 mini-batches.
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f'
                  % (epoch+1, i+1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')
# Compute the accuracy over the whole test set.
correct = 0
total = 0
# No gradients are needed for evaluation.
with t.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        # The index of the highest score is the predicted class.
        _, predicted = t.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps `correct` a plain Python int rather than a tensor.
        correct += (predicted == labels).sum().item()
print('10000张测试集中的准确率为:%d %%' % (100 * correct / total))
- 【第二天】的作业就看不懂了,只能学习别人的了
1.使用numpy和pytorch实现梯度下降
设定初始值
求梯度
在梯度方向上进行参数的更新
import numpy as np
# Numerical differentiation
def numerical_gradient(f, x, h=1e-4):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Args:
        f: Callable taking an array shaped like ``x`` and returning a scalar.
        x: Point at which to differentiate (array-like of any shape).
        h: Perturbation added to and subtracted from each coordinate.

    Returns:
        A float array with the same shape as ``x`` holding the estimated
        partial derivatives.
    """
    # Work on a float copy: perturbing an integer array would truncate
    # ``value +/- h`` back to ``value``, and the caller's array stays untouched.
    point = np.array(x, dtype=float)
    grad = np.zeros_like(point)
    for idx in np.ndindex(point.shape):
        original = point[idx]
        point[idx] = original + h
        f_plus = f(point)
        point[idx] = original - h
        f_minus = f(point)
        grad[idx] = (f_plus - f_minus) / (2 * h)
        # Restore the coordinate before perturbing the next one.
        point[idx] = original
    return grad
# Gradient descent
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """Minimize ``f`` by plain gradient descent starting from ``init_x``.

    Args:
        f: Callable taking an array and returning a scalar.
        init_x: Starting point (array-like); it is not modified.
        lr: Learning rate, i.e. the step size along the negative gradient.
        step_num: Number of update steps to perform.

    Returns:
        The point reached after ``step_num`` updates, as a float array.
    """
    # Copy so the in-place updates below do not overwrite the caller's array.
    x = np.array(init_x, dtype=float)
    for _ in range(step_num):
        x -= lr * numerical_gradient(f, x)
    return x
# Test function
def f_2(x):
    """Return the sum of squares of the first two components of ``x``."""
    return x[0]**2 + x[1]**2
# Starting point, away from the minimum of f_2 at the origin.
init_x = np.array([-3.0, 4.0])
# The returned array is not stored or printed here.
gradient_descent(f_2, init_x=init_x, lr=0.1, step_num=100)
2.numpy和pytorch实现线性回归
# numpy的线性回归
import numpy as np
# Sample observations: five x values and their matching y values.
x = np.array([1,3,2,1,3])
y = np.array([14,32,15,18,24])
# Compute the regression equation
def fit(x, y):
    """Fit the line ``y = b0 * x + b1`` by ordinary least squares.

    Args:
        x: 1-D sequence of inputs.
        y: 1-D sequence of targets, the same length as ``x``.

    Returns:
        A ``(b0, b1)`` tuple: slope and intercept.

    Raises:
        ValueError: If the lengths differ, or if ``x`` has fewer than two
            distinct values (the slope is then undefined).
    """
    if len(x) != len(y):
        raise ValueError(
            "x and y must have the same length, got %d and %d" % (len(x), len(y))
        )
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.size < 2:
        raise ValueError("at least two observations are required")
    x_mean = x.mean()
    y_mean = y.mean()
    x_centered = x - x_mean
    denominator = np.dot(x_centered, x_centered)
    if denominator == 0:
        # All x values are identical, so the slope cannot be determined.
        raise ValueError("x must contain at least two distinct values")
    numerator = np.dot(x_centered, y - y_mean)
    b0 = numerator / denominator
    b1 = y_mean - b0 * x_mean
    return b0, b1
def predict(x, b0, b1):
    """Evaluate the line with slope ``b0`` and intercept ``b1`` at ``x``."""
    return b0 * x + b1
莫烦python mark一下
# pytorch回归神经网络
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt
import torch.nn.functional as F
# 100 evenly spaced inputs in [-1, 1], reshaped into a column of shape (100, 1).
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)
# Quadratic targets plus random noise scaled by 0.2.
y = x.pow(2) + 0.2*torch.rand(x.size())
# NOTE(review): Variable is the legacy autograd wrapper; presumably unnecessary
# on PyTorch >= 0.4 - confirm the target version before removing it.
x, y = Variable(x), Variable(y)
class Net(torch.nn.Module):
    """Regression network with a single ReLU hidden layer.

    Args:
        n_feature: Number of input features.
        n_hidden: Width of the hidden layer.
        n_output: Number of output values.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        """Apply the hidden layer with ReLU, then the linear output layer."""
        x = F.relu(self.hidden(x))
        x = self.predict(x)
        return x
net = Net(n_feature=1, n_hidden=10, n_output=1)
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
loss_func = torch.nn.MSELoss()
# Interactive mode lets the figure refresh while training runs.
plt.ion()
# The loop variable is named `step` so it cannot shadow a torch alias `t`.
for step in range(100):
    prediction = net(x)
    loss = loss_func(prediction, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Redraw the data and the current fit every fifth step.
    if step % 5 == 0:
        plt.cla()
        plt.scatter(x.data.numpy(), y.data.numpy())
        plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
        # The loss is a 0-dim tensor, so read it with .item(); indexing it
        # with [0] raises an error on PyTorch >= 0.5.
        plt.text(0.5, 0, 'Loss=%.4f' % loss.item(), fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.5)
plt.ioff()
plt.show()
第二天的作业理解了整体思想,但是细节还要进一步落实
【第三天】
pytorch实现Logistic regression
import torch
from torch.autograd import Variable
# Four training samples with one feature each, stored as a column.
x = Variable(torch.Tensor([[0.6], [1.0], [3.5], [4.0]]))
# Binary targets: the two smaller inputs are labelled 0, the two larger ones 1.
y = Variable(torch.Tensor([[0.], [0.], [1.], [1.]]))
class Model(torch.nn.Module):
    """Logistic regression: one linear unit followed by a sigmoid."""

    def __init__(self):
        super(Model, self).__init__()
        self.linear = torch.nn.Linear(1, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid of the linear unit's output for each row of ``x``."""
        y_pred = self.sigmoid(self.linear(x))
        return y_pred
model = Model()
# `reduction='mean'` replaces the deprecated `size_average=True`.
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for epoch in range(10000):
    y_pred = model(x)
    # The targets are stored in `y`; no `y_data` is defined in this snippet.
    loss = criterion(y_pred, y)
    if epoch % 20 == 0:
        print(epoch, loss.data.numpy())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
model.eval()
# Query the trained model at two inputs.
hour_var = Variable(torch.Tensor([[0.5]]))
print("predict (after training)", 0.5, model(hour_var).data[0][0].numpy())
hour_var = Variable(torch.Tensor([[0.7]]))
print("predict (after training)", 0.7, model(hour_var).data[0][0].numpy())
逻辑回归和昨天的线性回归比较相似
【第四天】
用PyTorch实现多层网络
1.引入模块,读取数据
2.构建计算图(构建网络模型)
3.损失函数与优化器
4.开始训练模型
5.对训练的模型预测结果进行评估
多层网络的构建只有第2步不一样
学习别人的三层网络
from torch import nn
class simpleNet(nn.Module):
    """Three fully connected layers with no activation between them.

    Args:
        in_dim: Number of input features.
        n_hidden_1: Width of the first hidden layer.
        n_hidden_2: Width of the second hidden layer.
        out_dim: Number of outputs.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(simpleNet, self).__init__()
        self.layer1 = nn.Linear(in_dim, n_hidden_1)
        self.layer2 = nn.Linear(n_hidden_1, n_hidden_2)
        self.layer3 = nn.Linear(n_hidden_2, out_dim)

    def forward(self, x):
        """Pass ``x`` through the three linear layers in order."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x
class Activation_Net(nn.Module):
    """Three fully connected layers with ReLU after the first two.

    Args:
        in_dim: Number of input features.
        n_hidden_1: Width of the first hidden layer.
        n_hidden_2: Width of the second hidden layer.
        out_dim: Number of outputs.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(Activation_Net, self).__init__()
        # ReLU(True) applies the activation in place.
        self.layer1 = nn.Sequential(nn.Linear(in_dim, n_hidden_1), nn.ReLU(True))
        self.layer2 = nn.Sequential(nn.Linear(n_hidden_1, n_hidden_2), nn.ReLU(True))
        # The output layer has no activation.
        self.layer3 = nn.Sequential(nn.Linear(n_hidden_2, out_dim))

    def forward(self, x):
        """Pass ``x`` through the three stages in order."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x
class Batch_Net(nn.Module):
    """Three fully connected layers with batch norm and ReLU on the first two.

    Args:
        in_dim: Number of input features.
        n_hidden_1: Width of the first hidden layer.
        n_hidden_2: Width of the second hidden layer.
        out_dim: Number of outputs.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(Batch_Net, self).__init__()
        # Each hidden stage is Linear -> BatchNorm1d -> in-place ReLU.
        self.layer1 = nn.Sequential(nn.Linear(in_dim, n_hidden_1), nn.BatchNorm1d(n_hidden_1), nn.ReLU(True))
        self.layer2 = nn.Sequential(nn.Linear(n_hidden_1, n_hidden_2), nn.BatchNorm1d(n_hidden_2), nn.ReLU(True))
        # The output layer has neither normalization nor activation.
        self.layer3 = nn.Sequential(nn.Linear(n_hidden_2, out_dim))

    def forward(self, x):
        """Pass ``x`` through the three stages in order."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x
【第五天】
- Pytorch实现Dropout原理
1.dropout是指在深度学习网络的训练过程中,对于神经网络单元,按照一定的概率将其暂时从网络中丢弃
2.L1范数:参数矩阵W的绝对值之和
# L1 penalty: sum of absolute values over every parameter tensor of `model`.
regular_loss = 0
# The loop variable is named `param` so it does not overwrite a data tensor `x`.
for param in model.parameters():
    regular_loss += torch.sum(torch.abs(param))
3.L2范数正则化:参数矩阵元素的平方之和(即L2范数的平方)
# weight_decay makes the optimizer apply an L2 penalty to the parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1, momentum=0.9, weight_decay=0.001)
4.Dropout的numpy实现
import numpy as np
# Toy data: four 3-feature samples; the target is the XOR of the first two
# columns, and the third column is always 1.
X = np.array([ [0,0,1],[0,1,1],[1,0,1],[1,1,1] ])
y = np.array([[0,1,1,0]]).T
alpha,hidden_dim,dropout_percent,do_dropout = (0.5,4,0.2,True)
# Weights are initialized uniformly in [-1, 1).
synapse_0 = 2*np.random.random((3,hidden_dim)) - 1
synapse_1 = 2*np.random.random((hidden_dim,1)) - 1
# `range` replaces the Python 2-only `xrange`.
for j in range(60000):
    # Forward pass through the sigmoid hidden layer.
    layer_1 = (1/(1+np.exp(-(np.dot(X,synapse_0)))))
    if(do_dropout):
        # Keep each hidden unit with probability 1 - dropout_percent and scale
        # the kept units by 1 / (1 - dropout_percent).
        layer_1 *= np.random.binomial([np.ones((len(X),hidden_dim))],1-dropout_percent)[0] * (1.0/(1-dropout_percent))
    layer_2 = 1/(1+np.exp(-(np.dot(layer_1,synapse_1))))
    # Backward pass: output error times the sigmoid derivative, then the same
    # for the hidden layer.
    layer_2_delta = (layer_2 - y)*(layer_2*(1-layer_2))
    layer_1_delta = layer_2_delta.dot(synapse_1.T) * (layer_1 * (1-layer_1))
    # Gradient-descent updates with step size alpha.
    synapse_1 -= (alpha * layer_1.T.dot(layer_2_delta))
    synapse_0 -= (alpha * X.T.dot(layer_1_delta))
5.pytorch实现dropout
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt
# torch.manual_seed(1) # reproducible
N_SAMPLES = 20
N_HIDDEN = 300
# Training data: a column of N_SAMPLES inputs in [-1, 1] and noisy targets
# (y = x plus Gaussian noise scaled by 0.3).
x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
y = x + 0.3*torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))
x, y = Variable(x), Variable(y)
# Test data: the same inputs with an independent noise draw.
test_x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
test_y = test_x + 0.3*torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))
# `volatile=True` was removed in PyTorch 0.4 and now only emits a warning, so
# it is dropped here; use `torch.no_grad()` around inference instead.
test_x, test_y = Variable(test_x), Variable(test_y)
# show data
'''
plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.5, label='train')
plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.5, label='test')
plt.legend(loc='upper left')
plt.ylim((-2.5, 2.5))
plt.show()
'''
# Baseline regressor: two ReLU hidden layers of width N_HIDDEN, no dropout.
net_overfitting = torch.nn.Sequential(
    torch.nn.Linear(in_features=1, out_features=N_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=N_HIDDEN, out_features=N_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=N_HIDDEN, out_features=1),
)

# Same architecture, with dropout (p=0.5) after each of the first two linear
# layers.
net_dropped = torch.nn.Sequential(
    torch.nn.Linear(in_features=1, out_features=N_HIDDEN),
    torch.nn.Dropout(p=0.5),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=N_HIDDEN, out_features=N_HIDDEN),
    torch.nn.Dropout(p=0.5),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=N_HIDDEN, out_features=1),
)

# Show both architectures.
print(net_overfitting)
print(net_dropped)

# One Adam optimizer per network, and a mean-squared-error loss shared by both.
optimizer_ofit = torch.optim.Adam(params=net_overfitting.parameters(), lr=0.01)
optimizer_drop = torch.optim.Adam(params=net_dropped.parameters(), lr=0.01)
loss_func = torch.nn.MSELoss()
# Interactive mode lets the figure refresh while training runs.
plt.ion()
# The loop variable is named `step` so it cannot shadow a torch alias `t`.
for step in range(500):
    # Train both networks on the same data, one optimizer step each.
    pred_ofit = net_overfitting(x)
    pred_drop = net_dropped(x)
    loss_ofit = loss_func(pred_ofit, y)
    loss_drop = loss_func(pred_drop, y)
    optimizer_ofit.zero_grad()
    optimizer_drop.zero_grad()
    loss_ofit.backward()
    loss_drop.backward()
    optimizer_ofit.step()
    optimizer_drop.step()
    if step % 10 == 0:
        # Switch to eval mode so dropout is disabled while predicting.
        net_overfitting.eval()
        net_dropped.eval()
        plt.cla()
        # Inference only: no gradient tracking needed.
        with torch.no_grad():
            test_pred_ofit = net_overfitting(test_x)
            test_pred_drop = net_dropped(test_x)
            # The losses are 0-dim tensors, so read them with .item();
            # indexing them with [0] raises an error on PyTorch >= 0.5.
            test_loss_ofit = loss_func(test_pred_ofit, test_y).item()
            test_loss_drop = loss_func(test_pred_drop, test_y).item()
        plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.3, label='train')
        plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.3, label='test')
        plt.plot(test_x.data.numpy(), test_pred_ofit.data.numpy(), 'r-', lw=3, label='overfitting')
        plt.plot(test_x.data.numpy(), test_pred_drop.data.numpy(), 'b--', lw=3, label='dropout(50%)')
        plt.text(0, -1.2, 'overfitting loss=%.4f' % test_loss_ofit, fontdict={'size': 20, 'color': 'red'})
        plt.text(0, -1.5, 'dropout loss=%.4f' % test_loss_drop, fontdict={'size': 20, 'color': 'blue'})
        plt.legend(loc='upper left')
        plt.ylim((-2.5, 2.5))
        plt.pause(0.1)
        # Switch back to train mode for the next optimization steps.
        net_overfitting.train()
        net_dropped.train()
plt.ioff()
plt.show()
- 总结:深度学习是AI的一方面,从小白入手会走很多弯路,但是更重要的是你将来真的想从事这一行业吗。
依然是看不懂。
参考:https://blog.csdn.net/mireyaaa/article/details/90410847
【第六天】
pytorch 中的各种优化方法
不同类型的优化方法有三种:
1.梯度下降法;2.动量优化法;3.自适应学习率优化算法
-
梯度下降法包括:标准梯度下降法、随机梯度下降法、批量梯度下降法。
随机梯度下降法计算速度快,收敛效果好;但是会引入噪声。
-
动量优化法包括:标准动量优化法、NAG动量优化法
标准动量优化法的思想是引入一个积攒历史梯度信息的动量来加速SGD,但是这样容易发生错误;而Nesterov加速梯度法(NAG)则在此基础上添加一个校正因子,使小球不会盲目地跟从下坡的梯度。
-
自适应学习率算法包括:AdaGrad算法、RMSProp算法、Adam算法、AdaDelta算法
1.AdaGrad算法,独立地适应所有模型参数的学习率,缩放每个参数反比于其所有梯度历史平方值总和的平方根。具有代价函数最大梯度的参数相应地有个快速下降的学习率,而具有小梯度的参数在学习率上有相对较小的下降。
2.RMSProp算法修改了AdaGrad的梯度积累为指数加权的移动平均,使得其在非凸设定下效果更好。
3.首先,Adam中动量直接并入了梯度一阶矩(指数加权)的估计。其次,相比于缺少修正因子导致二阶矩估计可能在训练初期具有很高偏置的RMSProp,Adam包括偏置修正,修正从原点初始化的一阶矩(动量项)和(非中心的)二阶矩估计。
4.AdaGrad算法和RMSProp算法都需要指定全局学习率,AdaDelta算法则结合这两种算法的思想来确定每次参数的更新步长。
大神其实也有很多种:https://blog.csdn.net/weixin_40170902/article/details/80092628
【第7天】
pytorch手写数字识别:
三层FC实现MNIST手写数字分类
from torch import nn
class simpleNet(nn.Module):
    """A simple three-layer fully connected network; every layer is linear.

    Args:
        in_dim: Number of input features.
        n_hidden_1: Width of the first hidden layer.
        n_hidden_2: Width of the second hidden layer.
        out_dim: Number of outputs.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(simpleNet, self).__init__()
        self.layer1 = nn.Linear(in_dim, n_hidden_1)
        self.layer2 = nn.Linear(n_hidden_1, n_hidden_2)
        self.layer3 = nn.Linear(n_hidden_2, out_dim)

    def forward(self, x):
        """Pass ``x`` through the three linear layers in order."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x
class Activation_Net(nn.Module):
    """Like ``simpleNet``, with a ReLU activation after the first two layers.

    Args:
        in_dim: Number of input features.
        n_hidden_1: Width of the first hidden layer.
        n_hidden_2: Width of the second hidden layer.
        out_dim: Number of outputs.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(Activation_Net, self).__init__()
        # nn.Sequential chains the given layers into a single module.
        # ReLU(True) applies the activation in place.
        self.layer1 = nn.Sequential(nn.Linear(in_dim, n_hidden_1), nn.ReLU(True))
        self.layer2 = nn.Sequential(nn.Linear(n_hidden_1, n_hidden_2), nn.ReLU(True))
        # The output layer has no activation.
        self.layer3 = nn.Sequential(nn.Linear(n_hidden_2, out_dim))

    def forward(self, x):
        """Pass ``x`` through the three stages in order."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x
class Batch_Net(nn.Module):
    """Like ``Activation_Net``, with batch normalization to speed up convergence.

    Args:
        in_dim: Number of input features.
        n_hidden_1: Width of the first hidden layer.
        n_hidden_2: Width of the second hidden layer.
        out_dim: Number of outputs.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(Batch_Net, self).__init__()
        # Each hidden stage is Linear -> BatchNorm1d -> in-place ReLU.
        self.layer1 = nn.Sequential(nn.Linear(in_dim, n_hidden_1), nn.BatchNorm1d(n_hidden_1), nn.ReLU(True))
        self.layer2 = nn.Sequential(nn.Linear(n_hidden_1, n_hidden_2), nn.BatchNorm1d(n_hidden_2), nn.ReLU(True))
        # The output layer has neither normalization nor activation.
        self.layer3 = nn.Sequential(nn.Linear(n_hidden_2, out_dim))

    def forward(self, x):
        """Pass ``x`` through the three stages in order."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x