本博文为本人的学习笔记。参考材料为《深度学习入门之——PyTorch》
pytorch中文网:https://www.pytorchtutorial.com/
关于反卷积:https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
关于卷积和反卷积函数中的参数——“dilation (int or tuple, optional) – 卷积核元素之间的间距”,相当于在卷积核的元素之间插入空隙,使卷积核变得稀疏(即空洞卷积)。
对于全连接神经网络,网络参数太多了。如,对于一张28*28的图片输入,第一个隐含层的单个神经元的权重数目就达28*28=784个。若多设置几层隐含层、输入图片再大一点,参数量十分庞大。
卷积神经网络中,每一层的神经元按三维(宽度、高度、深度)排列。卷积层和全连接层包含参数,而激活层和池化层不包含参数。参数通过梯度下降法(或者Adam等优化算法)来更新。
卷积层中滤波器的参数是通过学习得到的。
与神经元连接的输入区域的空间大小叫做该神经元的感受野(receptive field)。感受野的大小即filter size(滤波器的尺寸),而感受野的深度必须和输入的深度一致。输出的深度等于滤波器的个数(the number of filters)。
CNN——参数共享、稀疏连接(局部连接)
设置网络时,要注意步长限制:输出尺寸为 (W−F+2P)/S+1(W为输入尺寸,F为滤波器尺寸,P为零填充的大小,S为步长),所选的步长应使该结果为整数。
参数共享可以有效减少参数的个数。
下面开始构建简单的卷积神经网络
import torch
import numpy as np
import torch.nn as nn
#define the model
class SimpleCNN(nn.Module):
    """Small CNN: three conv/ReLU/max-pool stages plus a three-layer classifier.

    Every convolution uses a 3x3 kernel with stride 1 and padding 1, so it
    keeps the spatial size; every 2x2 max-pool halves it.  The first fully
    connected layer expects 2048 flattened features, i.e. 128 channels * 4 * 4,
    which matches 3-channel 32x32 inputs.  The network outputs 10 scores
    per sample.
    """

    def __init__(self):
        super(SimpleCNN, self).__init__()

        # nn.Sequential is a container; add_module registers each layer
        # under an explicit name (visible in print(model) and in
        # named_parameters()).
        layer1 = nn.Sequential()
        layer1.add_module('conv1', nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1))
        layer1.add_module('relu1', nn.ReLU(True))  # True -> in-place ReLU
        layer1.add_module('pool1', nn.MaxPool2d(2, 2))
        self.layer1 = layer1

        layer2 = nn.Sequential()
        layer2.add_module('conv2', nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1))
        layer2.add_module('relu2', nn.ReLU(True))
        layer2.add_module('pool2', nn.MaxPool2d(2, 2))
        self.layer2 = layer2

        layer3 = nn.Sequential()
        layer3.add_module('conv3', nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1))
        layer3.add_module('relu3', nn.ReLU(True))
        layer3.add_module('pool3', nn.MaxPool2d(2, 2))
        self.layer3 = layer3

        # Classifier head: 2048 -> 512 -> 64 -> 10.
        layer4 = nn.Sequential()
        layer4.add_module('fc1', nn.Linear(2048, 512))
        layer4.add_module('fc_relu1', nn.ReLU(True))
        layer4.add_module('fc2', nn.Linear(512, 64))
        layer4.add_module('fc_relu2', nn.ReLU(True))
        layer4.add_module('fc3', nn.Linear(64, 10))
        self.layer4 = layer4

    def forward(self, x):
        """Run the three conv stages, flatten, and apply the classifier.

        Returns a tensor with one row of 10 scores per input sample.
        """
        conv1 = self.layer1(x)
        conv2 = self.layer2(conv1)
        conv3 = self.layer3(conv2)
        # Flatten each sample's feature maps into a single row vector.
        fc_input = conv3.view(conv3.size(0), -1)
        fc_out = self.layer4(fc_input)
        return fc_out
# Build the network, then print its module hierarchy (layer names and settings).
model = SimpleCNN()
print(model)
run之后的结果:
# named_parameters() yields (name, parameter) pairs; only the layer-qualified
# names are printed here.
for name, _ in model.named_parameters():
    print(name)
结果如下图所示
通过增加1*1的卷积层可以降低输入的通道数(深度),使网络参数减少,从而降低网络的复杂度。
在pytorch的torchvision.models里面有很多定义好的网络,同时大部分网络都有预训练好的参数。详细可参考链接:
https://www.pytorchtutorial.com/docs/torchvision/torchvision-models/
下面实现一个demo,对MNIST数据集中的手写数字进行分类。MNIST数据集是一个手写字体数据集,包含了0~9这10个数字,有55000张训练图片、10000张测试图片和5000张验证图片(torchvision的datasets.MNIST则直接提供60000张训练图片和10000张测试图片),图片是大小为28*28的灰度图。
import torch
from torch import optim
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets,transforms
torch.manual_seed(1)  # fixed seed for reproducibility

# Hyperparameters
batch_size = 50
learning_rate = 1e-3
EPOCH = 1

# Data preprocessing: Compose chains the transforms in order.
#   ToTensor  - converts the image to a tensor (see torchvision docs)
#   Normalize - per channel, subtracts the mean (0.5) and divides by the
#               standard deviation (0.5)
data_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),
])

# MNIST: the training split is downloaded into ./MNIST_data if requested;
# the test split is read from the same root.
train_dataset = datasets.MNIST(
    root='./MNIST_data', train=True, transform=data_tf, download=True)
test_data = datasets.MNIST(
    root='./MNIST_data', train=False, transform=data_tf)

# shuffle=True: draw the training samples in shuffled order.
train_loader = DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True)
#####################################################################################################################
class CNN(nn.Module):
    """Four conv blocks (conv -> batch norm -> ReLU) plus a 3-layer classifier.

    All convolutions are 3x3, stride 1, no padding, so each one shrinks the
    spatial size by 2; blocks 2 and 4 end with a 2x2 max-pool that halves it.
    For a 1x28x28 input the feature maps go
    16x26x26 -> 32x24x24 -> 32x12x12 -> 64x10x10 -> 128x8x8 -> 128x4x4,
    which is why the first linear layer takes 128*4*4 features.  The output
    has 10 scores per sample.
    """

    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=0),  # 16*26*26
            nn.BatchNorm2d(16),
            nn.ReLU(),  # not in-place; ReLU(inplace=True) would overwrite its input
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=0),  # 32*24*24
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 32*12*12
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=0),  # 64*10*10
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=0),  # 128*8*8
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 128*4*4
        )
        self.fc = nn.Sequential(
            nn.Linear(128 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        """Apply the four conv blocks, flatten per sample, then classify."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = x.view(x.size(0), -1)  # (batch, 128*4*4) for 28x28 inputs
        output = self.fc(x)
        return output
###########################################################################################################
# train
model = CNN()
print(model)

# Pick the device once instead of re-checking CUDA availability per batch.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(EPOCH):
    for step, (img, label) in enumerate(train_loader):
        # Tensors can be fed to the model directly; wrapping them in the
        # deprecated Variable class is no longer needed.
        img = img.to(device)
        label = label.to(device)

        output = model(img)
        loss = criterion(output, label)

        # reset gradients
        optimizer.zero_grad()
        # backward pass
        loss.backward()
        # update parameters
        optimizer.step()

# test
# Switch to evaluation mode: dropout is disabled and batch norm uses the
# statistics accumulated during training, so evaluation must run in this mode.
model.eval()
上面代码运行有点问题,下面给出新的代码
import torch
import torch.nn as nn
import torchvision
#It includes the popular data set, model structure and commonly used image conversion tools.
import torchvision.transforms as transforms
# Device configuration: first CUDA device when available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Hyper parameters
num_epochs = 6
num_classes = 10  # digits 0~9
batch_size = 100
learning_rate = 0.001

# MNIST dataset (training split is downloaded into ./MNIST_data if requested)
train_dataset = torchvision.datasets.MNIST(
    root='./MNIST_data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root='./MNIST_data',
    train=False,
    transform=transforms.ToTensor(),
)

# Data loaders: group the dataset samples into batches of batch_size.
# The training loader shuffles; the test loader keeps the dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False)
##########################################################
#define the CNN
class ConvNet(nn.Module):
    """Two conv blocks (conv -> batch norm -> ReLU -> max-pool) and one linear layer.

    The 5x5 convolutions use stride 1 and padding 2, so they keep the spatial
    size; each 2x2 max-pool halves it.  For a 1x28x28 input the feature maps
    go 16x28x28 -> 16x14x14 -> 32x14x14 -> 32x7x7, hence the 7*7*32 input
    features of the final linear layer.

    Args:
        num_classes: number of output scores per sample (default 10).
    """

    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()  # input 1*28*28
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2),  # 16*28*28
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 16*14*14
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),  # 32*14*14
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 32*7*7
        )
        self.fc = nn.Linear(7 * 7 * 32, num_classes)

    def forward(self, x):
        """Apply both conv blocks, flatten per sample, and return class scores."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)  # (batch, 7*7*32) for 28x28 inputs
        out = self.fc(out)
        return out
# Build the model and move its parameters to the selected device
# (GPU when available, otherwise CPU).
model = ConvNet(num_classes).to(device)

# loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# train the model
total_step = len(train_loader)  # number of mini-batches per epoch
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 100 steps.
        if (i + 1) % 100 == 0:
            print('Epoch[{}/{}],Step[{}/{}],Loss:{:.4f}'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

#################################################################################
# test the model
model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():  # gradients are not tracked during evaluation
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # torch.max over dim 1 returns (values, indices); the index of the
        # largest score is the predicted class.  No .data needed under no_grad.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
#torch.save(model.state_dict(), 'model.ckpt')
运行结果截图
关于DataLoader(https://blog.csdn.net/u014380165/article/details/79058479)
该接口主要用来将自定义的数据读取接口的输出或者PyTorch已有的数据读取接口的输入按照batch size封装成Tensor,后续只需要再包装成Variable即可作为模型的输入。
关于ReLU(inplace=True)
关于PyTorch进行训练和测试时指定实例化的model模式为:train/eval
https://www.cnblogs.com/king-lps/p/8570021.html
关于optimizer.step()
关于torch.no_grad()
关于torch.max