第十讲-卷积神经网络(基础篇)
卷积神经网络的输入、输出及卷积核
convolutional layer,设置输入的宽和高都是100,输入、输出的通道数分别是5和10,卷积核大小为3*3,batchsize为1,测试输入和输出的维度以及卷积层权重的形状。
import torch
# Demo: inspect the shapes of a Conv2d layer's input, output and weight.
in_channels, out_channels = 5, 10  # n input channels, m output channels
width, height = 100, 100
kernel_size = 3  # 3x3 convolution kernel
batch_size = 1  # Conv2d expects a mini-batch, even for a single sample

# PyTorch image tensors are laid out as (batch, channels, height, width),
# so height goes before width.  randn samples from the standard normal.
input = torch.randn(batch_size, in_channels, height, width)
conv_layer = torch.nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size)
output = conv_layer(input)

print(input.shape)  # (1, 5, 100, 100)
print(output.shape)  # (1, 10, 98, 98): no padding, so 100 - 3 + 1 = 98
print(conv_layer.weight.shape)  # (out_channels, in_channels, 3, 3)
输出:
Padding
padding=1时,把input周围扩了一圈0,如下图。
编程检验一下:
import torch
# Demo: with padding=1 a 3x3 convolution keeps the 5x5 spatial size.
input = [
    [3, 4, 6, 5, 7],
    [2, 4, 6, 8, 2],
    [1, 6, 7, 8, 4],
    [9, 7, 4, 6, 2],
    [3, 7, 5, 4, 1],
]
# torch.tensor builds a tensor straight from the data; view() re-interprets
# its dimensions (comparable to numpy's reshape) as
# (batch, channels, height, width).
input = torch.tensor(input, dtype=torch.float32).view(1, 1, 5, 5)

# bias=False keeps the layer a pure linear map so the result can be checked
# by hand.
conv_layer = torch.nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False)

# Weight layout: (out_channels, in_channels, kernel_height, kernel_width).
kernel = torch.tensor(
    [1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=torch.float32
).view(1, 1, 3, 3)

# Overwrite the randomly initialised weights in place.  Doing it under
# no_grad() avoids the legacy `.data` attribute and keeps autograd out of
# the copy.
with torch.no_grad():
    conv_layer.weight.copy_(kernel)

output = conv_layer(input)
print(output)
print(output.shape)
输出:
Stride
stride=2时,跳两步,Conv2d函数中默认stride=1。如下图。这里padding不设值,默认0。
编程检验一下,跟上个代码只变Conv2d函数中的赋值:
import torch
# Demo: stride=2 (padding left at its default of 0) shrinks 5x5 to 2x2.
input = [
    [3, 4, 6, 5, 7],
    [2, 4, 6, 8, 2],
    [1, 6, 7, 8, 4],
    [9, 7, 4, 6, 2],
    [3, 7, 5, 4, 1],
]
# torch.tensor builds a tensor straight from the data; view() re-interprets
# its dimensions (comparable to numpy's reshape) as
# (batch, channels, height, width).
input = torch.tensor(input, dtype=torch.float32).view(1, 1, 5, 5)

# bias=False keeps the layer a pure linear map so the result can be checked
# by hand.
conv_layer = torch.nn.Conv2d(1, 1, kernel_size=3, bias=False, stride=2)

# Weight layout: (out_channels, in_channels, kernel_height, kernel_width).
kernel = torch.tensor(
    [1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=torch.float32
).view(1, 1, 3, 3)

# Overwrite the randomly initialised weights in place.  Doing it under
# no_grad() avoids the legacy `.data` attribute and keeps autograd out of
# the copy.
with torch.no_grad():
    conv_layer.weight.copy_(kernel)

output = conv_layer(input)
print(output)
print(output.shape)
输出:
Max Pooling
下采样,用的较多的是最大池化Max Pooling,通道数不变。MaxPool2d的stride默认等于kernel_size,所以kernel_size=2时stride默认为2。如下图。注意,最大池化没有可学习的权重(没有卷积核参数),只是在每个窗口内取最大值。
编程实现:
import torch
# Demo: 2x2 max pooling halves each spatial dimension (4x4 -> 2x2).
# The nested rows are reshaped to (batch, channels, height, width).
input = torch.Tensor([
    [3, 4, 6, 5],
    [2, 4, 6, 8],
    [1, 6, 7, 8],
    [9, 7, 4, 6],
]).view(1, 1, 4, 4)

# Only kernel_size is given here; the layer takes the maximum of each window.
maxpooling_layer = torch.nn.MaxPool2d(kernel_size=2)
output = maxpooling_layer(input)

print(output, output.shape, sep="\n")
输出:
Simple CNN-MNIST
然后编写一个基于MNIST数据集的简单CNN网络,计算图:
代码:
train、test模块和上一讲基本一样,只是加入了Move Tensors to GPU的代码
import torch
from torch.utils.data import DataLoader
from torchvision import transforms #针对图像进行处理的工具包
from torchvision import datasets
import torch.nn.functional as F #for using ReLU
batch_size = 64

# Turn each image into a tensor, then normalise it with mean 0.1307 and
# standard deviation 0.3081 (one value each, for the single grey channel).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# MNIST images are 28x28 greyscale; the data is downloaded on first use.
train_dataset = datasets.MNIST(
    root='../dataset/mnist',
    train=True,
    transform=transform,
    download=True,
)
test_dataset = datasets.MNIST(
    root='../dataset/mnist',
    train=False,
    transform=transform,
    download=True,
)

# Training batches are shuffled; test batches keep their order so results
# are easier to inspect.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)
class Net(torch.nn.Module):
    """Small CNN mapping (n, 1, 28, 28) images to 10 class scores.

    Shape flow:
        conv1 (5x5) -> (n, 10, 24, 24) -> pool -> (n, 10, 12, 12)
        conv2 (5x5) -> (n, 20, 8, 8)   -> pool -> (n, 20, 4, 4)
        flatten     -> (n, 320)        -> fc   -> (n, 10)
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        # The pooling layer holds no weights, so one instance serves both stages.
        self.pool = torch.nn.MaxPool2d(kernel_size=2)
        # 320 = 20 channels * 4 * 4 pixels left after the second pooling.
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        """Return raw class scores of shape (n, 10) for a batch ``x``."""
        # Each stage: convolution, then ReLU, then pooling.
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten (n, 20, 4, 4) to (n, 320), keeping the batch dimension.
        x = x.view(x.size(0), -1)
        # No activation on the last layer: the cross-entropy loss expects
        # raw scores.
        return self.fc(x)
# Pick the first CUDA device when one is available, otherwise the CPU.
# Other GPUs would be addressed as "cuda:1", "cuda:2", ...
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

model = Net()
model.to(device)  # moves the model's parameters and buffers to `device`

# Cross-entropy loss with its default settings.
criterion = torch.nn.CrossEntropyLoss()
# Plain SGD with a momentum term.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
def train(epoch, log_interval=300):
    """Run one training pass over ``train_loader``.

    Args:
        epoch: Zero-based epoch index; printed as ``epoch + 1``.
        log_interval: Positive number of mini-batches whose mean loss is
            reported in each progress line.
    """
    model.train()  # make sure the model is in training mode
    running_loss = 0.0
    for batch_idx, (inputs, target) in enumerate(train_loader):
        # Inputs and labels must live on the same device as the model.
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()

        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        # .item() yields a plain Python number, so the running total does
        # not keep the computation graph alive.
        running_loss += loss.item()
        if batch_idx % log_interval == log_interval - 1:
            print('[%d %5d] loss:%.3f' % (epoch+1,batch_idx+1,running_loss/log_interval))
            running_loss = 0.0
def test():
    """Evaluate ``model`` on ``test_loader`` and print the accuracy."""
    correct = 0
    total = 0
    # Evaluate in eval mode, then put the model back in whatever mode it
    # was in so later training is unaffected.
    was_training = model.training
    model.eval()
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, target in test_loader:
            images, target = images.to(device), target.to(device)
            outputs = model(images)
            # Index of the largest score along dim=1 (one per sample) is
            # the predicted class.
            predicted = outputs.argmax(dim=1)
            total += target.size(0)  # number of samples seen so far
            # .item() converts the one-element tensor to a Python number.
            correct += (predicted == target).sum().item()
    model.train(was_training)
    # '%%' prints a literal percent sign.
    print('Accuracy on test set:%d %% [%d/%d]'%(100*correct/total,correct,total))
if __name__ == '__main__':
    # Ten epochs, each one training pass followed by one evaluation pass.
    for epoch in range(10):
        train(epoch)
        test()
结果:
Exercise
从计算图可以看出,有三个完整的块(卷积-ReLU-池化),然后用view函数flatten展平,再接三个线性(全连接)层:前两个是l1、l2,最后一个是输出10个类别的全连接层fc。
输入:(batch,1,28,28)
经过conv1 变为(batch,16,24,24),经过最大池化(batch,16,12,12)
经过conv2 变为(batch,32,12,12),经过最大池化(batch,32,6,6)
经过conv3变为(batch,64,6,6),经过最大池化(batch,64,3,3),最后的像素为64*3*3=576个
import torch
from torch.utils.data import DataLoader
from torchvision import transforms #针对图像进行处理的工具包
from torchvision import datasets
import torch.nn.functional as F #for using ReLU
batch_size = 64

# Turn each image into a tensor, then normalise it with mean 0.1307 and
# standard deviation 0.3081 (one value each, for the single grey channel).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# MNIST images are 28x28 greyscale; the data is downloaded on first use.
train_dataset = datasets.MNIST(
    root='../dataset/mnist',
    train=True,
    transform=transform,
    download=True,
)
test_dataset = datasets.MNIST(
    root='../dataset/mnist',
    train=False,
    transform=transform,
    download=True,
)

# Training batches are shuffled; test batches keep their order so results
# are easier to inspect.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)
class Net(torch.nn.Module):
    """Three conv-ReLU-pool stages followed by three linear layers.

    Shape flow for an input of (n, 1, 28, 28):
        conv1 (5x5)        -> (n, 16, 24, 24) -> pool -> (n, 16, 12, 12)
        conv2 (3x3, pad 1) -> (n, 32, 12, 12) -> pool -> (n, 32, 6, 6)
        conv3 (3x3, pad 1) -> (n, 64, 6, 6)   -> pool -> (n, 64, 3, 3)
        flatten -> (n, 576) -> l1 -> (n, 256) -> l2 -> (n, 128) -> fc -> (n, 10)
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = torch.nn.Conv2d(32, 64, kernel_size=3, padding=1)
        # The pooling layer holds no weights, so one instance serves all stages.
        self.pool = torch.nn.MaxPool2d(kernel_size=2)
        # 576 = 64 channels * 3 * 3 pixels left after the third pooling.
        self.l1 = torch.nn.Linear(576, 256)
        self.l2 = torch.nn.Linear(256, 128)
        self.fc = torch.nn.Linear(128, 10)

    def forward(self, x):
        """Return raw class scores of shape (n, 10) for a batch ``x``."""
        # Each stage: convolution, then ReLU, then pooling.
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten (n, 64, 3, 3) to (n, 576), keeping the batch dimension.
        x = x.view(x.size(0), -1)
        # NOTE(review): l1, l2 and fc are applied back to back with no
        # activation in between, so together they act as a single affine
        # map.  Consider F.relu after l1 and l2 if extra capacity is wanted.
        x = self.l1(x)
        x = self.l2(x)
        # No activation on the last layer: the cross-entropy loss expects
        # raw scores.
        return self.fc(x)
# Pick the first CUDA device when one is available, otherwise the CPU.
# Other GPUs would be addressed as "cuda:1", "cuda:2", ...
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

model = Net()
model.to(device)  # moves the model's parameters and buffers to `device`

# Cross-entropy loss with its default settings.
criterion = torch.nn.CrossEntropyLoss()
# Plain SGD with a momentum term.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
def train(epoch, log_interval=300):
    """Run one training pass over ``train_loader``.

    Args:
        epoch: Zero-based epoch index; printed as ``epoch + 1``.
        log_interval: Positive number of mini-batches whose mean loss is
            reported in each progress line.
    """
    model.train()  # make sure the model is in training mode
    running_loss = 0.0
    for batch_idx, (inputs, target) in enumerate(train_loader):
        # Inputs and labels must live on the same device as the model.
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()

        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        # .item() yields a plain Python number, so the running total does
        # not keep the computation graph alive.
        running_loss += loss.item()
        if batch_idx % log_interval == log_interval - 1:
            print('[%d %5d] loss:%.3f' % (epoch+1,batch_idx+1,running_loss/log_interval))
            running_loss = 0.0
def test():
    """Evaluate ``model`` on ``test_loader`` and print the accuracy."""
    correct = 0
    total = 0
    # Evaluate in eval mode, then put the model back in whatever mode it
    # was in so later training is unaffected.
    was_training = model.training
    model.eval()
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, target in test_loader:
            images, target = images.to(device), target.to(device)
            outputs = model(images)
            # Index of the largest score along dim=1 (one per sample) is
            # the predicted class.
            predicted = outputs.argmax(dim=1)
            total += target.size(0)  # number of samples seen so far
            # .item() converts the one-element tensor to a Python number.
            correct += (predicted == target).sum().item()
    model.train(was_training)
    # '%%' prints a literal percent sign.
    print('Accuracy on test set:%d %% [%d/%d]'%(100*correct/total,correct,total))
if __name__ == '__main__':
    # Ten epochs, each one training pass followed by one evaluation pass.
    for epoch in range(10):
        train(epoch)
        test()
输出:
最终准确率为99%。