卷积神经网络CNN(Convolutional Neural Network)
-
下采样(Subsampling):减少数据量,减少元素数量,降低运算需求
-
卷积神经网络主要包括:特征提取器——Feature Extraction(对图像做卷积运算,通过卷积运算来找到图像的某种特征)和分类器(通过分类器转化为向量,再利用全连接网络做分类)
-
卷积过程:(单通道)
多通道卷积运算
要求:
1>每一个卷积和的通道的数量和输入通道的数量要是一致的
2>卷积和的总数和输出通道的数量是一致的
解析:有一个n*w*h输入维度,如果想得到m*w*h维度的输出,那么卷积核的维度就要是n*w'*h',而这样的卷积和一共要有m个(上图就是所构建卷积的权重:输出通道*输入通道*卷积核宽*卷积核高)
-
如果卷积核是3*3的话,那么输出图像宽度=输入图像宽度-2
-
padding:因为如果用3*3卷积核的话,输出的宽度就一定会比输入的宽度小2个单位,如果想让输出和输入像素一样的话,就要加一个padding(计算几圈padding的方法——整除:用卷积核的宽度除以2,取整数部分,例如:宽度为3,就是3/2的整数部分是1,那么就加1圈padding;如果卷积核的宽度是5,就是5/2,其整数部分就是2,那么就加2圈padding)
-
当设置MaxPool2d中的kernel_size=2时,那么步长stride也等于2
-
卷积的变换过程(最后是全连接层:用view做一个线性变换——torch.nn.Linear(320,10))
-
卷积输出大小计算公式
-
如果再对第七步进行池化的话,batch和输出通道都不变,只不过长宽要缩小为原来的一半
-
池化:如果没有权重的话,做一个池化层就可以了,如果有权重的话,每一层都要做一个池化
-
用显卡GPU计算分如下几步:
代码1:用PyTorch实现卷积网络、
import torch
# 定义相关参数
in_channels, out_channels = 5, 10
width, height = 100, 100
kernel_size = 3
batch_size = 1
# 定义输入图片变量
input = torch.randn(batch_size, in_channels, width, height)
# 定义卷积层
conv_layer = torch.nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size)
# 求出输出
output = conv_layer(input)
print(input.shape)
print(output.shape)
print(conv_layer.weight.shape)
代码2:带padding(保证输出和输入的维度一样)
import torch
input = [3, 4, 6, 5, 7,
2, 4, 6, 8, 2,
1, 6, 7, 8, 4,
9, 7, 4, 6, 2,
3, 7, 5, 4, 1]
# view:无论怎么变,元素的总数量是不变的,如果行列中出现-1,那么就要动态补全所有元素量
# view(batch,Channel,width,height)
input = torch.Tensor(input).view(1, 1, 5, 5)
conv_layer = torch.nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False)
# 定义卷积核的具体形式;view(output,input,weight,height)
kernel = torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]).view(1, 1, 3, 3)
conv_layer.weight.data = kernel.data
output = conv_layer(input)
print(output)
代码3:带stride
import torch
input = [3, 4, 6, 5, 7,
2, 4, 6, 8, 2,
1, 6, 7, 8, 4,
9, 7, 4, 6, 2,
3, 7, 5, 4, 1]
input=torch.Tensor(input).view(1,1,5,5)
conv_layer=torch.nn.Conv2d(1,2,kernel_size=3,stride=2,bias=False)
kernel=torch.Tensor([1,2,3,4,5,6,7,8,9]).view(1,1,3,3)
conv_layer.weight.data=kernel.data
output=conv_layer(input)
print(output.data)
代码4:最大池化层
import torch
input = [3, 4, 6, 5,
2, 4, 6, 8,
1, 6, 7, 8,
9, 7, 4, 6]
input=torch.Tensor(input).view(1,1,4,4)
maxpooling_layer=torch.nn.MaxPool2d(kernel_size=2)#定义最大池化层
output=maxpooling_layer(input)
print(output.data)
用代码实现卷积神经网络
import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
batch_size = 64
# 如果transforms.ToTensor()不加后面的括号,则会报错:__init__() takes 1 positional argument but 2 were given
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
# 如果train_dataset中的参数transform赋值为空,或者赋错值了,就会报错:‘module‘ object is not callable
train_dataset = datasets.MNIST(root='./data/mnist/', train=True, download=False, transform=transform)
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='./data/mnist/', train=False, download=False, transform=transform)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
class Net(torch.nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
self.pooling = torch.nn.MaxPool2d(2)
# fcl:Fullly Connected Layer(全连接层)
self.fc = torch.nn.Linear(320, 10)
def forward(self, x):
batch_size = x.size(0)
x = F.relu(self.pooling(self.conv1(x)))
x = F.relu(self.pooling(self.conv2(x)))
x = x.view(batch_size, -1)
x = self.fc(x)
return x
model = Net()
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
# 封装训练方法和测试方法
def train(epoch):
running_loss = 0.0
for batch_index, data in enumerate(train_dataloader, 0):
inputs, target = data
y_pred = model(inputs)
loss = criterion(y_pred, target)
optimizer.zero_grad()
loss.backward()
optimizer.step()
running_loss += loss.item()
if batch_index % 300 == 299:
print('[%d,%5d] loss: %.3f' % (epoch + 1, batch_index + 1, running_loss / 300))
running_loss = 0.0
def test():
correct = 0
total = 0
with torch.no_grad():
for data in test_dataloader:
images, labels = data
outputs = model(images)
# 求出每一行最大值的下标
_, predicted = torch.max(outputs.data, dim=1) # dim=0表示行(矩阵从上到下),dim=1表示列(矩阵从左到右)
total += labels.size(0) # labels是一个(N,1)的元组
correct += (predicted == labels).sum().item()
print('Accuracy on testset %d %%' % (100 * correct / total))
if __name__ == '__main__':
for epoch in range(10):
train(epoch)
test()
GPU版本
import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
batch_size = 64
# 如果transforms.ToTensor()不加后面的括号,则会报错:__init__() takes 1 positional argument but 2 were given
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
# 如果train_dataset中的参数transform赋值为空,或者赋错值了,就会报错:‘module‘ object is not callable
train_dataset = datasets.MNIST(root='./data/mnist/', train=True, download=False, transform=transform)
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='./data/mnist/', train=False, download=False, transform=transform)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
class Net(torch.nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
self.pooling = torch.nn.MaxPool2d(2)
# fcl:Fullly Connected Layer(全连接层)
self.fc = torch.nn.Linear(320, 10)
def forward(self, x):
batch_size = x.size(0)
x = F.relu(self.pooling(self.conv1(x)))
x = F.relu(self.pooling(self.conv2(x)))
x = x.view(batch_size, -1)
x = self.fc(x)
return x
model = Net()
# GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
# 封装训练方法和测试方法
def train(epoch):
running_loss = 0.0
for batch_index, data in enumerate(train_dataloader, 0):
inputs, target = data
inputs, target = inputs.to(device), target.to(device)# GPU
y_pred = model(inputs)
loss = criterion(y_pred, target)
optimizer.zero_grad()
loss.backward()
optimizer.step()
running_loss += loss.item()
if batch_index % 300 == 299:
print('[%d,%5d] loss: %.3f' % (epoch + 1, batch_index + 1, running_loss / 300))
running_loss = 0.0
def test():
correct = 0
total = 0
with torch.no_grad():
for data in test_dataloader:
images, labels = data
images, labels = images.to(device), labels.to(device)# GPU
outputs = model(images)
# 求出每一行最大值的下标
_, predicted = torch.max(outputs.data, dim=1) # dim=0表示行(矩阵从上到下),dim=1表示列(矩阵从左到右)
total += labels.size(0) # labels是一个(N,1)的元组
correct += (predicted == labels).sum().item()
print('Accuracy on testset %d %%' % (100 * correct / total))
return correct / total
if __name__ == '__main__':
epoc_list = []
accu_list = []
for epoch in range(10):
train(epoch)
acc = test()
epoc_list.append(epoch)
accu_list.append(acc)
plt.plot(epoc_list, accu_list)
plt.xlabel('epoch')
plt.ylabel('Accuracy')
plt.show()