卷积神经网络(简称CNN)是一类特殊的人工神经网络,是深度学习中重要的一个分支。CNN在很多领域都表现优异,精度和速度比传统机器学习算法高很多。特别是在计算机视觉领域,CNN是解决图像分类、图像检索、物体检测和语义分割的主流模型。
CNN每一层由众多的卷积核组成,每个卷积核对输入的像素进行卷积操作,得到下一层的输入。随着网络层数的增加,卷积核会逐渐扩大感受野,并缩减图像的尺寸。
CNN是一种层次模型,输入的是原始的像素数据。CNN由卷积(convolution)、池化(pooling)、非线性激活函数(non-linear activation function)和全连接层(fully connected layer)构成。
import os,sys,glob,shutil,json
import cv2
from PIL import Image
import numpy as np
import torch
from torch.utils.data.dataset import Dataset
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torchvision.models as models
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
# Fix the RNG seed so weight initialization and augmentation order are repeatable.
torch.manual_seed(0)
# NOTE(review): deterministic=False together with benchmark=True trades
# reproducibility for speed — cuDNN may select non-deterministic kernels,
# so GPU runs can still differ despite the manual seed above. Confirm this
# is intentional.
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True
在Pytorch中构建CNN模型非常简单,只需要定义好模型的参数和正向传播即可,Pytorch会根据正向传播自动计算反向传播。
# Load the training annotations (per-image bounding boxes + digit labels).
# Use a context manager so the file handle is closed promptly, and pin the
# encoding rather than relying on the platform default.
with open(r'D:\study\cv\mchar_train.json', encoding='utf-8') as f:
    train_json = json.load(f)
# Annotation parsing
def parse_json(d):
    """Convert one image's annotation dict into a 5xN integer array.

    Rows are (top, height, left, width, label); column j describes the
    bounding box and class of the j-th digit in the image.
    """
    arr = np.array([d['top'], d['height'], d['left'], d['width'], d['label']])
    return arr.astype(int)
# Visualize one training image: the full frame plus one crop per digit.
img = cv2.imread(r'D:\study\cv\mchar_train\000000.png')
arr = parse_json(train_json['000000.png'])
plt.figure(figsize=(10, 10))
# arr.shape[1] is the number of digits in the image; +1 leaves room for
# the whole image in the first subplot.
plt.subplot(1, arr.shape[1] + 1, 1)
plt.imshow(img)
plt.xticks([])
plt.yticks([])
for idx in range(arr.shape[1]):
    # Subplots for the crops start at position 2 (position 1 is the full image).
    plt.subplot(1, arr.shape[1] + 1, idx + 2)
    # Crop rows top:top+height and columns left:left+width for digit idx:
    #   row 0 = top, row 1 = height, row 2 = left, row 3 = width, row 4 = label.
    plt.imshow(img[arr[0, idx]:arr[0, idx] + arr[1, idx],
                   arr[2, idx]:arr[2, idx] + arr[3, idx]])
    plt.title(arr[4, idx])
    plt.xticks([])
    plt.yticks([])
(图:上述可视化代码的输出 output_4_0.png。原文外链图片加载失败,建议将图片保存到本地后直接上传。)
class SVHNDataset(Dataset):
    """SVHN-style multi-digit dataset yielding (image, 5-element label tensor).

    img_path:  list of image file paths.
    img_label: list of per-image digit-label sequences.
    transform: optional torchvision transform applied to each image.
    """

    def __init__(self, img_path, img_label, transform=None):
        self.img_path = img_path
        self.img_label = img_label
        # The original if/else both stored the argument; assign directly.
        self.transform = transform

    def __getitem__(self, index):
        img = Image.open(self.img_path[index]).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        # In the original SVHN annotation, class 10 denotes the digit 0;
        # here 10 is also used to pad labels to a fixed length of 5.
        # Use int64 explicitly: np.int is deprecated (removed in NumPy 1.24)
        # and aliased to int32 on Windows, which makes CrossEntropyLoss fail
        # with "expected scalar type Long but found Int".
        lbl = np.array(self.img_label[index], dtype=np.int64)
        lbl = list(lbl) + (5 - len(lbl)) * [10]
        return img, torch.from_numpy(np.array(lbl[:5]))

    def __len__(self):
        return len(self.img_path)
# Build the training dataset with augmentation.
train_path = glob.glob(r'D:\study\cv\mchar_train\*.png')
train_path.sort()
# Close the annotation file promptly and pin the encoding.
with open(r'D:\study\cv\mchar_train.json', encoding='utf-8') as f:
    train_json = json.load(f)
train_label = [train_json[x]['label'] for x in train_json]

data = SVHNDataset(train_path, train_label,
                   transforms.Compose([
                       # Scale to a fixed size.
                       transforms.Resize((64, 128)),
                       # Random color jitter.
                       transforms.ColorJitter(0.2, 0.2, 0.2),
                       # Random rotation of up to +/-5 degrees.
                       transforms.RandomRotation(5),
                       # Convert the image to a PyTorch tensor.
                       # transforms.ToTensor(),
                       # Normalize pixel values.
                       # transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
                   ]))
class SVHNDataset(Dataset):
    """SVHN-style multi-digit dataset yielding (image, 5-element label tensor).

    img_path:  list of image file paths.
    img_label: list of per-image digit-label sequences.
    transform: optional torchvision transform applied to each image.
    """

    def __init__(self, img_path, img_label, transform=None):
        self.img_path = img_path
        self.img_label = img_label
        # The original if/else both stored the argument; assign directly.
        self.transform = transform

    def __getitem__(self, index):
        img = Image.open(self.img_path[index]).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        # In the original SVHN annotation, class 10 denotes the digit 0;
        # here 10 is also used to pad labels to a fixed length of 5.
        # Use int64 explicitly: np.int is deprecated (removed in NumPy 1.24)
        # and aliased to int32 on Windows, which makes CrossEntropyLoss fail
        # with "expected scalar type Long but found Int".
        lbl = np.array(self.img_label[index], dtype=np.int64)
        lbl = list(lbl) + (5 - len(lbl)) * [10]
        return img, torch.from_numpy(np.array(lbl[:5]))

    def __len__(self):
        return len(self.img_path)
# Build the training DataLoader.
train_path = glob.glob(r'D:\study\cv\mchar_train\*.png')
train_path.sort()
# Close the annotation file promptly and pin the encoding.
with open(r'D:\study\cv\mchar_train.json', encoding='utf-8') as f:
    train_json = json.load(f)
train_label = [train_json[x]['label'] for x in train_json]

# NOTE(review): Resize is (65,128) here but (64,128) in the earlier dataset —
# presumably a typo; kept as-is to preserve behavior. shuffle=False is unusual
# for training; confirm whether disabling shuffling was intentional.
train_loader = torch.utils.data.DataLoader(
    SVHNDataset(train_path, train_label,
                transforms.Compose([
                    transforms.Resize((65, 128)),
                    transforms.ColorJitter(0.3, 0.3, 0.2),
                    transforms.RandomRotation(5),
                    transforms.ToTensor(),
                    transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
                ])),
    batch_size=10,   # samples per batch
    shuffle=False,   # whether to shuffle the sample order
    num_workers=0    # number of loader worker processes
)
# Model definition
class SVHN_Model1(nn.Module):
    """Small CNN with six independent 11-way heads, one per character slot.

    The convolutional trunk reduces the input image to a 32x3x7 feature map;
    each linear head classifies one character position (10 digits + padding).
    """

    def __init__(self):
        super(SVHN_Model1, self).__init__()
        # Feature extractor: two conv -> ReLU -> max-pool stages.
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2)),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2)),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # Flattened feature size produced by the trunk above.
        feat_dim = 32 * 3 * 7
        # One 11-class linear head per character position.
        self.fc1 = nn.Linear(feat_dim, 11)
        self.fc2 = nn.Linear(feat_dim, 11)
        self.fc3 = nn.Linear(feat_dim, 11)
        self.fc4 = nn.Linear(feat_dim, 11)
        self.fc5 = nn.Linear(feat_dim, 11)
        self.fc6 = nn.Linear(feat_dim, 11)

    def forward(self, img):
        feat = self.cnn(img)
        flat = feat.view(feat.shape[0], -1)
        heads = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6)
        return tuple(fc(flat) for fc in heads)
# Instantiate the network, loss, and optimizer.
model = SVHN_Model1()
# Cross-entropy over the 11 classes predicted by each head.
criterion = nn.CrossEntropyLoss()
# Adam with a learning rate of 0.005.
optimizer = optim.Adam(model.parameters(), 0.005)
loss_plot, c0_plot = [], []
# Train for 10 epochs.
for epoch in range(10):
    for data in train_loader:
        # BUG FIX: CrossEntropyLoss requires int64 targets; labels created
        # with np.int are int32 on Windows ("expected Long but found Int").
        imgs, labels = data[0], data[1].long()
        preds = model(imgs)
        # Labels are padded to exactly 5 positions, so only the first five
        # heads have a target column; the sixth head is left untrained.
        # (BUG FIX: the original indexed labels[:, 5], which does not exist.)
        loss = sum(criterion(preds[i], labels[:, i]) for i in range(5))
        loss /= 5
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_plot.append(loss.item())
        # Track accuracy of the first-character head.
        # (BUG FIX: the original appended to undefined name `cn_plot`.)
        c0_plot.append((preds[0].argmax(1) == labels[:, 0]).sum().item() * 1.0
                       / preds[0].shape[0])
    print(epoch)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-18-6def6e10ced7> in <module>
15 criterion(c2, data[1][:,2]) +\
16 criterion(c3, data[1][:,3]) +\
---> 17 criterion(c4, data[1][:,4]) +\
18 criterion(c5, data[1][:,5])
19 loss /=6
D:\study\anaconda\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
D:\study\anaconda\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
930 def forward(self, input, target):
931 return F.cross_entropy(input, target, weight=self.weight,
--> 932 ignore_index=self.ignore_index, reduction=self.reduction)
933
934
D:\study\anaconda\lib\site-packages\torch\nn\functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2315 if size_average is not None or reduce is not None:
2316 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2317 return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
2318
2319
D:\study\anaconda\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
2113 .format(input.size(0), target.size(0)))
2114 if dim == 2:
-> 2115 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
2116 elif dim == 4:
2117 ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
RuntimeError: expected scalar type Long but found Int
报错原因:CrossEntropyLoss 要求标签为 int64(Long)类型,而标签是用 np.int 创建的,在 Windows 上 np.int 默认对应 int32(Int),因此报 "expected scalar type Long but found Int"。将标签改为 dtype=np.int64(或对标签张量调用 .long())即可解决。
当然为了追求精度,也可以使用在ImageNet数据集上的预训练模型,具体方法如下:
class SVHN_Model2(nn.Module):
    """ResNet-18 backbone (ImageNet-pretrained) with five 11-way heads.

    The final classification layer of ResNet-18 is stripped off; the
    remaining trunk produces a 512-d feature vector per image, which five
    parallel linear heads map to per-character class logits.
    """

    def __init__(self):
        # BUG FIX: was super(SVHN_Model1, ...) — referenced the wrong class.
        super(SVHN_Model2, self).__init__()
        model_conv = models.resnet18(pretrained=True)
        model_conv.avgpool = nn.AdaptiveAvgPool2d(1)
        # Drop the final fully-connected classification layer.
        model_conv = nn.Sequential(*list(model_conv.children())[:-1])
        # BUG FIX: the trunk was never stored on self, so forward()'s
        # self.cnn raised AttributeError.
        self.cnn = model_conv
        # BUG FIX: fc2..fc5 were written as nn.Linear(512.11) — a single
        # float argument instead of (512, 11).
        self.fc1 = nn.Linear(512, 11)
        self.fc2 = nn.Linear(512, 11)
        self.fc3 = nn.Linear(512, 11)
        self.fc4 = nn.Linear(512, 11)
        self.fc5 = nn.Linear(512, 11)

    def forward(self, img):
        feat = self.cnn(img)
        # print(feat.shape)
        feat = feat.view(feat.shape[0], -1)
        c1 = self.fc1(feat)
        c2 = self.fc2(feat)
        c3 = self.fc3(feat)
        c4 = self.fc4(feat)
        c5 = self.fc5(feat)
        return c1, c2, c3, c4, c5