Task3 字符识别模型

卷积神经网络(简称CNN)是一类特殊的人工神经网络,是深度学习中重要的一个分支。CNN在很多领域都表现优异,精度和速度比传统机器学习算法高很多。特别是在计算机视觉领域,CNN是解决图像分类、图像检索、物体检测和语义分割的主流模型。
CNN每一层由众多的卷积核组成,每个卷积核对输入的像素进行卷积操作,得到下一层的输入。随着网络层数的增加,卷积核的感受野会逐渐扩大,特征图的尺寸则逐渐缩减。
CNN是一种层次模型,输入的是原始的像素数据。CNN由卷积(convolution)、池化(pooling)、非线性激活函数(non-linear activation function)和全连接层(fully connected layer)构成。

import os,sys,glob,shutil,json
import cv2

from PIL import Image
import numpy as np

import torch
from torch.utils.data.dataset import Dataset
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

import torchvision.models as models

import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

torch.manual_seed(0)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True


在Pytorch中构建CNN模型非常简单,只需要定义好模型的参数和正向传播即可,Pytorch会根据正向传播自动计算反向传播。





# Load the annotation file. The context manager closes the handle right after
# parsing instead of leaving it open until garbage collection.
with open(r'D:\study\cv\mchar_train.json', encoding='utf-8') as f:
    train_json = json.load(f)

#数据标注处理
def parse_json(d):
    """Stack one image's annotation fields into an integer array.

    Args:
        d: Mapping with the keys 'top', 'height', 'left', 'width' and
            'label', each holding one value per annotated character.

    Returns:
        An integer ndarray with one row per field, in the order
        top, height, left, width, label, and one column per character.
    """
    fields = ('top', 'height', 'left', 'width', 'label')
    rows = [d[name] for name in fields]
    return np.array(rows).astype(int)

img = cv2.imread(r'D:\study\cv\mchar_train\000000.png')
if img is None:
    # cv2.imread signals a missing or unreadable file by returning None.
    raise FileNotFoundError(r'D:\study\cv\mchar_train\000000.png')
# OpenCV loads images in BGR channel order while matplotlib expects RGB;
# convert so the colours are displayed correctly.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
arr = parse_json(train_json['000000.png'])
n_chars = arr.shape[1]  # number of annotated characters in the image

plt.figure(figsize=(10, 10))
# One extra subplot (the first one) shows the whole image.
plt.subplot(1, n_chars + 1, 1)
plt.imshow(img)
plt.xticks([])
plt.yticks([])

for idx in range(n_chars):
    # Character crops start at the second subplot.
    plt.subplot(1, n_chars + 1, idx + 2)
    top, height, left, width, label = arr[:, idx]
    # Crop the bounding box: rows top..top+height, columns left..left+width.
    plt.imshow(img[top:top + height, left:left + width])
    plt.title(label)
    plt.xticks([])
    plt.yticks([])


![output_4_0](output_4_0.png)

class SVHNDataset(Dataset):
    """Image dataset that yields (image, fixed-length label tensor) pairs.

    Args:
        img_path: Sequence of image file paths.
        img_label: Sequence of per-image label lists, indexed like
            ``img_path``.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    # Every label is cut or padded to this many character slots.
    MAX_LEN = 5
    # Class index used to fill unused slots. (The original comment notes that
    # raw SVHN uses class 10 for the digit 0; here 10 is the filler value.)
    PAD_CLASS = 10

    def __init__(self, img_path, img_label, transform=None):
        self.img_path = img_path
        self.img_label = img_label
        self.transform = transform

    @classmethod
    def _encode_label(cls, label):
        """Return ``label`` as an int64 tensor of exactly ``MAX_LEN`` entries."""
        digits = [int(v) for v in label][:cls.MAX_LEN]
        digits += [cls.PAD_CLASS] * (cls.MAX_LEN - len(digits))
        # int64 is required for nn.CrossEntropyLoss targets. The removed
        # ``np.int`` alias produced int32 on Windows, which triggered
        # "expected scalar type Long but found Int".
        return torch.tensor(digits, dtype=torch.long)

    def __getitem__(self, index):
        """Load image ``index`` and return ``(image, label_tensor)``."""
        # convert() returns a fully loaded copy, so the file can be closed.
        with Image.open(self.img_path[index]) as raw:
            img = raw.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        return img, self._encode_label(self.img_label[index])

    def __len__(self):
        """Return the number of images."""
        return len(self.img_path)
    
    
train_path = glob.glob(r'D:\study\cv\mchar_train\*.png')
train_path.sort()
# Close the annotation file as soon as it has been parsed.
with open(r'D:\study\cv\mchar_train.json', encoding='utf-8') as f:
    train_json = json.load(f)
# Look each label up by file name so that images and labels stay aligned
# regardless of the key order inside the JSON file.
train_label = [train_json[os.path.basename(p)]['label'] for p in train_path]

data = SVHNDataset(train_path, train_label,
                  transforms.Compose([
                      # Resize to a fixed size
                      transforms.Resize((64,128)),

                      # Random colour jitter (brightness, contrast, saturation)
                      transforms.ColorJitter(0.2,0.2,0.2),

                      # Random rotation within +/-5 degrees
                      transforms.RandomRotation(5),

                      # Convert the image to a PyTorch tensor
                      # transforms.ToTensor(),

                      # Normalise the pixel values
                      #transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
                  ]))
class SVHNDataset(Dataset):
    """Image dataset that yields (image, fixed-length label tensor) pairs.

    Args:
        img_path: Sequence of image file paths.
        img_label: Sequence of per-image label lists, indexed like
            ``img_path``.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    # Every label is cut or padded to this many character slots.
    MAX_LEN = 5
    # Class index used to fill unused slots. (The original comment notes that
    # raw SVHN uses class 10 for the digit 0; here 10 is the filler value.)
    PAD_CLASS = 10

    def __init__(self, img_path, img_label, transform=None):
        self.img_path = img_path
        self.img_label = img_label
        self.transform = transform

    @classmethod
    def _encode_label(cls, label):
        """Return ``label`` as an int64 tensor of exactly ``MAX_LEN`` entries."""
        digits = [int(v) for v in label][:cls.MAX_LEN]
        digits += [cls.PAD_CLASS] * (cls.MAX_LEN - len(digits))
        # int64 is required for nn.CrossEntropyLoss targets. The removed
        # ``np.int`` alias produced int32 on Windows, which triggered
        # "expected scalar type Long but found Int".
        return torch.tensor(digits, dtype=torch.long)

    def __getitem__(self, index):
        """Load image ``index`` and return ``(image, label_tensor)``."""
        # convert() returns a fully loaded copy, so the file can be closed.
        with Image.open(self.img_path[index]) as raw:
            img = raw.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        return img, self._encode_label(self.img_label[index])

    def __len__(self):
        """Return the number of images."""
        return len(self.img_path)
    
    
train_path = glob.glob(r'D:\study\cv\mchar_train\*.png')
train_path.sort()
# Close the annotation file as soon as it has been parsed.
with open(r'D:\study\cv\mchar_train.json', encoding='utf-8') as f:
    train_json = json.load(f)
# Look each label up by file name so that images and labels stay aligned
# regardless of the key order inside the JSON file.
train_label = [train_json[os.path.basename(p)]['label'] for p in train_path]

train_loader = torch.utils.data.DataLoader(
        SVHNDataset(train_path,train_label,
                   transforms.Compose([
                       # Resize to a fixed size
                       transforms.Resize((65,128)),
                       # Random colour jitter (brightness, contrast, saturation)
                       transforms.ColorJitter(0.3,0.3,0.2),
                       # Random rotation within +/-5 degrees
                       transforms.RandomRotation(5),
                       # Convert the PIL image to a tensor
                       transforms.ToTensor(),
                       # Normalise each channel with the given mean and std
                       transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
                   ])),
        batch_size = 10,  # samples per batch
        # NOTE(review): shuffling is usually enabled for training data;
        # kept disabled as in the original.
        shuffle=False,
        num_workers=0  # 0 loads the data in the main process
)
        



# 定义模型
class SVHN_Model1(nn.Module):
    """Small CNN with six parallel 11-way classification heads.

    The convolutional stack must flatten to 32*3*7 features per sample,
    which is what a 3x64x128 input produces. ``forward`` returns one logits
    tensor per head.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: two (conv -> ReLU -> max-pool) stages.
        stages = [
            nn.Conv2d(3, 16, kernel_size=3, stride=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, stride=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.cnn = nn.Sequential(*stages)

        # Heads fc1..fc6, created in order so that attribute names and
        # parameter initialisation order match the explicit definitions.
        n_features = 32 * 3 * 7
        for head_no in range(1, 7):
            setattr(self, 'fc%d' % head_no, nn.Linear(n_features, 11))

    def forward(self, img):
        """Return a 6-tuple of logits, one ``(batch, 11)`` tensor per head."""
        flat = self.cnn(img)
        flat = flat.view(flat.size(0), -1)
        heads = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6)
        return tuple(head(flat) for head in heads)
    
model = SVHN_Model1()

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), 0.005)

loss_plot, c0_plot = [],[]
# Train for 10 epochs
for epoch in range(10):
    for images, labels in train_loader:
        # CrossEntropyLoss needs int64 targets; an int32 label tensor raises
        # "expected scalar type Long but found Int".
        labels = labels.long()
        outputs = model(images)

        # Pair each head with its label column. zip() stops at the shorter
        # of the two, so a model with more heads than label columns no
        # longer indexes past the end of the label tensor.
        pairs = list(zip(outputs, labels.unbind(dim=1)))
        loss = sum(criterion(logits, target) for logits, target in pairs)
        loss = loss / len(pairs)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_plot.append(loss.item())
        # Accuracy of the first character position on this batch.
        first = outputs[0]
        c0_plot.append((first.argmax(1) == labels[:,0]).sum().item()*1.0 / first.shape[0])

    print(epoch)
---------------------------------------------------------------------------

RuntimeError                              Traceback (most recent call last)

<ipython-input-18-6def6e10ced7> in <module>
     15                 criterion(c2, data[1][:,2]) +\
     16                 criterion(c3, data[1][:,3]) +\
---> 17                 criterion(c4, data[1][:,4]) +\
     18                 criterion(c5, data[1][:,5])
     19         loss /=6


D:\study\anaconda\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)


D:\study\anaconda\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
    930     def forward(self, input, target):
    931         return F.cross_entropy(input, target, weight=self.weight,
--> 932                                ignore_index=self.ignore_index, reduction=self.reduction)
    933 
    934 


D:\study\anaconda\lib\site-packages\torch\nn\functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   2315     if size_average is not None or reduce is not None:
   2316         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2317     return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
   2318 
   2319 


D:\study\anaconda\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   2113                          .format(input.size(0), target.size(0)))
   2114     if dim == 2:
-> 2115         ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
   2116     elif dim == 4:
   2117         ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)


RuntimeError: expected scalar type Long but found Int

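报错原因:nn.CrossEntropyLoss 要求标签为 int64(Long)类型,而在 Windows 上 np.int 对应 int32,torch.from_numpy 得到的是 Int 张量;把标签转换为 int64(例如 dtype=np.int64 或 .long())即可。此外,标签只填充到 5 位,而模型有 6 个输出头,data[1][:,5] 会越界,也需要一并修正。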

当然为了追求精度,也可以使用在ImageNet数据集上的预训练模型,具体方法如下:

class SVHN_Model2(nn.Module):
    """Five-head character classifier on top of a ResNet-18 feature extractor.

    Args:
        backbone: Optional feature extractor whose output flattens to 512
            values per sample. When omitted, an ImageNet-pretrained
            torchvision ResNet-18 without its final classification layer
            is used.
    """

    def __init__(self, backbone=None):
        # Must name this class; super(SVHN_Model1, self) raises TypeError.
        super(SVHN_Model2, self).__init__()

        if backbone is None:
            model_conv = models.resnet18(pretrained=True)
            model_conv.avgpool = nn.AdaptiveAvgPool2d(1)
            # Keep every child module except the last one (the fc layer).
            backbone = nn.Sequential(*list(model_conv.children())[:-1])
        # forward() reads self.cnn, so the extractor has to be stored here.
        self.cnn = backbone

        # One 11-way head per character position (512 features in).
        self.fc1 = nn.Linear(512, 11)
        self.fc2 = nn.Linear(512, 11)
        self.fc3 = nn.Linear(512, 11)
        self.fc4 = nn.Linear(512, 11)
        self.fc5 = nn.Linear(512, 11)

    def forward(self, img):
        """Return a 5-tuple of logits, one ``(batch, 11)`` tensor per head."""
        feat = self.cnn(img)
        # Flatten everything after the batch dimension.
        feat = feat.view(feat.shape[0], -1)
        c1 = self.fc1(feat)
        c2 = self.fc2(feat)
        c3 = self.fc3(feat)
        c4 = self.fc4(feat)
        c5 = self.fc5(feat)
        return c1, c2, c3, c4, c5



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值