PyTorch项目:Dog vs Cat

  本项目是根据陈云《深度学习框架PyTorch入门与实践》所写的代码,但是直接根据书上的代码写,会出现各种各样的问题。
  我并没有像书上那样安装那么多可视化工具,只实现了最基本的分类功能,并根据自己的理解为代码添加了注释。

项目结构

  其中,__pycache__是运行时生成的,checkpoints用于保存训练后的模型(里面需要放置一个.gitkeep文件),data包用于数据预处理,models存放了几个神经网络的模型,config是配置文件,而main则是程序的主体。

data

__init__.py

#空文件

dataset.py

import os
from PIL import Image
from torch.utils import data
from torchvision import transforms as T
from torch.utils.data import DataLoader

class DogCat(data.Dataset):
    '''
    Dogs-vs-Cats image dataset read from a flat directory of image files.

    File names must carry a numeric id as their second-to-last dot-separated
    component: e.g. ``cat.0.jpg`` / ``dog.12.jpg`` for labelled images and
    ``1.jpg`` for unlabelled test images.

    Args:
        root: directory that directly contains the image files.
        transforms: optional callable applied to every PIL image. When
            ``None`` a default resize/normalise pipeline is built.
        train: only consulted when ``test`` is false. ``True`` selects the
            first 70% of the id-sorted files, ``False`` the remaining 30%
            (used as the validation split).
        test: when true, every file under ``root`` is used and
            ``__getitem__`` returns the numeric file id instead of a label.
    '''

    def __init__(self, root, transforms=None, train=True, test=False):
        self.test = test

        # Full path of every entry under root, ordered by numeric file id.
        imgs = [os.path.join(root, img) for img in os.listdir(root)]
        imgs = sorted(imgs, key=self._image_id)

        # 7:3 split of the labelled data into training and validation parts.
        split = int(0.7 * len(imgs))
        if self.test:
            self.imgs = imgs
        elif train:
            self.imgs = imgs[:split]
        else:
            self.imgs = imgs[split:]

        if transforms is not None:
            # Honour the caller's pipeline (previously it was silently
            # dropped, leaving self.transforms undefined).
            self.transforms = transforms
            return

        normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

        # The raw images have different sizes; resize them all to 227x227.
        steps = [T.Resize(size=(227, 227))]
        if train and not self.test:
            # Random augmentation belongs to the training split only, so
            # that validation/test results are deterministic.
            steps += [T.RandomRotation(20), T.RandomHorizontalFlip()]
        # ToTensor converts the image to a tensor scaled to [0, 1].
        steps += [T.ToTensor(), normalize]
        self.transforms = T.Compose(steps)

    @staticmethod
    def _image_id(path):
        '''Return the numeric id embedded in the file name of ``path``.'''
        # basename() keeps this independent of the path separator in use.
        return int(os.path.basename(path).split('.')[-2])

    def __getitem__(self, index):
        '''
        Return ``(image, label)`` for the sample at ``index``.

        For the test set ``label`` is the numeric file id; otherwise it is
        1 when the file name contains "dog" and 0 otherwise (cat).
        '''
        img_path = self.imgs[index]
        if self.test:
            label = self._image_id(img_path)
        else:
            # Look at the file name only, so a directory whose name
            # contains "dog" cannot flip the label.
            label = 1 if 'dog' in os.path.basename(img_path) else 0

        # Close the file handle promptly and force three channels so the
        # 3-channel Normalize also works on greyscale/RGBA files.
        with Image.open(img_path) as img:
            sample = self.transforms(img.convert('RGB'))
        return sample, label

    def __len__(self):
        '''Number of samples in the selected split.'''
        return len(self.imgs)

models

__init__.py

from .AlexNet import AlexNet
from .ResNet34 import ResNet34

BasicModule.py

import torch as t
import time

class BasicModule(t.nn.Module):
    '''
    Thin ``nn.Module`` base class that adds ``load``/``save`` helpers for
    the module's ``state_dict``.

    Attributes:
        checkpoint_dir: prefix used by ``save`` when no file name is given.
            It is concatenated as-is, so it must end with a path separator.
        model_name: name embedded in auto-generated checkpoint file names;
            defaults to the class name and may be overridden by subclasses.
    '''

    checkpoint_dir = 'D:/TheMoth/Cat_vs_Dog/checkpoints/'

    def __init__(self):
        super(BasicModule, self).__init__()
        # The bare class name is safe in a file name, unlike str(type(self))
        # which yields "<class '...'>".
        self.model_name = type(self).__name__

    def load(self, path):
        '''Load a state dict previously written by ``save`` from ``path``.'''
        self.load_state_dict(t.load(path))

    def save(self, name=None):
        '''
        Save the state dict and return the file name that was written.

        Args:
            name: target file. When ``None`` the file is named
                ``<checkpoint_dir><model_name>_<mmdd_HHMMSS>.pth``.
        '''
        if name is None:
            # Format only the timestamp: passing the whole prefix through
            # strftime would reinterpret any '%' in the directory or name.
            stamp = time.strftime('%m%d_%H%M%S')
            name = self.checkpoint_dir + self.model_name + '_' + stamp + '.pth'
        t.save(self.state_dict(), name)
        return name

AlexNet.py

    书中提供的代码在卷积层和池化层之间没有使用BatchNorm2d层,可能会影响效果;下面的实现在每个卷积层之后都加入了BatchNorm2d层。

#coding:utf8
from torch import nn
from .BasicModule import BasicModule

class AlexNet(BasicModule):
    '''
    AlexNet-style classifier with a BatchNorm2d after every convolution.

    Adapted from torchvision/models/alexnet.py; architecture reference:
    <https://arxiv.org/abs/1404.5997>

    Args:
        num_classes: output size of the last linear layer (default 2).
    '''

    def __init__(self, num_classes=2):
        super().__init__()
        self.model_name = 'alexnet'

        # Feature extractor: five conv/BN/ReLU groups with three max-pools.
        # Layers are created in the same order as they run.
        stack = []
        stack += self._conv_bn_relu(3, 96, 11, 4, 2)
        stack.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=0))
        stack += self._conv_bn_relu(96, 192, 5, 1, 2)
        stack.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=0))
        stack += self._conv_bn_relu(192, 384, 3, 1, 1)
        stack += self._conv_bn_relu(384, 256, 3, 1, 1)
        stack += self._conv_bn_relu(256, 256, 3, 1, 1)
        stack.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=0))
        self.features = nn.Sequential(*stack)

        # Classifier head working on the flattened 256x6x6 feature map.
        head = [
            nn.Dropout(p=0.5),
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(True),
            nn.Linear(in_features=4096, out_features=num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    @staticmethod
    def _conv_bn_relu(in_channels, out_channels, kernel_size, stride, padding):
        '''Return a bias-free conv followed by BatchNorm2d and in-place ReLU.'''
        return [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=kernel_size, stride=stride,
                      padding=padding, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(True),
        ]

    def forward(self, x):
        '''Return the raw class scores for the image batch ``x``.'''
        feature_map = self.features(x)
        # Flatten to (batch, 256*6*6); the feature map must be 256x6x6,
        # which is what a 227x227 input produces through the stack above.
        flat = feature_map.view(feature_map.size(0), 256 * 6 * 6)
        return self.classifier(flat)

ResNet34.py

  本例并没有用ResNet训练。

#coding:utf8
from .BasicModule import BasicModule
from torch import nn
from torch.nn import functional as F

class ResidualBlock(nn.Module):
    '''
    Sub-module: one residual block.

    The main branch (``left``) is conv3x3 -> BN -> ReLU -> conv3x3 -> BN.
    The skip branch (``right``) is the optional ``shortcut`` module; when it
    is ``None`` the input itself is added back.

    Args:
        inchannel: channels of the input feature map.
        outchannel: channels produced by the block.
        stride: stride of the first 3x3 convolution.
        shortcut: optional module applied to the input on the skip branch.
    '''

    def __init__(self, inchannel, outchannel, stride=1, shortcut=None):
        super().__init__()
        main_branch = [
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride,
                      padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1,
                      padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
        ]
        self.left = nn.Sequential(*main_branch)
        self.right = shortcut

    def forward(self, x):
        '''Return ``relu(left(x) + skip(x))``.'''
        out = self.left(x)
        if self.right is None:
            skip = x
        else:
            skip = self.right(x)
        out += skip
        return F.relu(out)

class ResNet34(BasicModule):
    '''
    Main module: ResNet34.

    The network is a stem followed by four stages; each stage is a sequence
    of ResidualBlock sub-modules built by ``_make_layer``.

    Args:
        num_classes: output size of the final linear layer (default 2).
    '''

    def __init__(self, num_classes=2):
        super().__init__()
        self.model_name = 'resnet34'

        # Stem: 7x7 stride-2 convolution, BN, ReLU and a 3x3 stride-2 max-pool.
        stem = [
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ]
        self.pre = nn.Sequential(*stem)

        # Four stages with 3, 4, 6 and 3 residual blocks respectively.
        self.layer1 = self._make_layer(64, 128, 3)
        self.layer2 = self._make_layer(128, 256, 4, stride=2)
        self.layer3 = self._make_layer(256, 512, 6, stride=2)
        self.layer4 = self._make_layer(512, 512, 3, stride=2)

        # Fully connected layer used for classification.
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, inchannel, outchannel, block_num, stride=1):
        '''
        Build one stage made of ``block_num`` residual blocks.

        The first block changes channels/stride and therefore gets a
        1x1-conv + BN shortcut; the remaining blocks use the identity skip.
        '''
        shortcut = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride,
                      bias=False),
            nn.BatchNorm2d(outchannel),
        )
        first_block = ResidualBlock(inchannel, outchannel, stride, shortcut)
        identity_blocks = [ResidualBlock(outchannel, outchannel)
                           for _ in range(block_num - 1)]
        return nn.Sequential(first_block, *identity_blocks)

    def forward(self, x):
        '''Return the raw class scores for the image batch ``x``.'''
        x = self.pre(x)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        # NOTE(review): fixed 7x7 pooling window; the final feature map must
        # be at least 7x7 and pool down to 1x1 for fc(512) to fit - confirm
        # the intended input size.
        pooled = F.avg_pool2d(x, 7)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

config.py

import warnings
class DefaultConfig(object):
    '''
    Default options for the project, stored as class attributes.

    Individual options can be overridden on an instance with ``parse``.
    '''
    env = 'default'
    model = 'AlexNet'

    train_data_root = 'D:/TheMoth/kaggle/train_part/'  # training-set directory
    test_data_root = 'D:/TheMoth/kaggle/test_part/'  # test-set directory
    load_model_path = None  # path of a saved model

    batch_size = 128  # number of samples fed to the model per mini-batch
    use_gpu = False
    # The book uses 4; 0 is used here because, per the author, the
    # DataLoader otherwise freezes the machine.
    num_workers = 0
    print_freq = 20

    debug_file = 'D:/TheMoth/kaggle/'
    result_file = 'result.csv'

    max_epoch = 20
    # Per the author, the book's learning rate of 0.1 keeps the loss from
    # converging.
    lr = 0.0014
    lr_decay = 0.95
    # NOTE(review): the name looks like a typo of "weight_decay"; kept
    # unchanged so existing references keep working.
    weigh_decay = 1e-4

    def parse(self, kwargs):
        '''
        Update the options from the dict ``kwargs`` and print the result.

        Unknown keys trigger a warning but are still set on the instance.
        Only options declared on the class are printed; methods such as
        ``parse`` itself are skipped.
        '''
        for k, v in kwargs.items():
            if not hasattr(self, k):
                warnings.warn("Warning: opt has not attribut %s" % k)
            setattr(self, k, v)

        print('user config:')
        for k, declared in self.__class__.__dict__.items():
            # Skip dunder entries and callables (methods are not options).
            if k.startswith('__') or callable(declared):
                continue
            print(k, getattr(self, k))

main.py

import models
from config import DefaultConfig
from data.dataset import DogCat
from torch.utils.data import DataLoader
import torch as t
from torch.autograd import Variable

def train():
    '''
    Train the module-level ``model`` with the settings in the module-level
    ``opt``.

    The images under ``opt.train_data_root`` are split by DogCat into a
    training and a validation part. After every epoch the model is saved
    with ``model.save()`` and the validation accuracy returned by ``val``
    is printed.
    '''
    # Data: training split and validation split of the same directory.
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)

    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=True, num_workers=opt.num_workers)

    # Cross-entropy loss optimised with Adam.
    criterion = t.nn.CrossEntropyLoss()
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)

    # Be explicit about the mode instead of relying on the module default.
    model.train()
    for epoch in range(opt.max_epoch):
        # Tensors are fed to the model directly; the deprecated Variable
        # wrapper is a no-op on current PyTorch.
        for batch, target in train_dataloader:
            # Clear the gradients left over from the previous step.
            optimizer.zero_grad()
            score = model(batch)
            loss = criterion(score, target)
            # Back-propagate and update the weights.
            loss.backward()
            optimizer.step()

        model.save()
        print('验证结果......')
        val_accurancy = val(model, val_dataloader)
        print(val_accurancy)
        
        

#验证
def val(model, dataloader):
    '''
    Return the classification accuracy of ``model`` on ``dataloader``.

    Args:
        model: module mapping an input batch to per-class scores.
        dataloader: iterable yielding ``(input, label)`` batches.

    Returns:
        Accuracy as a percentage in [0, 100]; 0.0 when ``dataloader``
        yields no samples.

    The model is put into evaluation mode for the pass and switched back
    to training mode before returning.
    '''
    model.eval()
    total = 0
    correct = 0
    # no_grad must cover the forward pass itself; wrapping only the input
    # tensors does not stop autograd from recording the graph.
    with t.no_grad():
        for batch, label in dataloader:
            target = label.long()
            score = model(batch)
            # Predicted class = index of the highest score in each row.
            _, predicted = t.max(score, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    # Switch back to training mode.
    model.train()
    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    return 100 * correct / total

#测试
def test():
    '''
    Run the module-level ``model`` over the images under
    ``opt.test_data_root`` and print the predicted class indices of each
    batch (0 = cat, 1 = dog).

    The model is left in evaluation mode.
    '''
    # Evaluation mode.
    model.eval()
    test_data = DogCat(opt.test_data_root, test=True)

    test_dataloader = DataLoader(test_data, opt.batch_size,
                                 shuffle=False, num_workers=opt.num_workers)
    print('测试结果')
    # no_grad must cover the forward pass itself so no autograd graph is
    # recorded during inference.
    with t.no_grad():
        # In test mode DogCat yields the numeric file id in place of a label.
        for batch, _image_ids in test_dataloader:
            score = model(batch)
            _, predicted = t.max(score, 1)
            print(predicted.data)  # predictions: 0 = cat, 1 = dog
        
# Module-level state read by train() and test().
opt = DefaultConfig()
model = getattr(models, opt.model)()  # instantiate the class named by opt.model (AlexNet by default)

if __name__ == '__main__':
    # Guard the entry point so that importing this module does not start a
    # training run. DataLoader worker processes started with the "spawn"
    # method (the default on Windows) re-import the main module, so the
    # guard is required before num_workers can be raised above 0.
    train()
    test()

 

©️2020 CSDN 皮肤主题: 大白 设计师:CSDN官方博客 返回首页