本项目是根据陈云《深度学习框架PyTorch入门与实践》所写的代码,但是直接根据书上的代码写,会出现各种各样的问题。
我并没有按照书上说的那样,装了这么多可视化工具,只实现了它基本的分类功能。代码根据自己的理解添加了注释。
项目结构
其中,__pycache__是运行时生成的,checkpoints用于保存训练后的模型(里面需要放置一个.gitkeep文件),data包用于数据预处理,models存放了几个神经网络的模型,config是配置文件,而main则是程序的主体。
data
__init__.py
#空文件
dataset.py
import os
from PIL import Image
from torch.utils import data
from torchvision import transforms as T
from torch.utils.data import DataLoader
class DogCat(data.Dataset):
    """Kaggle Dogs-vs-Cats dataset.

    Args:
        root: directory containing the image files.
        transforms: optional transform pipeline applied to each image;
            when omitted, a default augmenting pipeline is built.
        train: when test is False, selects the 70% training split (True)
            or the 30% validation split (False).
        test: load the unlabeled test set (file names like 'data/test1/01.jpg').
    """

    def __init__(self, root, transforms=None, train=True, test=False):
        self.test = test
        # root is the data directory; collect every file inside it.
        imgs = [os.path.join(root, img) for img in os.listdir(root)]

        # Sort by the numeric id embedded in the file name:
        # test files  data/test1/01.jpg      -> key 1
        # train files data/train/cat.100.jpg -> key 100
        if self.test:
            imgs = sorted(imgs, key=self._test_id)
        else:
            imgs = sorted(imgs, key=lambda x: int(x.split('.')[-2]))

        imgs_num = len(imgs)
        if self.test:
            self.imgs = imgs
        elif train:
            # 70/30 split of the labeled data into train/validation.
            self.imgs = imgs[:int(0.7 * imgs_num)]
        else:
            self.imgs = imgs[int(0.7 * imgs_num):]

        if transforms is None:
            normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])
            # Source images vary in size; unify to 227x227 (AlexNet input).
            self.transforms = T.Compose([
                T.Resize(size=(227, 227)),
                T.RandomRotation(20),
                T.RandomHorizontalFlip(),
                T.ToTensor(),  # to tensor, scaled into [0, 1]
                normalize,
            ])
        else:
            # BUG FIX: the original never stored a caller-supplied pipeline,
            # so __getitem__ raised AttributeError whenever transforms was given.
            self.transforms = transforms

    @staticmethod
    def _test_id(path):
        """Numeric id of a test image, e.g. 'data/test1/01.jpg' -> 1."""
        return int(path.split('.')[-2].split('/')[-1])

    def __getitem__(self, index):
        """Return (image_tensor, label).

        Label is 1 for dog, 0 for cat; for the test set it is the
        numeric file id instead.
        """
        img_path = self.imgs[index]
        if self.test:
            label = self._test_id(img_path)
        else:
            label = 1 if 'dog' in img_path.split('/')[-1] else 0
        img = Image.open(img_path)
        return self.transforms(img), label

    def __len__(self):
        return len(self.imgs)
models
__init__.py
from .AlexNet import AlexNet
from .ResNet34 import ResNet34
BasicModule.py
import torch as t
import time
class BasicModule(t.nn.Module):
    """nn.Module subclass adding simple checkpoint load/save helpers."""

    def __init__(self):
        super(BasicModule, self).__init__()
        # BUG FIX: str(type(self)) yields "<class '...'>", which contains
        # characters that are illegal in file names and broke save()'s
        # default checkpoint name; use the bare class name instead.
        self.model_name = type(self).__name__

    def load(self, path):
        """Load parameters from the state-dict file at `path`."""
        self.load_state_dict(t.load(path))

    def save(self, name=None):
        """Save the state dict and return the file path.

        Default naming convention: model name + timestamp, under the
        project checkpoints directory.
        """
        if name is None:
            prefix = 'D:/TheMoth/Cat_vs_Dog/checkpoints/' + self.model_name + '_'
            name = time.strftime(prefix + '%m%d_%H%M%S.pth')
        t.save(self.state_dict(), name)
        return name
AlexNet.py
在书中所提供的代码中,卷积池化层中间没用BatchNorm2d层,可能会影响效果。
#coding:utf8
from torch import nn
from .BasicModule import BasicModule
class AlexNet(BasicModule):
    """AlexNet variant with BatchNorm after every convolution.

    Code adapted from torchvision/models/alexnet.py; architecture per
    <https://arxiv.org/abs/1404.5997>.
    """

    def __init__(self, num_classes=2):
        super(AlexNet, self).__init__()
        self.model_name = 'alexnet'

        # One row per conv stage:
        # (in_ch, out_ch, kernel, stride, padding, follow_with_maxpool)
        conv_cfg = [
            (3, 96, 11, 4, 2, True),
            (96, 192, 5, 1, 2, True),
            (192, 384, 3, 1, 1, False),
            (384, 256, 3, 1, 1, False),
            (256, 256, 3, 1, 1, True),
        ]
        feature_layers = []
        for in_ch, out_ch, k, s, p, pool in conv_cfg:
            feature_layers += [
                nn.Conv2d(in_channels=in_ch, out_channels=out_ch,
                          kernel_size=k, stride=s, padding=p, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(True),
            ]
            if pool:
                feature_layers.append(
                    nn.MaxPool2d(kernel_size=3, stride=2, padding=0))
        self.features = nn.Sequential(*feature_layers)

        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(True),
            nn.Linear(in_features=4096, out_features=num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        return self.classifier(x)
ResNet.py
本例并没有用ResNet训练。
#coding:utf8
from .BasicModule import BasicModule
from torch import nn
from torch.nn import functional as F
class ResidualBlock(nn.Module):
    """Residual block: two 3x3 conv/BN stages plus a skip connection.

    The skip path is the identity unless a projection `shortcut` module
    is supplied (needed when channels or spatial size change).
    """

    def __init__(self, inchannel, outchannel, stride=1, shortcut=None):
        super(ResidualBlock, self).__init__()
        self.left = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, 3, stride, 1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, 3, 1, 1, bias=False),
            nn.BatchNorm2d(outchannel),
        )
        self.right = shortcut

    def forward(self, x):
        identity = x if self.right is None else self.right(x)
        return F.relu(self.left(x) + identity)
class ResNet34(BasicModule):
    """ResNet34 built from ResidualBlock units.

    The network is a stem (`pre`) followed by four stages of 3/4/6/3
    residual blocks and a final fully-connected classifier.
    """

    def __init__(self, num_classes=2):
        super(ResNet34, self).__init__()
        self.model_name = 'resnet34'
        # Stem: 7x7 conv + BN + ReLU + 3x3 max-pool, downsampling by 4x.
        self.pre = nn.Sequential(
            nn.Conv2d(3, 64, 7, 2, 3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, 2, 1))
        # Four stages with 3, 4, 6, 3 residual blocks respectively.
        self.layer1 = self._make_layer(64, 128, 3)
        self.layer2 = self._make_layer(128, 256, 4, stride=2)
        self.layer3 = self._make_layer(256, 512, 6, stride=2)
        self.layer4 = self._make_layer(512, 512, 3, stride=2)
        # Final classifier.
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, inchannel, outchannel, block_num, stride=1):
        """Build one stage: a first block with a 1x1 projection shortcut
        (handles the channel/stride change), then block_num-1 plain blocks."""
        shortcut = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, 1, stride, bias=False),
            nn.BatchNorm2d(outchannel))
        layers = [ResidualBlock(inchannel, outchannel, stride, shortcut)]
        for _ in range(1, block_num):
            layers.append(ResidualBlock(outchannel, outchannel))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.pre(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # BUG FIX: F.avg_pool2d(x, 7) assumed a 7x7 final map (224x224
        # input), but the dataset resizes to 227x227, producing an 8x8
        # map and silently discarding border activations. Global adaptive
        # average pooling handles any input size correctly.
        x = F.adaptive_avg_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return self.fc(x)
config.py
import warnings
class DefaultConfig(object):
    """Central hyper-parameter and path configuration.

    All values are class attributes; parse() overrides them per-run
    from a dict of keyword arguments.
    """
    env = 'default'
    model = 'AlexNet'   # must match a class exported by the models package
    train_data_root = 'D:/TheMoth/kaggle/train_part/'  # training-set path
    test_data_root = 'D:/TheMoth/kaggle/test_part/'    # test-set path
    load_model_path = None   # checkpoint to resume from (None = train fresh)
    batch_size = 128         # samples fed per mini-batch
    use_gpu = False
    num_workers = 0          # book uses 4, but >0 hangs the DataLoader here
    print_freq = 20
    debug_file = 'D:/TheMoth/kaggle/'
    result_file = 'result.csv'
    max_epoch = 20
    lr = 0.0014              # the book's 0.1 makes the loss diverge
    lr_decay = 0.95
    weigh_decay = 1e-4       # [sic] misspelled name kept for compatibility

    def parse(self, kwargs):
        """Override config attributes from dict `kwargs`.

        Unknown keys trigger a warning but are still set, matching the
        book's behavior. The resulting config is printed afterwards.
        """
        for k, v in kwargs.items():
            if not hasattr(self, k):
                # Typo fixed: "attribut" -> "attribute".
                warnings.warn("Warning: opt has not attribute %s" % k)
            setattr(self, k, v)
        print('user config:')
        # Iterate keys only; values are fetched via getattr so per-instance
        # overrides (set above) are shown instead of the class defaults.
        for k in self.__class__.__dict__:
            if not k.startswith('__'):
                print(k, getattr(self, k))
main.py
import models
from config import DefaultConfig
from data.dataset import DogCat
from torch.utils.data import DataLoader
import torch as t
from torch.autograd import Variable
def train():
    """Train the module-level `model` using the module-level `opt` config.

    Builds the 70/30 train/validation split, optimizes with Adam and
    cross-entropy loss, then saves a checkpoint and prints validation
    accuracy after every epoch.
    """
    # Datasets: same root directory, split inside DogCat by the train flag.
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=True, num_workers=opt.num_workers)

    # Loss and optimizer.
    criterion = t.nn.CrossEntropyLoss()
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)

    for epoch in range(opt.max_epoch):
        for data, label in train_dataloader:
            # NOTE: torch.autograd.Variable is deprecated; since torch 0.4
            # plain tensors carry autograd state, so the wrappers are gone.
            score = model(data)
            loss = criterion(score, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Checkpoint and validate once per epoch.
        model.save()
        print('验证结果......')
        val_accurancy = val(model, val_dataloader)
        print(val_accurancy)
#验证
def val(model, dataloader):
    """Return the classification accuracy (%) of `model` over `dataloader`.

    Puts the model in eval mode for the pass and restores train mode
    before returning.
    """
    model.eval()
    total = 0
    correct = 0
    # BUG FIX: run the whole forward pass under no_grad. The original only
    # wrapped the (deprecated) Variable construction, so activations were
    # still tracked for autograd during validation, wasting memory.
    with t.no_grad():
        for inputs, labels in dataloader:
            labels = labels.type(t.LongTensor)
            score = model(inputs)
            _, predicted = t.max(score, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # Back to training mode for the caller.
    model.train()
    return 100 * correct / total
#测试
def test():
    """Run the module-level `model` over the test set and print predictions.

    Each printed tensor holds the predicted class per image: 0 = cat, 1 = dog.
    """
    # Inference mode; note the model is intentionally left in eval mode.
    model.eval()
    test_data = DogCat(opt.test_data_root, test=True)
    test_dataloader = DataLoader(test_data, opt.batch_size,
                                 shuffle=False, num_workers=opt.num_workers)
    print('测试结果')
    # BUG FIX: run inference under no_grad (the original only wrapped the
    # deprecated Variable construction, not the forward pass).
    with t.no_grad():
        for data, path in test_dataloader:
            score = model(data)
            _, predicted = t.max(score, 1)
            print(predicted.data)  # 0 = cat, 1 = dog
# Config and model stay at module level: train()/test() read them as globals.
opt = DefaultConfig()
# Look up the model class named in the config (e.g. AlexNet) and instantiate it.
model = getattr(models, opt.model)()

if __name__ == '__main__':
    # Guard so importing this module does not immediately start training.
    train()
    test()