1. Imports
import os
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
from PIL import Image

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
2. Loading the dataset
batch_size = 64

train_dataset = datasets.CIFAR10(root='data',
                                 train=True,
                                 transform=transforms.ToTensor(),
                                 download=True)

test_dataset = datasets.CIFAR10(root='data',
                                train=False,
                                transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)
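As a quick sanity check (a minimal sketch; the loop variable names here are illustrative), you can pull one batch from the loader and confirm the expected CIFAR-10 shapes:

for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)   # torch.Size([64, 3, 32, 32])
    print('Label batch dimensions:', labels.shape)   # torch.Size([64])
    break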
3. Building the ResNet-101 model
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
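The bottleneck block compresses the channels with a 1x1 convolution, applies the 3x3 convolution at the reduced width, then expands back by a factor of expansion = 4, adding the (possibly downsampled) input as a residual. A quick sketch of the shapes involved (the input tensor is made up purely for illustration):

block = Bottleneck(inplanes=256, planes=64)   # 256 -> 64 -> 64 -> 256 channels
x = torch.randn(1, 256, 8, 8)
print(block(x).shape)                          # torch.Size([1, 256, 8, 8])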
class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes, grayscale):
        self.inplanes = 64
        if grayscale:
            in_dim = 1
        else:
            in_dim = 3
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(in_dim, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # Note: this pooling layer is never used in forward() below. With
        # 32x32 CIFAR-10 inputs the feature map is already 1x1 after layer4,
        # so flattening alone is sufficient.
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = x.view(x.size(0), -1)
        logits = self.fc(x)
        probas = F.softmax(logits, dim=1)
        return logits, probas
def resnet101(num_classes):
    """Constructs a ResNet-101 model."""
    model = ResNet(block=Bottleneck,
                   layers=[3, 4, 23, 3],
                   num_classes=num_classes,
                   grayscale=False)
    return model
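The layer configuration [3, 4, 23, 3] gives 33 bottleneck blocks of 3 convolutions each, which together with the stem convolution and the final fully connected layer makes 101 weight layers. A quick shape check (illustrative only; the random input mimics a CIFAR-10 batch of 2):

model = resnet101(num_classes=10)
logits, probas = model(torch.randn(2, 3, 32, 32))
print(logits.shape, probas.shape)   # both torch.Size([2, 10])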
4. Training
NUM_EPOCHS = 16

model = resnet101(num_classes=10)
model = model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

valid_loader = test_loader
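Note that the test set is reused as the validation set here, so the "Validation Acc." reported below is really test accuracy. A cleaner alternative (a sketch only; the 45000/5000 split is an arbitrary choice) would carve a validation split out of the 50000 training images:

from torch.utils.data import random_split

train_subset, valid_subset = random_split(train_dataset, [45000, 5000])
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_subset, batch_size=batch_size, shuffle=False)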
def compute_accuracy_and_loss(model, data_loader, device):
    correct_pred, num_examples = 0, 0
    cross_entropy = 0.
    for i, (features, targets) in enumerate(data_loader):
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        # Accumulate the summed (not batch-averaged) loss so that dividing by
        # num_examples below gives the true per-example average; otherwise the
        # last, smaller batch would be weighted incorrectly.
        cross_entropy += F.cross_entropy(logits, targets, reduction='sum').item()
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float() / num_examples * 100, cross_entropy / num_examples
start_time = time.time()
train_acc_lst, valid_acc_lst = [], []
train_loss_lst, valid_loss_lst = [], []

for epoch in range(NUM_EPOCHS):

    model.train()

    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(DEVICE)
        targets = targets.to(DEVICE)

        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        if not batch_idx % 200:
            print(f'Epoch: {epoch+1:03d}/{NUM_EPOCHS:03d} | '
                  f'Batch {batch_idx:04d}/{len(train_loader):04d} |'
                  f' Cost: {cost:.4f}')

    model.eval()
    with torch.set_grad_enabled(False):
        train_acc, train_loss = compute_accuracy_and_loss(model, train_loader, device=DEVICE)
        valid_acc, valid_loss = compute_accuracy_and_loss(model, valid_loader, device=DEVICE)
        train_acc_lst.append(train_acc)
        valid_acc_lst.append(valid_acc)
        train_loss_lst.append(train_loss)
        valid_loss_lst.append(valid_loss)
        print(f'Epoch: {epoch+1:03d}/{NUM_EPOCHS:03d} Train Acc.: {train_acc:.2f}%'
              f' | Validation Acc.: {valid_acc:.2f}%')

    elapsed = (time.time() - start_time) / 60
    print(f'Time elapsed: {elapsed:.2f} min')

elapsed = (time.time() - start_time) / 60
print(f'Total Training Time: {elapsed:.2f} min')
Training results
Epoch: 001/016 | Batch 0000/0782 | Cost: 2.6448
Epoch: 001/016 | Batch 0200/0782 | Cost: 1.9297
Epoch: 001/016 | Batch 0400/0782 | Cost: 2.0488
Epoch: 001/016 | Batch 0600/0782 | Cost: 2.0431
Epoch: 001/016 Train Acc.: 39.93% | Validation Acc.: 36.83%
Time elapsed: 1.64 min
Epoch: 002/016 | Batch 0000/0782 | Cost: 1.6723
Epoch: 002/016 | Batch 0200/0782 | Cost: 1.5365
Epoch: 002/016 | Batch 0400/0782 | Cost: 1.6170
Epoch: 002/016 | Batch 0600/0782 | Cost: 1.3389
Epoch: 002/016 Train Acc.: 49.90% | Validation Acc.: 43.29%
Time elapsed: 3.29 min
Epoch: 003/016 | Batch 0000/0782 | Cost: 1.2280
Epoch: 003/016 | Batch 0200/0782 | Cost: 1.0847
Epoch: 003/016 | Batch 0400/0782 | Cost: 1.5266
Epoch: 003/016 | Batch 0600/0782 | Cost: 1.3456
Epoch: 003/016 Train Acc.: 56.43% | Validation Acc.: 48.08%
Time elapsed: 4.94 min
Epoch: 004/016 | Batch 0000/0782 | Cost: 1.4013
Epoch: 004/016 | Batch 0200/0782 | Cost: 1.2829
Epoch: 004/016 | Batch 0400/0782 | Cost: 1.2168
Epoch: 004/016 | Batch 0600/0782 | Cost: 1.2001
Epoch: 004/016 Train Acc.: 60.69% | Validation Acc.: 50.01%
Time elapsed: 6.58 min
Epoch: 005/016 | Batch 0000/0782 | Cost: 1.3680
Epoch: 005/016 | Batch 0200/0782 | Cost: 1.2036
Epoch: 005/016 | Batch 0400/0782 | Cost: 1.0483
Epoch: 005/016 | Batch 0600/0782 | Cost: 1.2299
Epoch: 005/016 Train Acc.: 67.86% | Validation Acc.: 54.16%
Time elapsed: 8.22 min
Epoch: 006/016 | Batch 0000/0782 | Cost: 1.1428
Epoch: 006/016 | Batch 0200/0782 | Cost: 0.9702
Epoch: 006/016 | Batch 0400/0782 | Cost: 1.0632
Epoch: 006/016 | Batch 0600/0782 | Cost: 1.2375
Epoch: 006/016 Train Acc.: 66.81% | Validation Acc.: 52.34%
Time elapsed: 9.86 min
Epoch: 007/016 | Batch 0000/0782 | Cost: 0.8962
Epoch: 007/016 | Batch 0200/0782 | Cost: 1.0879
Epoch: 007/016 | Batch 0400/0782 | Cost: 1.1377
Epoch: 007/016 | Batch 0600/0782 | Cost: 1.1011
Epoch: 007/016 Train Acc.: 72.14% | Validation Acc.: 55.30%
Time elapsed: 11.49 min
Epoch: 008/016 | Batch 0000/0782 | Cost: 0.8141
Epoch: 008/016 | Batch 0200/0782 | Cost: 0.6959
Epoch: 008/016 | Batch 0400/0782 | Cost: 0.9784
Epoch: 008/016 | Batch 0600/0782 | Cost: 1.1159
Epoch: 008/016 Train Acc.: 76.08% | Validation Acc.: 58.57%
Time elapsed: 13.14 min
Epoch: 009/016 | Batch 0000/0782 | Cost: 0.6706
Epoch: 009/016 | Batch 0200/0782 | Cost: 0.7745
Epoch: 009/016 | Batch 0400/0782 | Cost: 0.7790
Epoch: 009/016 | Batch 0600/0782 | Cost: 1.0540
Epoch: 009/016 Train Acc.: 78.26% | Validation Acc.: 58.76%
Time elapsed: 14.78 min
Epoch: 010/016 | Batch 0000/0782 | Cost: 0.4955
Epoch: 010/016 | Batch 0200/0782 | Cost: 0.5948
Epoch: 010/016 | Batch 0400/0782 | Cost: 0.7049
Epoch: 010/016 | Batch 0600/0782 | Cost: 0.6488
Epoch: 010/016 Train Acc.: 77.58% | Validation Acc.: 58.31%
Time elapsed: 16.43 min
Epoch: 011/016 | Batch 0000/0782 | Cost: 0.6648
Epoch: 011/016 | Batch 0200/0782 | Cost: 0.4936
Epoch: 011/016 | Batch 0400/0782 | Cost: 0.6123
Epoch: 011/016 | Batch 0600/0782 | Cost: 0.5989
Epoch: 011/016 Train Acc.: 86.26% | Validation Acc.: 60.56%
Time elapsed: 18.07 min
Epoch: 012/016 | Batch 0000/0782 | Cost: 0.3107
Epoch: 012/016 | Batch 0200/0782 | Cost: 0.4070
Epoch: 012/016 | Batch 0400/0782 | Cost: 0.5987
Epoch: 012/016 | Batch 0600/0782 | Cost: 0.6067
Epoch: 012/016 Train Acc.: 88.04% | Validation Acc.: 61.81%
Time elapsed: 19.71 min
Epoch: 013/016 | Batch 0000/0782 | Cost: 0.3936
Epoch: 013/016 | Batch 0200/0782 | Cost: 0.6761
Epoch: 013/016 | Batch 0400/0782 | Cost: 0.7042
Epoch: 013/016 | Batch 0600/0782 | Cost: 0.5793
Epoch: 013/016 Train Acc.: 80.50% | Validation Acc.: 55.80%
Time elapsed: 21.35 min
Epoch: 014/016 | Batch 0000/0782 | Cost: 0.3836
Epoch: 014/016 | Batch 0200/0782 | Cost: 0.3163
Epoch: 014/016 | Batch 0400/0782 | Cost: 0.5610
Epoch: 014/016 | Batch 0600/0782 | Cost: 0.4092
Epoch: 014/016 Train Acc.: 91.57% | Validation Acc.: 62.75%
Time elapsed: 23.00 min
Epoch: 015/016 | Batch 0000/0782 | Cost: 0.2652
Epoch: 015/016 | Batch 0200/0782 | Cost: 0.2393
Epoch: 015/016 | Batch 0400/0782 | Cost: 0.3272
Epoch: 015/016 | Batch 0600/0782 | Cost: 0.4533
Epoch: 015/016 Train Acc.: 92.90% | Validation Acc.: 61.18%
Time elapsed: 24.64 min
Epoch: 016/016 | Batch 0000/0782 | Cost: 0.1563
Epoch: 016/016 | Batch 0200/0782 | Cost: 0.2644
Epoch: 016/016 | Batch 0400/0782 | Cost: 0.3742
Epoch: 016/016 | Batch 0600/0782 | Cost: 0.2075
Epoch: 016/016 Train Acc.: 95.18% | Validation Acc.: 63.73%
Time elapsed: 26.29 min
Total Training Time: 26.29 min
(Figure: training loss vs. test loss over epochs)
(Figure: training accuracy vs. test accuracy over epochs)
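The two figures can be reproduced from the lists recorded during training; a minimal sketch (axis labels and legend text are my own choices; the accuracies are stored as 0-dim tensors, hence .item()):

epochs = range(1, NUM_EPOCHS + 1)

# Loss curves.
plt.plot(epochs, train_loss_lst, label='Training loss')
plt.plot(epochs, valid_loss_lst, label='Test loss')
plt.xlabel('Epoch')
plt.ylabel('Cross-entropy loss')
plt.legend()
plt.show()

# Accuracy curves.
plt.plot(epochs, [a.item() for a in train_acc_lst], label='Training accuracy')
plt.plot(epochs, [a.item() for a in valid_acc_lst], label='Test accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.legend()
plt.show()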
5. Testing
model.eval()
with torch.set_grad_enabled(False):
    test_acc, test_loss = compute_accuracy_and_loss(model, test_loader, DEVICE)
    print(f'Test accuracy: {test_acc:.2f}%')

Test accuracy: 63.73%
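If you want to keep the trained weights for later reuse, a minimal sketch (the file name is an arbitrary choice):

torch.save(model.state_dict(), 'resnet101_cifar10.pt')

# Later: rebuild the architecture and load the weights back.
model = resnet101(num_classes=10)
model.load_state_dict(torch.load('resnet101_cifar10.pt'))
model = model.to(DEVICE)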
6. Visualizing predictions
for features, targets in train_loader:
    break

# Predict the first 8 images of the batch; the model is still in eval mode
# from the test step above.
_, predictions = model(features[:8].to(DEVICE))
predictions = torch.argmax(predictions, dim=1)
print(predictions)

# Keep only the first few images for display.
features = features[:7]

fig = plt.figure()
tname = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Show the first 6 images together with their actual and predicted labels;
# CHW tensors are converted to HWC arrays for imshow.
for i in range(6):
    plt.subplot(2, 3, i+1)
    plt.tight_layout()
    tmp = features[i]
    plt.imshow(np.transpose(tmp.numpy(), (1, 2, 0)))
    plt.title("Actual value: {}".format(tname[targets[i]])
              + '\n' + "Prediction value: {}".format(tname[predictions[i]]),
              size=10)
plt.show()