本篇博客是学习B站霹雳吧啦Wz教学视频的总结
本节所用到的程序和教学视频链接:
程序 视频 数据集下载,提取码:dw2d
VGG网络特性
通过堆叠多个 $3\times 3$ 的卷积核来代替大尺度卷积核(在获得相同感受野的情况下减少所需参数)
程序结构
model.py
import torch. nn as nn
import torch
from torch. nn. modules. dropout import Dropout
from torch. nn. modules. linear import Linear
class VGG(nn.Module):
    """VGG classifier: a configurable conv feature extractor followed by
    the standard three-layer fully-connected head.

    Args:
        features: module (built by make_features) that maps a 224x224
            image batch to a (N, 512, 7, 7) feature map.
        num_classes: number of output classes.
        init_weights: when True, Xavier-initialize all Conv2d/Linear
            layers and zero their biases.
    """

    def __init__(self, features, num_classes=1000, init_weights=False):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        # BUG FIX: the original class had no forward(), so calling the
        # model (net(images) in train.py) would fail. Extract features,
        # flatten everything except the batch dim, then classify.
        x = self.features(x)
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        # Xavier init for every conv / linear layer; biases set to zero.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.constant_(m.bias, 0)
def make_features(cfg: list):
    """Build the convolutional backbone from a config list.

    Each int in *cfg* becomes a 3x3 conv (padding 1) with that many
    output channels followed by an in-place ReLU; each 'M' becomes a
    2x2 max-pool with stride 2. Returns the layers as nn.Sequential.
    """
    layers = []
    channels = 3  # RGB input
    for item in cfg:
        if item == "M":
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            layers.extend([
                nn.Conv2d(channels, item, kernel_size=3, padding=1),
                nn.ReLU(True),
            ])
            channels = item
    return nn.Sequential(*layers)
# Per-variant layer configurations consumed by make_features():
# ints are conv output-channel counts, 'M' marks a 2x2 max-pool.
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
def vgg(model_name="vgg16", **kwargs):
    """Factory: build a VGG variant by name.

    BUG FIX: the original `def VGG` shadowed the VGG class defined above,
    so the internal `VGG(...)` call would recurse into this function
    instead of constructing the model; train.py and predict.py also
    import it as lowercase `vgg`.

    Args:
        model_name: one of the keys of `cfgs` ('vgg11'/'vgg13'/'vgg16'/'vgg19').
        **kwargs: forwarded to the VGG constructor (num_classes, init_weights).

    Returns:
        The constructed VGG model.
    """
    assert model_name in cfgs, "warning: model number {} not in cfgs dict!".format(model_name)
    cfg = cfgs[model_name]
    model = VGG(make_features(cfg), **kwargs)
    return model
为应对不同的VGG参数结构,程序中设置了cfgs字典,来存储不同的结构的features 参数,在实例化vgg时,首先根据输入的网络名称从字典中选取对应的参数。 *layers 表示将数组的元素分离成独立的部分 **kwargs 表示可变长度的参数
train.py
import os
import json
import torch
from torch. _C import import_ir_module
import torch. nn as nn
from torchvision import transforms, datasets
import torch. optim as optim
from tqdm import tqdm
from model import vgg
def main():
    """Train a VGG16 on the flower dataset and save the best checkpoint."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Standard VGG preprocessing; the train side adds crop/flip augmentation.
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    image_path = os.path.join(data_root, "data", "flower_data")
    assert os.path.exists(image_path), "file {} does not exists.".format(image_path)
    # BUG FIX: was os.pardir.join(...) — os.pardir is the string '..', so
    # this raised at runtime; the intended call is os.path.join.
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    val_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                       transform=data_transform["val"])
    train_num = len(train_dataset)
    val_num = len(val_dataset)

    # Invert class->index into index->class and persist it for predict.py.
    # BUG FIX: dicts have .items(), not .item().
    flower_list = train_dataset.class_to_idx
    cla_list = dict((val, key) for key, val in flower_list.items())
    json_str = json.dumps(cla_list, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    nw = 0  # dataloader worker processes; 0 = load in the main process
    print('Using {} dataloader workers every process'.format(nw))
    # BUG FIX: the class is DataLoader (was misspelled Dataloader/Dataloder).
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)
    validate_loader = torch.utils.data.DataLoader(val_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("Using {} images for training, {} images for validation.".format(train_num, val_num))

    model_name = "vgg16"
    net = vgg(model_name=model_name, num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 30
    best_acc = 0.0
    save_path = './{}Net.pth'.format(model_name)
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # ---- training phase ----
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # BUG FIX: format spec was {:3f} (field width 3), meant {:.3f}.
            train_bar.desc = "train epoch[{}/{}] loss{:.3f}".format(epoch + 1,
                                                                    epochs,
                                                                    loss)

        # ---- validation phase ----
        net.eval()
        acc = 0.0
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
        val_accurate = acc / val_num
        # BUG FIX: '%.3f%' left a stray '%' that raises
        # "unsupported format character" at runtime.
        print('[epoch %d] train loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        if val_accurate > best_acc:
            best_acc = val_accurate
            # BUG FIX: the method is state_dict(), not state.dict().
            torch.save(net.state_dict(), save_path)
    print('Finished Training')


if __name__ == '__main__':
    main()
predict.py
import os
import json
import torch
from PIL import Image
from torch. cuda import device_count
from torchvision import transforms
import matplotlib. pyplot as plt
from model import vgg
def main():
    """Classify a single image with the trained VGG16 checkpoint."""
    # BUG FIX: the method is is_available (was misspelled is_avaliable).
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Must match the validation-side preprocessing used in train.py.
    data_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )

    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file {} does not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    img = data_transform(img)
    img = torch.unsqueeze(img, dim=0)  # add batch dim: (1, 3, 224, 224)

    # Index -> class-name mapping written by train.py.
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: {} does not exist".format(json_path)
    # Use a context manager so the handle is closed (the original leaked it).
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    model = vgg(model_name="vgg16", num_classes=5).to(device)
    weight_path = "./vgg16Net.pth"
    assert os.path.exists(weight_path), "file: {} does not exist.".format(weight_path)
    model.load_state_dict(torch.load(weight_path, map_location=device))

    model.eval()
    with torch.no_grad():
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_ref = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
                                               predict[predict_cla].numpy())
    plt.title(print_ref)
    print(print_ref)
    plt.show()


# BUG FIX: the guard compared against '__main___' (three underscores),
# so main() never ran when the script was executed directly.
if __name__ == '__main__':
    main()
文章链接
pytorch学习(四)—— GoogLeNet网络搭建