import torch.nn as nn
import torch
# official pretrain weights
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
}


class VGG(nn.Module):
    """VGG classifier: a convolutional feature extractor plus a 3-layer MLP head.

    Args:
        features: module applied to the input first; its flattened output
            must have 512 * 7 * 7 elements per sample, because the first
            classifier layer is hard-wired to that size.
        num_classes: number of output logits.
        init_weights: when True, re-initialise Conv2d/Linear layers via
            ``_initialize_weights``.
    """

    def __init__(self, features, num_classes=1000, init_weights=False):
        super(VGG, self).__init__()
        # Feature extraction and classification are kept as two sub-modules.
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return class logits for a batch ``x``.

        The shape comments below hold for 224 x 224 RGB inputs with the
        configurations in ``cfgs``.
        """
        # N x 3 x 224 x 224
        x = self.features(x)
        # N x 512 x 7 x 7
        x = torch.flatten(x, start_dim=1)
        # N x 512*7*7
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Xavier-uniform init for Conv2d/Linear weights; zero their biases."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_uniform_(m.weight)
                # Layers built with bias=False expose ``bias`` as None.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)


# Feature-extraction network.
def make_features(cfg: list):
    """Build the convolutional stack described by ``cfg``.

    Each integer ``v`` in ``cfg`` adds a 3x3 ``Conv2d`` (padding 1) with ``v``
    output channels followed by an in-place ReLU; each ``"M"`` adds a 2x2
    max-pool with stride 2. The first convolution takes 3 input channels.

    Returns:
        ``nn.Sequential`` holding the layers in order.
    """
    layers = []
    in_channels = 3
    for v in cfg:
        if v == "M":
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            layers += [conv2d, nn.ReLU(True)]
            in_channels = v
    # *layers unpacks the list into positional arguments for Sequential.
    return nn.Sequential(*layers)


# Two stacked 3x3 convolutions replace one 5x5 convolution, and three stacked
# 3x3 convolutions replace one 7x7 convolution: the receptive field is the
# same but fewer parameters are needed. With C input and C output channels:
#   three 3x3 kernels: 3*3*C*C + 3*3*C*C + 3*3*C*C = 27*C*C
#   one 7x7 kernel:    7*7*C*C = 49*C*C
# Receptive field: F(i) = (F(i+1) - 1) * S + K, where F(i) is the receptive
# field of layer i, S is the stride and K is the kernel size.
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


def vgg(model_name="vgg16", **kwargs):
    """Build the VGG variant named ``model_name``.

    Args:
        model_name: a key of ``cfgs`` ('vgg11', 'vgg13', 'vgg16', 'vgg19').
        **kwargs: forwarded to ``VGG`` (``num_classes``, ``init_weights``).

    Raises:
        AssertionError: if ``model_name`` is not a key of ``cfgs``.
    """
    assert model_name in cfgs, "Warning: model number {} not in cfgs dict!".format(model_name)
    cfg = cfgs[model_name]
    model = VGG(make_features(cfg), **kwargs)
    return model
if __name__ == '__main__':
    # Manual smoke check: build a 5-class VGG16 with re-initialised weights.
    model_name = "vgg16"
    net = vgg(model_name=model_name, num_classes=5, init_weights=True)
# ===================== train.py =====================
import os
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm
from model import vgg
def main():
    """Train VGG16 on the flower dataset and keep the best checkpoint.

    Reads ``train`` and ``val`` image folders from
    ``<cwd>/../../data_set/flower_data``, writes the index-to-class mapping
    to ``class_indices.json``, trains for 30 epochs with Adam (lr=1e-4) and
    cross-entropy loss, and saves the state dict to ``./vgg16Net.pth``
    whenever the validation accuracy improves.

    Raises:
        AssertionError: if the dataset directory does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # e.g. {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # Invert to index -> class name so predictions can be mapped back later.
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 1
    # number of workers; os.cpu_count() may return None, so fall back to 1
    # to keep min() from comparing None with ints.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    model_name = "vgg16"
    net = vgg(model_name=model_name, num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 30
    best_acc = 0.0
    save_path = './{}Net.pth'.format(model_name)
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for images, labels in train_bar:
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics (use the Python float, not the loss tensor)
            loss_value = loss.item()
            running_loss += loss_value
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss_value)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Only the best-performing weights (by validation accuracy) are kept.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    torch.cuda.empty_cache()
    main()
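# ===================== predict.py =====================
# NOTE: the listing under this heading duplicates the training script; it does not contain inference code.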
import os
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm
from model import vgg
def main():
    """Train VGG16 on the flower dataset and keep the best checkpoint.

    Reads ``train`` and ``val`` image folders from
    ``<cwd>/../../data_set/flower_data``, writes the index-to-class mapping
    to ``class_indices.json``, trains for 30 epochs with Adam (lr=1e-4) and
    cross-entropy loss, and saves the state dict to ``./vgg16Net.pth``
    whenever the validation accuracy improves.

    Raises:
        AssertionError: if the dataset directory does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # e.g. {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # Invert to index -> class name so predictions can be mapped back later.
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 1
    # number of workers; os.cpu_count() may return None, so fall back to 1
    # to keep min() from comparing None with ints.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    model_name = "vgg16"
    net = vgg(model_name=model_name, num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 30
    best_acc = 0.0
    save_path = './{}Net.pth'.format(model_name)
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for images, labels in train_bar:
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics (use the Python float, not the loss tensor)
            loss_value = loss.item()
            running_loss += loss_value
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss_value)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Only the best-performing weights (by validation accuracy) are kept.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    torch.cuda.empty_cache()
    main()
VGG16 网络的优点：
1. 通过堆叠两层 3x3 的卷积核替代 5x5 的卷积核，通过堆叠三层 3x3 的卷积核替代 7x7 的卷积核。原因是二者拥有相同的感受野，且需要更少的参数。假设输入、输出通道数均为 C，则所需要的参数为：
   - 三个 3x3 的卷积核：$3 \times 3 \times C \times C + 3 \times 3 \times C \times C + 3 \times 3 \times C \times C = 27 \times C \times C$
   - 一个 7x7 的卷积核：$7 \times 7 \times C \times C = 49 \times C \times C$
   感受野的计算：$F(i) = (F(i+1) - 1) \times S + K$，F