其优点是利用小卷积核代替大卷积核:两个3*3的卷积层相当于一个5*5的感受野,三个3*3相当于一个7*7的感受野,同时减少了参数量。池化层均使用2*2的窗口。测试阶段可以用三个卷积层替换三个全连接层,从而实现任意高和宽的输入。
下面是vgg网络
import torch.nn as nn
import torch
# Official ImageNet-pretrained checkpoint URLs for each supported VGG variant
# (keys match the keys of `cfgs` below).
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth'
}
class VGG(nn.Module):
    """VGG network: a convolutional feature extractor plus a 3-layer MLP head.

    Args:
        features: feature-extractor module (typically built by make_features).
        num_classes: number of output classes of the final linear layer.
        init_weights: when True, re-initialize conv/linear weights in-place.
    """

    def __init__(self, features, num_classes=3, init_weights=False):
        super(VGG, self).__init__()
        self.features = features
        # Head sized for a 512 x 3 x 3 feature map — i.e. 112x112 inputs after
        # the five 2x2 poolings of the standard VGG configs (112 -> 3 with
        # floor division at each pool).
        self.classifier = nn.Sequential(
            nn.Linear(512 * 3 * 3, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes)
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        # Extract convolutional features, then collapse everything but the
        # batch dimension before the fully-connected classifier.
        feats = self.features(x)
        flat = torch.flatten(feats, start_dim=1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Xavier-uniform weights and zero biases for every conv/linear layer."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                # Xavier initialization chosen over the (commented-out upstream)
                # Kaiming scheme.
                nn.init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                nn.init.constant_(module.bias, 0)
# 下面将11、13、16、19层网络的卷积结构巧妙地存放在cfgs字典的各个配置列表中
def make_features(cfg: list):
    """Build the VGG convolutional stack described by a config list.

    Each integer entry adds a 3x3 conv (stride 1, padding 1) with that many
    output channels followed by an in-place ReLU; the string "M" adds a 2x2
    max-pooling layer. Input is assumed to have 3 channels (RGB).

    Returns:
        nn.Sequential containing the assembled layers in order.
    """
    modules = []
    channels = 3
    for entry in cfg:
        if entry == "M":
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        modules.append(nn.Conv2d(channels, entry, kernel_size=3, padding=1))
        modules.append(nn.ReLU(True))
        channels = entry
    return nn.Sequential(*modules)
# 单星号(*):将序列解包为若干位置参数传入;函数定义中的*args则以元组形式接收多余的位置参数
# 双星号(**):将字典解包为关键字参数传入;函数定义中的**kwargs则以字典形式接收多余的关键字参数
# {}为字典、[]为列表、()为元组(元组不可修改)
# Layer configurations for the 11/13/16/19-layer VGG variants.
# Each integer is the output-channel count of a 3x3 conv (+ ReLU);
# 'M' inserts a 2x2 max-pooling layer. Consumed by make_features().
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
def vgg(model_name="vgg16", **kwargs):
    """Build a VGG variant by name.

    Args:
        model_name: one of the keys of ``cfgs`` ('vgg11'/'vgg13'/'vgg16'/'vgg19').
        **kwargs: forwarded to the ``VGG`` constructor — e.g. ``num_classes``
            and ``init_weights``; calling plain ``vgg(model_name="vgg16")``
            uses the constructor defaults.

    Returns:
        The assembled VGG model.
    """
    # Fix: the message previously said "model number" — the argument is the
    # model *name* (a cfgs key).
    assert model_name in cfgs, "Warning: model name {} not in cfgs dict!".format(model_name)
    cfg = cfgs[model_name]
    return VGG(make_features(cfg), **kwargs)
下面是对应我自己数据集的主函数
import torch
import torch.nn as nn
import creatdataset
from accurary import learning_curve
from alexnet import AlexNet
from resnext import Resnext
from test import test
from train import train
from vgg import VGG, vgg
def load_dataset(batch_size):
    """Create DataLoaders for the train/val/test splits of the local dataset.

    Args:
        batch_size: mini-batch size shared by all three loaders.

    Returns:
        (train_iter, val_iter, test_iter) — train shuffled, val/test in order.
    """
    root = r"C:\Users\Jia\PycharmProjects\pythonProject\resnet_dataset"
    make_loader = torch.utils.data.DataLoader

    train_iter = make_loader(creatdataset.MyDataset(root, mode="train"),
                             batch_size=batch_size, shuffle=True)
    val_iter = make_loader(creatdataset.MyDataset(root, mode="val"),
                           batch_size=batch_size, shuffle=False, num_workers=0)
    test_iter = make_loader(creatdataset.MyDataset(root, mode="test"),
                            batch_size=batch_size, shuffle=False, num_workers=0)
    return train_iter, val_iter, test_iter
import torch.optim as optim
BATCH_SIZE = 128  # mini-batch size
NUM_EPOCHS = 12  # number of training epochs
NUM_CLASSES = 3  # number of target classes (matches the VGG default)
LEARNING_RATE = 0.01  # initial SGD learning rate
MOMENTUM = 0.9  # SGD momentum factor
WEIGHT_DECAY = 0.0005  # L2 weight-decay coefficient
NUM_PRINT = 1  # logging interval — presumably batches between prints; confirm in train()
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # run on GPU when available
def main():
    """Train and evaluate VGG-16 on the custom dataset, then plot accuracy curves."""
    net = vgg(model_name="vgg16")
    net = net.to(DEVICE)
    train_iter, val_iter, test_iter = load_dataset(BATCH_SIZE)  # build the data loaders
    criterion = nn.CrossEntropyLoss()  # cross-entropy classification loss
    # SGD with Nesterov momentum; net.parameters() yields every learnable tensor.
    optimizer = optim.SGD(
        net.parameters(),
        lr=LEARNING_RATE,
        momentum=MOMENTUM,
        weight_decay=WEIGHT_DECAY,
        nesterov=True
    )
    # Decay the learning rate by `gamma` every `step_size` epochs.
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    record_train, record_val = train(net, train_iter, criterion, optimizer,
                                     NUM_EPOCHS, DEVICE, NUM_PRINT, lr_scheduler, val_iter)
    learning_curve(record_train, record_val)  # plot the accuracy curves
    if test_iter is not None:  # fix: this checks the TEST loader, not the validation set
        test(net, test_iter, criterion, DEVICE)


# Fix: guard the entry point so importing this module does not start a
# training run (consistent with the reference script's __main__ guard).
if __name__ == "__main__":
    main()
下面是原数据集的主函数
import os
import sys
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm
from model import vgg
def main():
    """Reference training loop: train VGG-16 from scratch on the 5-class
    flower dataset, validate every epoch, and save the best checkpoint."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        # Training from scratch, so plain (0.5, 0.5, 0.5) stats are fine; for
        # transfer learning the pretrained model's own normalization stats
        # must be used instead.
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)
    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # Invert class->index so predictions can be mapped back to class names.
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)
    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)
    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))
    # test_data_iter = iter(validate_loader)
    # test_image, test_label = test_data_iter.next()
    model_name = "vgg16"
    net = vgg(model_name=model_name, num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)
    epochs = 30
    best_acc = 0.0  # best validation accuracy seen so far
    save_path = './{}Net.pth'.format(model_name)
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()
            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)
        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # argmax over class logits -> predicted class index
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        # Checkpoint only when validation accuracy improves.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)
    print('Finished Training')


if __name__ == '__main__':
    main()