VERY DEEP CONVOLUTIONAL NETWORKS FOR LARGE-SCALE IMAGE RECOGNITION
论文阅读
论文链接
这个论文section2就讲Alexnet的LRN没有用,果然喷前任的论文才是最好的点子
1、使用3×3卷积来模拟7×7和5×5卷积
感受野定义:
输出feature map中一个像素对应输入层的区域大小叫感受野
2个3×3卷积的感受野与一个5×5卷积的感受野一致,见图(7×7的原理一致)
2、对AlexNet中使用的数据增强方法进行了改进:针对不同大小的目标,先将原图像随机缩放到不同尺度,再随机裁剪后进行训练,预置尺度S=384
模型图示
D、E对应的是VGG16、19
下面的图源网络,侵权删
代码实现
model.py
跑不动啊= - =
from typing import Callable, Optional
import torch.nn as nn
# Download URLs for torchvision's pretrained VGG weights
# (keys name the four configurations from the VGG paper).
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth'
}
class VGG(nn.Module):
    """VGG image classifier (Simonyan & Zisserman, 2014).

    Args:
        features: convolutional feature extractor (e.g. built by
            ``make_features``); expected to map an N x 3 x 224 x 224 batch
            to N x 512 x 7 x 7 so the classifier's 25088-wide input matches.
        num_classes: number of output classes of the final linear layer.

    NOTE(review): the hidden layers are 2048-wide instead of the paper's
    4096, presumably to reduce memory use — confirm this is intentional.
    """

    def __init__(self,
                 features: nn.Module,
                 num_classes: int = 1000):
        super().__init__()
        self.features = features
        # Fully connected head; input is the flattened 512*7*7 = 25088 map.
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(25088, 2048),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, num_classes)
        )
        self._initialize_weights()

    def _initialize_weights(self):
        """Xavier-initialize conv/linear weights and zero all biases."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # N x 3 x 224 x 224 -> N x 512 x 7 x 7
        x = self.features(x)
        # Flatten everything but the batch dimension: N x 25088
        x = x.flatten(start_dim=1)
        return self.classifier(x)
def make_features(cfg: list):
    """Build the VGG convolutional trunk from a layer configuration.

    Each integer in ``cfg`` appends a 3x3 convolution (padding 1) followed
    by an in-place ReLU; the string ``"M"`` appends a 2x2 max-pool with
    stride 2. Returns the stacked layers as an ``nn.Sequential``.
    """
    layers = []
    channels = 3  # RGB input
    for spec in cfg:
        if spec == "M":
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        layers.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
        layers.append(nn.ReLU(True))
        channels = spec
    return nn.Sequential(*layers)
# Layer configurations (columns A/B/D/E of Table 1 in the VGG paper):
# integers are conv output channels, 'M' marks a 2x2 max-pool.
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
def vgg(model_name="vgg16", **kwargs):
    """Construct a VGG model by configuration name.

    Args:
        model_name: one of 'vgg11', 'vgg13', 'vgg16', 'vgg19'.
        **kwargs: forwarded to ``VGG`` (e.g. ``num_classes``).

    Raises:
        ValueError: if ``model_name`` is not a known configuration.
    """
    # Use a real exception instead of `assert`: asserts are stripped
    # under `python -O`, silently disabling this validation.
    if model_name not in cfgs:
        raise ValueError(
            "Warning: model name {} not in cfgs dict!".format(model_name))
    cfg = cfgs[model_name]
    model = VGG(make_features(cfg), **kwargs)
    return model
train.py
import argparse
import os
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm
from model import vgg
def main(args):
    """Train a VGG16 classifier on an ImageFolder dataset.

    Expects ``args.data_path`` to contain ``train/`` and ``val/``
    subdirectories in torchvision ImageFolder layout. Saves the weights
    with the best validation accuracy to ./vgg16Net.pth.
    """
    print(args)
    # Fall back to CPU when CUDA is unavailable, regardless of args.device.
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))
    batch_size = args.batch_size
    # number of workers: capped at 8 and at the CPU count;
    # 0 (no worker processes) when batch_size is 1
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))
    # Random crop + horizontal flip augmentations for training;
    # plain 224x224 resize for validation. Both normalize to [-1, 1].
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}
    data_path = args.data_path
    train_dataset = datasets.ImageFolder(root=os.path.join(data_path, "train"),
                                         transform=data_transform["train"])
    validate_dataset = datasets.ImageFolder(root=os.path.join(data_path, "val"),
                                            transform=data_transform["val"])
    train_num = len(train_dataset)
    val_num = len(validate_dataset)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True, num_workers=nw)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False, num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))
    model_name = "vgg16"
    net = vgg(model_name=model_name, num_classes=args.num_classes)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), args.lr)
    epochs = args.epochs
    best_acc = 0.0
    save_path = './{}Net.pth'.format(model_name)
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # --- training phase ---
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for data in train_bar:
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # Progress bar shows the current batch loss, not the epoch average.
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)
        # --- validation phase (no gradients, dropout disabled) ---
        net.eval()
        acc = 0.0  # running count of correctly classified validation images
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # Predicted class = index of the max logit per sample.
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        # Checkpoint only when validation accuracy improves.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)
    print('Finished Training')
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Number of target classes (the flower dataset used here has 5).
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=20)
    # NOTE(review): default batch size 512 is very large for VGG16 —
    # verify it fits in GPU memory before training.
    parser.add_argument('--batch_size', type=int, default=512)
    parser.add_argument('--lr', type=float, default=0.0001)
    parser.add_argument('--data_path', type=str,
                        default='../dataset/flower_data')
    parser.add_argument('--device', default='cuda:0')
    opt = parser.parse_args()
    main(opt)
predict.py
import json
import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
from model import vgg
def main():
    """Load the trained VGG16 checkpoint and classify a single image.

    Reads ../tulip.jpg, the class-index mapping from ./class_indices.json
    and the weights from ./vgg16Net.pth, then prints and plots the
    predicted class with its softmax probability.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Same resize/normalization as the validation transform at training time.
    data_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    img_path = "../tulip.jpg"
    img = Image.open(img_path)
    plt.imshow(img)
    img = data_transform(img)
    # Add the batch dimension: C x H x W -> 1 x C x H x W
    img = torch.unsqueeze(img, dim=0)
    # Context manager closes the file even on error (was left open before).
    json_path = './class_indices.json'
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)
    model = vgg(model_name="vgg16", num_classes=5).to(device)
    weights_path = "./vgg16Net.pth"
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.eval()
    with torch.no_grad():
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()
    print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
                                               predict[predict_cla].numpy())
    plt.title(print_res)
    print(print_res)
    plt.show()


if __name__ == '__main__':
    main()
实验结果
这个电脑跑不动,另一个电脑没有windows,换来换去太麻烦,就没跑这个,用CPU跑了跑,代码没有错就发了