本文参考借鉴了github下代码:https://github.com/HaoMood/bilinear-cnn
代码主要由以下几个部分构成:
(1) config.py
(2)BCNN_fc.py和BCNN_all.py
(3)data_load.py
(4)main.py
在本电脑上,上述5个py文件均放在bilinear-cnn文件夹下;该文件夹内另有一个model子文件夹,用于存放训练好的模型。数据集单独存放在磁盘的其他位置。
代码如下:
(1)config文件
# *_*coding: utf-8 *_*
# author --liming--
import os
# Root directory of the CUB_200_2011 dataset (must contain train/ and test/).
CUB_PATH = '/media/lm/C3F680DFF08EB695/细粒度数据集/birds/CUB200/CUB_200_2011/dataset'
# Project root; trained checkpoints are written under <root>/model/.
PROJECT_ROOT = os.getcwd()
# Centralized path lookup used by data_load.py and main.py.
PATH = {
    'cub200_train': CUB_PATH + '/train/',
    'cub200_test': CUB_PATH + '/test/',
    'model': os.path.join(PROJECT_ROOT, 'model/')
}
# SGD training hyper-parameters.
BASE_LEARNING_RATE = 0.05
EPOCHS = 100
BATCH_SIZE = 8
WEIGHT_DECAY = 0.00001
(2) Bilinear CNN网络模型,分别是BCNN_fc和BCNN_all。
# *_*coding: utf-8 *_*
# author --liming--
import torch
import torch.nn as nn
import torchvision
class BCNN_fc(nn.Module):
    """Bilinear CNN (B-CNN) for fine-grained classification where only the
    final fc layer is trained; the VGG16 backbone stays frozen.

    Input:  (N, 3, 448, 448) image batch.
    Output: (N, 200) class logits (CUB200 has 200 bird classes).
    """

    def __init__(self):
        super(BCNN_fc, self).__init__()
        # VGG16 convolution + pooling layers, pretrained on ImageNet.
        self.features = torchvision.models.vgg16(pretrained=True).features
        # Drop the last pooling layer so a 448x448 input yields 28x28 maps.
        self.features = nn.Sequential(*list(self.features.children())[:-1])
        # Linear classifier over the flattened 512x512 bilinear descriptor.
        self.fc = nn.Linear(512 * 512, 200)
        # Freeze the whole backbone.
        # Bug fix: the original wrote `param.requres_grad` (typo), which just
        # set a meaningless attribute and silently left the backbone trainable.
        for param in self.features.parameters():
            param.requires_grad = False
        # Initialize the fc layer.
        nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, x):
        N = x.size()[0]
        assert x.size() == (N, 3, 448, 448)
        x = self.features(x)
        assert x.size() == (N, 512, 28, 28)
        x = x.view(N, 512, 28 * 28)
        # Bilinear pooling: channel-wise outer product, averaged over positions.
        x = torch.bmm(x, torch.transpose(x, 1, 2)) / (28 * 28)
        assert x.size() == (N, 512, 512)
        x = x.view(N, 512 * 512)
        # Signed sqrt (epsilon for numerical stability) + L2 normalization.
        x = torch.sqrt(x + 1e-5)
        x = torch.nn.functional.normalize(x)
        x = self.fc(x)
        assert x.size() == (N, 200)
        return x
# *_*coding: utf-8 *_*
# author --liming--
import torch
import torch.nn as nn
import torchvision
class BCNN_all(nn.Module):
    """Bilinear CNN in which every layer (backbone and classifier) is trained
    from scratch. Maps a (N, 3, 448, 448) batch to (N, 200) logits.
    """

    def __init__(self):
        super(BCNN_all, self).__init__()
        # VGG16 conv/pool stack without pretrained weights; strip the final
        # pooling layer so 448x448 inputs produce 28x28 feature maps.
        backbone = torchvision.models.vgg16(pretrained=False).features
        self.features = torch.nn.Sequential(*list(backbone.children())[:-1])
        # Fully-connected classifier over the 512*512 bilinear vector.
        self.fc = torch.nn.Linear(512 * 512, 200)

    def forward(self, x):
        batch = x.size()[0]
        assert x.size() == (batch, 3, 448, 448)
        feat = self.features(x)
        assert feat.size() == (batch, 512, 28, 28)
        feat = feat.view(batch, 512, 28 * 28)
        # Bilinear pooling: outer product of the feature map with itself,
        # averaged over the 28*28 spatial positions.
        bilinear = torch.bmm(feat, torch.transpose(feat, 1, 2)) / (28 * 28)
        assert bilinear.size() == (batch, 512, 512)
        bilinear = bilinear.view(batch, 512 * 512)
        # Signed square-root followed by L2 normalization.
        bilinear = torch.sqrt(bilinear + 1e-5)
        bilinear = torch.nn.functional.normalize(bilinear)
        logits = self.fc(bilinear)
        assert logits.size() == (batch, 200)
        return logits
(3) CUB200-2011数据集加载,参考前一篇文章https://mp.csdn.net/postedit/102680185
# *_*coding: utf-8 *_*
# author --liming--
import torch
import torchvision
import config
def train_data_process():
    """Build the CUB200 training DataLoader with standard augmentation.

    Returns a shuffled DataLoader over config.PATH['cub200_train'].
    """
    # Resize the short side to 448, then flip/crop for augmentation and
    # normalize with ImageNet statistics.
    augmentation = [
        torchvision.transforms.Resize(size=448),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.RandomCrop(size=448),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                         std=(0.229, 0.224, 0.225)),
    ]
    dataset = torchvision.datasets.ImageFolder(
        root=config.PATH['cub200_train'],
        transform=torchvision.transforms.Compose(augmentation))
    return torch.utils.data.DataLoader(dataset,
                                       batch_size=config.BATCH_SIZE,
                                       shuffle=True,
                                       num_workers=8,
                                       pin_memory=True)
def test_data_process():
    """Build the CUB200 evaluation DataLoader.

    Returns a DataLoader over config.PATH['cub200_test'].

    Bug fixes vs. the original: RandomCrop is replaced with CenterCrop and
    shuffle is disabled — random cropping/shuffling at test time makes the
    reported accuracy nondeterministic between runs.
    """
    test_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=448),
        # Deterministic center crop for evaluation.
        torchvision.transforms.CenterCrop(size=448),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                         std=(0.229, 0.224, 0.225))
    ])
    test_data = torchvision.datasets.ImageFolder(root=config.PATH['cub200_test'],
                                                 transform=test_transforms)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=config.BATCH_SIZE,
                                              shuffle=False,
                                              num_workers=8,
                                              pin_memory=True)
    return test_loader
if __name__ == '__main__':
    # Smoke test: build both loaders to verify the dataset paths resolve
    # (the returned loaders are discarded).
    train_data_process()
    test_data_process()
(4) main.py。用于模型的训练及测试数据下的精度测试。
# *_*coding: utf-8 *_*
# author --liming--
import os
import torch
import torch.nn as nn
import torchvision
import argparse
import config
from BCNN_fc import BCNN_fc
from BCNN_all import BCNN_all
from data_load import train_data_process, test_data_process
# Select GPU when available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Load data.
# NOTE(review): these loaders are built at import time, so merely importing
# this module scans the dataset directories — consider moving into __main__.
train_loader = train_data_process()
test_loader = test_data_process()
# 主程序
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='network_select')
    parser.add_argument('--net_select',
                        dest='net_select',
                        default='BCNN_all',
                        help='select which net to train/test.')
    args = parser.parse_args()

    # Choose the model variant; anything other than 'BCNN_fc' gets BCNN_all.
    if args.net_select == 'BCNN_fc':
        net = BCNN_fc().to(device)
    else:
        net = BCNN_all().to(device)

    # Loss on the same device as the model.
    # Bug fix: the original called `.cuda()` unconditionally, crashing on
    # CPU-only machines even though `device` was already computed.
    criterion = nn.CrossEntropyLoss().to(device)
    # Only the fc parameters are optimized (matches the original behavior;
    # for BCNN_all a full fine-tune would pass net.parameters() instead).
    optimizer = torch.optim.SGD(net.fc.parameters(),
                                lr=config.BASE_LEARNING_RATE,
                                momentum=0.9,
                                weight_decay=config.WEIGHT_DECAY)
    # Drop the LR by 10x when the test accuracy plateaus.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='max',
                                                           factor=0.1,
                                                           patience=3,
                                                           verbose=True,
                                                           threshold=1e-4)

    print('Start Training ==>')
    total_step = len(train_loader)
    best_acc = 0.0
    best_epoch = None
    for epoch in range(config.EPOCHS):
        epoch_loss = []
        num_correct = 0
        num_total = 0
        # Bug fix: the original never restored training mode, so from the
        # second epoch on the model trained while stuck in eval mode.
        net.train()
        for i, (images, labels) in enumerate(train_loader):
            # Move the batch to the selected device (Variable is deprecated;
            # the original `.cuda()` calls also broke CPU-only runs).
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, labels)
            # Accumulate a plain float (the original appended tensors and
            # also bound an unused `aaaa`).
            epoch_loss.append(loss.item())
            # Predicted class = argmax over logits.
            _, prediction = torch.max(outputs.data, 1)
            num_total += labels.size(0)
            num_correct += torch.sum(prediction == labels.data).item()
            loss.backward()
            optimizer.step()
            if (i + 1) % 10 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Training Loss: {:.4f}'.format(
                    epoch + 1, config.EPOCHS, i + 1, total_step, loss.item()))
        train_Acc = 100 * num_correct / num_total
        print('Epoch:%d Training Loss:%.03f Acc: %.03f'
              % (epoch + 1, sum(epoch_loss) / len(epoch_loss), train_Acc))

        # Evaluate on the test set after every epoch.
        print('Waiting for Test ==>')
        net.eval()
        with torch.no_grad():
            num_correct = 0
            num_total = 0
            for images, labels in test_loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = net(images)
                _, prediction = torch.max(outputs.data, 1)
                num_total += labels.size(0)
                num_correct += torch.sum(prediction == labels.data).item()
            test_Acc = 100 * num_correct / num_total
            print('第%d个Epoch下的测试精度为: %.03f' % (epoch + 1, test_Acc))

        # Bug fix: the scheduler was created but never stepped, so the LR
        # never decayed; ReduceLROnPlateau needs the monitored metric.
        scheduler.step(test_Acc)
        # Bug fix: best_acc/best_epoch were declared but never updated.
        if test_Acc > best_acc:
            best_acc = test_Acc
            best_epoch = epoch + 1
        # Save a checkpoint every epoch.
        torch.save(net.state_dict(),
                   config.PATH['model'] + 'vgg16_epoch_%d.pth' % (epoch + 1))
(5)训练过程。
##################################################
更新CUB200-2011数据集的读取方式,通过给定的Text文件进行读取(主要基于Windows系统,Ubuntu系统下可能某些地方需要修改,后续再说明)
(1)首先根据Pytorch数据读取格式,创建数据读取的data.py文件,准备好训练和测试的txt文件,如下所示:
# _*_ coding: UTF-8 _*_
# Author: liming
import torch
from torchvision import transforms
from torch.utils.data import Dataset,DataLoader
from PIL import Image
import config
"""
实验在CUB200-2011, FGVC-Aircraft, Stanford_Cars和Oxford_102_Flowers四个数据集上进行.
"""
#############################################################################
# CUB200-2011数据集
def default_loader(path):
    """Open an image file and force 3-channel RGB."""
    return Image.open(path).convert('RGB')


def _make_txt_dataset(root):
    """Return a Dataset class that reads "<relative_path> <label>" lines from a
    txt file and loads each image from `root` + relative path.

    Deduplicates the original four copy-pasted, branch-specific class bodies
    (they were byte-identical except for which config path they read), and
    closes the txt file handle, which the originals leaked.
    """
    class _TxtDataset(Dataset):
        def __init__(self, txt, transform, loader=default_loader):
            imgs = []
            with open(txt, 'r') as fh:
                for line in fh:
                    line = line.strip('\n').rstrip()
                    words = line.split()
                    # words[0] = relative image path, words[1] = integer label.
                    imgs.append((words[0], int(words[1])))
            self.imgs = imgs
            self.transform = transform
            self.loader = loader

        def __getitem__(self, index):
            fn, label = self.imgs[index]
            img = self.loader(root + fn)
            img = self.transform(img)
            return img, label

        def __len__(self):
            return len(self.imgs)

    return _TxtDataset


# Stanford_Cars keeps separate train/test roots and exports two classes;
# every other dataset (CUB200_2011, FGVC_Aircraft, Stanford_Dogs, ...)
# shares a single data_path and exports `MyDataset`, exactly as before.
if config.args.dataset == 'Stanford_Cars':
    MyDataset_train = _make_txt_dataset(config.train_data_path)
    MyDataset_test = _make_txt_dataset(config.test_data_path)
else:
    MyDataset = _make_txt_dataset(config.data_path)
(2)调用data.py文件进行数据读取,得到train_loader和test_loader。
# _*_ coding: UTF-8 _*_
# Author: liming
import os
import torch
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn as nn
import data.data as data
import torch.backends.cudnn as cudnn
import config
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# 导入数据集
# Bug fix: `Image.BILINEAR` was referenced below without importing PIL's
# Image, which raised NameError as soon as this module ran.
from PIL import Image

# Training set: square resize, random flip and random crop for augmentation,
# ImageNet normalization.
trainset = data.MyDataset_train(config.train_txt_path, transform=transforms.Compose([
    transforms.Resize((config.Resize, config.Resize), Image.BILINEAR),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(config.Crop_Size),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
]))
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=config.Batch_Size,
                                          shuffle=True,
                                          num_workers=config.num_workers)
# Test set: deterministic center crop, no shuffling.
testset = data.MyDataset_test(config.test_txt_path, transform=transforms.Compose([
    transforms.Resize((config.Resize, config.Resize), Image.BILINEAR),
    transforms.CenterCrop(config.Crop_Size),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
]))
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=config.Batch_Size,
                                         shuffle=False,
                                         num_workers=config.num_workers)
# Let cuDNN benchmark conv algorithms for the fixed input size.
cudnn.benchmark = True