pytorch练习: resnet+siamese对FlickerLogo32(32种商标)分类
- resnet-18网络结构示意图如下
-
ResNet全名Residual Network残差网络。Kaiming He 的《Deep Residual Learning for Image Recognition》获得了CVPR最佳论文。他提出的深度残差网络在2015年可以说是洗刷了图像方面的各大比赛,以绝对优势取得了多个比赛的冠军。而且它在保证网络精度的前提下,将网络的深度达到了152层,后来又进一步加到1000的深度。论文的开篇先是说明了深度网络的好处:特征等级随着网络的加深而变高,网络的表达能力也会大大提高。因此论文中提出了一个问题:是否可以通过叠加网络层数来获得一个更好的网络呢?作者经过实验发现,单纯的把网络叠起来的深层网络的效果反而不如合适层数的较浅的网络效果。因此何恺明等人在普通平原网络的基础上增加了一个shortcut, 构成一个residual block。此时网络层的拟合目标就由原映射H(x)变为残差F(x) = H(x) − x,网络只需学习残差部分:
参考:https://blog.csdn.net/sunqiande88/article/details/80100891
2.自定义数据集
由于数据是图片,所以要自己制作数据集
参考ImageFolder的使用
#loadData.py
'''authored by xiaotian'''
import random
from torch.utils.data import Dataset,DataLoader
import torchvision.datasets
from torchvision import transforms
import torch
import numpy as np
from PIL import Image
class getDataset(Dataset):
    """Siamese pair dataset built on top of a torchvision ImageFolder.

    Each ``__getitem__`` call draws a random pair of images: with
    probability 0.5 the two images share a class, otherwise they differ.
    The returned pair label follows the contrastive-loss convention used
    in this project: 0 = same class, 1 = different class.

    Args:
        getDataset: an ImageFolder-like object exposing ``imgs`` as a
            list of ``(path, class_index)`` tuples.
        transform: optional transform applied to both images.
        relables: when True, additionally return the raw class labels of
            both images (name kept as-is — sic "relabels" — for backward
            compatibility with existing callers).
    """

    def __init__(self, getDataset, transform=None, relables=False):
        self.getDataset = getDataset
        self.relables = relables
        self.transform = transform
        # Extract paths/labels ONCE here.  The original rebuilt both
        # lists from scratch on every __getitem__ call (O(n) per item),
        # and shadowed the builtin `list` while doing so.
        self.paths = [entry[0] for entry in getDataset.imgs]
        self.labels = [entry[1] for entry in getDataset.imgs]

    def __getitem__(self, index):
        # `index` is intentionally ignored: pairs are sampled at random.
        paths, labels = self.paths, self.labels
        rand_i = random.choice(range(len(paths)))
        should_get_same_class = random.randint(0, 1)
        # Re-draw the second image until its class relationship to the
        # first matches the coin flip.  NOTE(review): assumes the folder
        # contains at least two classes and at least two images of some
        # class, otherwise one of the two cases could loop forever — the
        # original had the same limitation.
        while True:
            rand_j = random.choice(range(len(paths)))
            same = labels[rand_j] == labels[rand_i]
            if same == bool(should_get_same_class):
                break
        img0 = Image.open(paths[rand_i])
        img1 = Image.open(paths[rand_j])
        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)
        # 0 -> same class, 1 -> different class (shape (1,) float32).
        pair_label = torch.from_numpy(
            np.array([int(labels[rand_i] != labels[rand_j])], dtype=np.float32))
        if self.relables:
            return img0, img1, pair_label, labels[rand_i], labels[rand_j]
        return img0, img1, pair_label

    def __len__(self):
        return len(self.getDataset.imgs)
# Shared preprocessing pipeline: resize to the network's 32x32 input,
# augment with a random horizontal flip, then convert to a tensor.
_train_steps = [
    transforms.Resize(size=(32, 32)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
]
transform_train = transforms.Compose(_train_steps)
def getDataloder(data_dir="./FlickerLogo32/train", batch_size=64):
    """Build a shuffled DataLoader of random training pairs from `data_dir`."""
    folder = torchvision.datasets.ImageFolder(root=data_dir)
    pair_dataset = getDataset(folder, transform=transform_train)
    return DataLoader(pair_dataset, shuffle=True, batch_size=batch_size)
def getTestDataloder(data_dir="./FlickerLogo32/test", batch_size=1):
    """Build a shuffled DataLoader of labelled test pairs from `data_dir`.

    `relables=True` makes the dataset also return the raw class labels of
    both images in each pair, which the evaluation code relies on.
    """
    test_dataset = torchvision.datasets.ImageFolder(root=data_dir)
    dataset = getDataset(test_dataset, relables=True, transform=transform_train)
    # The original also created an unused `dataiter = iter(test_dataloader)`
    # here, which did pointless eager work; removed.
    return DataLoader(dataset, shuffle=True, batch_size=batch_size)
- net:
resnet提取特征+siamese
#resnet+siamese网络 输入(3,32,32)
import torch.nn as nn
import torch.nn.functional as F
class ResidualBlock(nn.Module):
    """Basic two-conv residual block (ResNet-18 style).

    Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN.  The skip path is
    the identity unless the spatial size or channel count changes, in
    which case a 1x1 conv + BN projection matches the shapes.
    """

    def __init__(self, inchannel, outchannel, stride=1):
        super(ResidualBlock, self).__init__()
        main_path = [
            nn.Conv2d(inchannel, outchannel, kernel_size=3,
                      stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3,
                      stride=1, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
        ]
        self.left = nn.Sequential(*main_path)
        needs_projection = stride != 1 or inchannel != outchannel
        if needs_projection:
            # 1x1 projection so the skip connection matches the main path.
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(outchannel),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        residual = self.left(x)
        skip = self.shortcut(x)
        return F.relu(residual + skip)
class SiameseNetwork(nn.Module):
    """Siamese network whose shared branch is a ResNet-18-style extractor.

    Both inputs are encoded by the SAME weights (`forward_once`) into a
    32-dimensional log-softmax embedding; `forward` returns the pair of
    embeddings.  Expected input shape: (batch, 3, 32, 32).
    """

    def __init__(self):
        super(SiameseNetwork, self).__init__()
        self.inchannel = 64
        # Stem: a single 3x3 conv keeps the 32x32 resolution.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        # Four stages of two residual blocks each; stages 2-4 halve the
        # spatial size: (64,32,32) -> (128,16,16) -> (256,8,8) -> (512,4,4).
        self.layer1 = self.make_layer(ResidualBlock, 64, 2, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 128, 2, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 256, 2, stride=2)
        self.layer4 = self.make_layer(ResidualBlock, 512, 2, stride=2)
        # 512 pooled features -> 32 logits (one per logo class).
        self.fc = nn.Linear(512, 32)

    def make_layer(self, block, channels, num_blocks, stride):
        """Stack `num_blocks` blocks; only the first may downsample.

        The per-block strides are [stride, 1, 1, ...], so e.g. stride=2
        gives [2, 1] for two blocks.
        """
        layers = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            layers.append(block(self.inchannel, channels, block_stride))
            self.inchannel = channels
        return nn.Sequential(*layers)

    def forward_once(self, x):
        """Encode one (batch, 3, 32, 32) image batch to (batch, 32)."""
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)       # (batch, 512, 1, 1)
        out = out.view(out.size(0), -1)  # (batch, 512)
        return F.log_softmax(self.fc(out), dim=1)

    def forward(self, input1, input2):
        """Run both inputs through the shared branch."""
        return self.forward_once(input1), self.forward_once(input2)
- loss
import torch
import torch.nn as nn
import torch.nn.functional as F
class ContrastiveLoss(nn.Module):
    """Contrastive loss (Hadsell et al., 2006) for siamese embeddings.

    `label == 0` means the pair shares a class (pull together: penalise
    the squared distance); `label == 1` means different classes (push
    apart: penalise only distances below `margin`).
    """

    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        distance = F.pairwise_distance(output1, output2, keepdim=True)
        same_term = (1 - label) * distance.pow(2)
        diff_term = label * torch.clamp(self.margin - distance, min=0.0).pow(2)
        return torch.mean(same_term + diff_term)
5.train
# Training script: optimise the siamese network with contrastive loss,
# printing progress every step and evaluating + checkpointing every 20
# steps.
import torch
from loss import ContrastiveLoss
from net import SiameseNetwork
import torch.optim as optim
from loadData import getDataloder, getTestDataloder
from test import make_test

train_number_epochs = 50
train_batch_size = 64

dataloader = getDataloder()
testdataloder = getTestDataloder()
net = SiameseNetwork()
loss_func = ContrastiveLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0005)
accuracy = .0

for epoch in range(train_number_epochs):
    # Hoisted out of the inner loop: len(dataloader) is loop-invariant
    # (the original recomputed it on every batch).
    total = len(dataloader)
    for i, data in enumerate(dataloader):
        # img_1/img_2: (batch, 3, 32, 32); label: (batch, 1), 1 => different class
        img_1 = data[0].type(torch.FloatTensor)
        img_2 = data[1].type(torch.FloatTensor)
        label = data[2].type(torch.FloatTensor)
        optimizer.zero_grad()
        output_1, output_2 = net(img_1, img_2)
        loss = loss_func(output_1, output_2, label)
        loss.backward()
        optimizer.step()
        # The original guarded this with `if i % 1 == 0`, which is always
        # true; print progress unconditionally.
        print("\rEpoch: %d, cur_epoch_progress: %d/%d, loss: %f, accuracy: %f"
              % (epoch, i, total, loss.item(), accuracy), end="")
        if i % 20 == 0:
            accuracy = make_test(net)
            torch.save(net, 'model.pth')
6.test
import torch
import numpy as np
import torch.nn.functional as F
from loadData import getTestDataloder
import math
def make_test(net, test_dir='./FlickerLogo32/test'):
    """Estimate pair-verification accuracy on 100 random test pairs.

    A pair is predicted "same class" when the embedding distance falls
    below THRESHOLD.  `label2` is 0 for same-class pairs and 1 for
    different-class pairs, so a prediction is correct when
    (label2 == 0 and dist < T) or (label2 == 1 and dist >= T).

    Args:
        net: siamese network returning a pair of embeddings.
        test_dir: test image folder.  The original silently ignored this
            parameter (and its default contained a typo, 'FilckerLogo32');
            it is now actually passed through to the loader.

    Returns:
        float accuracy in [0, 1] over the sampled pairs.
    """
    test_dataloader = getTestDataloder(data_dir=test_dir)
    dataiter = iter(test_dataloader)
    THRESHOLD = 1.15
    n_pairs = 100
    correct_pre = 0
    for _ in range(n_pairs):
        x0, x1, label2, label0, label1 = next(dataiter)
        output1, output2 = net(x0.type(torch.FloatTensor), x1.type(torch.FloatTensor))
        # Extract the scalar distance once (the original repeated the
        # .cpu()[0].detach().numpy() chain in both branches).
        distance = F.pairwise_distance(output1, output2).cpu()[0].detach().numpy()
        if label2 == 0 and distance < THRESHOLD:
            correct_pre += 1
        elif label2 == 1 and distance >= THRESHOLD:
            correct_pre += 1
    # Compute the ratio once after the loop instead of every iteration.
    return correct_pre / n_pairs
# 图片相似度
def test_image_distance(img_1, img_2, net):
img_1 = img_1.type(torch.FloatTensor)
img_2 = img_2.type(torch.FloatTensor)
output1, output2 = net(img_1, img_2)
euclidean_distance = F.pairwise_distance(output1, output2).item()
def normal_distribution(x, mean, sigma):
return np.exp(-1 * ((x - mean) ** 2) / (2 * (sigma ** 2))) / (math.sqrt(2 * np.pi) * sigma)
similarity = normal_distribution(euclidean_distance, 0, 1) / normal_distribution(0, 0, 1)
return similarity