pytorch上resnet+siamese对FlickerLogo32(32种商标)分类

pytorch练习: resnet+siamese对FlickerLogo32(32种商标)分类

  1. resnet-18网络结构示意图如下
    -https://blog.csdn.net/sunqiande88/article/details/80100891
    ResNet全名Residual Network残差网络。Kaiming He 的《Deep Residual Learning for Image Recognition》获得了CVPR最佳论文。他提出的深度残差网络在2015年可以说是洗刷了图像方面的各大比赛,以绝对优势取得了多个比赛的冠军。而且它在保证网络精度的前提下,将网络的深度达到了152层,后来又进一步加到1000的深度。论文的开篇先是说明了深度网络的好处:特征等级随着网络的加深而变高,网络的表达能力也会大大提高。因此论文中提出了一个问题:是否可以通过叠加网络层数来获得一个更好的网络呢?作者经过实验发现,单纯的把网络叠起来的深层网络的效果反而不如合适层数的较浅的网络效果。因此何恺明等人在普通平原网络的基础上增加了一个shortcut, 构成一个residual block。此时拟合目标就变为F(x),F(x)就是残差:
    https://blog.csdn.net/sunqiande88/article/details/80100891
    参考:https://blog.csdn.net/sunqiande88/article/details/80100891
    2.自定义数据集
    由于数据是图片,所以要自己制作数据集
    参考ImageFolder的使用
#loadData.py
''' author by xiaotian'''
import random
from torch.utils.data import Dataset,DataLoader
import torchvision.datasets
from torchvision import transforms
import torch
import numpy as np
from PIL import Image

class getDataset(Dataset):
    """Siamese pair dataset over an ImageFolder-style dataset.

    Each __getitem__ draws a random anchor image and a second image that is,
    with probability 0.5, from the same class and otherwise from a different
    class.  The returned target is 1.0 for a dissimilar pair and 0.0 for a
    similar pair (the convention ContrastiveLoss expects).
    """

    def __init__(self, getDataset, transform=None, relables=False):
        # getDataset: object exposing .imgs as a list of (path, label) pairs,
        # e.g. torchvision.datasets.ImageFolder.
        self.getDataset = getDataset
        self.relables = relables
        self.transform = transform
        # Precompute the parallel path/label lists once.  The original
        # rebuilt both O(n) lists inside every __getitem__ call (and bound
        # one of them to the name `list`, shadowing the builtin), which
        # dominated data-loading time.
        self.datas = [item[0] for item in getDataset.imgs]
        self.labels = [item[1] for item in getDataset.imgs]

    def __getitem__(self, index):
        # NOTE: `index` is ignored; pairs are sampled randomly, so an epoch
        # is not a deterministic permutation of the data (same as original).
        datas, labels = self.datas, self.labels

        rand_i = random.choice(range(len(datas)))
        img0_path = datas[rand_i]

        # With probability 0.5 resample until the second image shares the
        # anchor's class; otherwise until it does not.
        should_get_same_class = random.randint(0, 1)
        while True:
            rand_j = random.choice(range(len(datas)))
            same = labels[rand_j] == labels[rand_i]
            if same == bool(should_get_same_class):
                break
        img1_path = datas[rand_j]

        img0 = Image.open(img0_path)
        img1 = Image.open(img1_path)
        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)

        # Target: 1.0 when the pair is dissimilar, 0.0 when similar.
        target = torch.from_numpy(
            np.array([int(labels[rand_i] != labels[rand_j])], dtype=np.float32))
        if self.relables:
            return img0, img1, target, labels[rand_i], labels[rand_j]
        return img0, img1, target

    def __len__(self):
        return len(self.getDataset.imgs)

# Shared preprocessing for both train and test loaders: resize to the
# 32x32 input the network expects, apply a random horizontal flip, and
# convert the PIL image to a tensor.
_transform_steps = [
    transforms.Resize(size=(32, 32)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
]
transform_train = transforms.Compose(_transform_steps)
def getDataloder(data_dir = "./FlickerLogo32/train",batch_size = 64):
    """Build a shuffled DataLoader of random siamese training pairs.

    data_dir must be an ImageFolder-style tree (one sub-directory per class).
    """
    folder = torchvision.datasets.ImageFolder(root=data_dir)
    pair_dataset = getDataset(folder, transform=transform_train)
    return DataLoader(pair_dataset, shuffle=True, batch_size=batch_size)
def getTestDataloder(data_dir = "./FlickerLogo32/test",batch_size = 1):
    """Build a shuffled DataLoader of random siamese test pairs.

    relables=True makes the dataset also return the two class labels of each
    pair, which make_test() relies on.  (The original also created an unused
    `dataiter = iter(test_dataloader)` local here; removed.)
    """
    folder = torchvision.datasets.ImageFolder(root=data_dir)
    pair_dataset = getDataset(folder, relables=True, transform=transform_train)
    return DataLoader(pair_dataset, shuffle=True, batch_size=batch_size)
   
  3. net:
    resnet提取特征+siamese
#resnet+siamese网络 输入(3,32,32)
import torch.nn as nn
import torch.nn.functional as F

class ResidualBlock(nn.Module):
    """Basic two-conv residual block (ResNet-18 style).

    Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN.  The shortcut is the
    identity unless the spatial size or channel count changes, in which case
    a 1x1 conv + BN projects the input to match.
    """

    def __init__(self, inchannel, outchannel, stride=1):
        super(ResidualBlock, self).__init__()
        main_path = [
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride,
                      padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1,
                      padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
        ]
        self.left = nn.Sequential(*main_path)
        if stride == 1 and inchannel == outchannel:
            # Identity shortcut: nothing to project.
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride,
                          bias=False),
                nn.BatchNorm2d(outchannel),
            )

    def forward(self, x):
        residual = self.shortcut(x)
        return F.relu(self.left(x) + residual)

class SiameseNetwork(nn.Module):
    """ResNet-18-style backbone applied as a shared-weight siamese network.

    forward() runs the same backbone over both inputs and returns the two
    32-dim embeddings (log-softmax normalized); callers compare them via the
    euclidean distance (see ContrastiveLoss / make_test).
    """

    def __init__(self):
        super(SiameseNetwork, self).__init__()
        self.inchannel = 64
        # Stem: (batch, 3, 32, 32) -> (batch, 64, 32, 32)
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        # Four stages of two residual blocks each; after the first stage the
        # spatial size halves and the channel count doubles at every stage.
        self.layer1 = self.make_layer(ResidualBlock, 64, 2, stride=1)   # 32x32
        self.layer2 = self.make_layer(ResidualBlock, 128, 2, stride=2)  # 16x16
        self.layer3 = self.make_layer(ResidualBlock, 256, 2, stride=2)  # 8x8
        self.layer4 = self.make_layer(ResidualBlock, 512, 2, stride=2)  # 4x4
        # Pooled 512-dim feature -> 32-dim embedding (one dim per logo class).
        self.fc = nn.Linear(512, 32)

    def make_layer(self, block, channels, num_blocks, stride):
        """Stack num_blocks residual blocks; only the first may downsample."""
        layers = []
        for s in [stride] + [1] * (num_blocks - 1):
            layers.append(block(self.inchannel, channels, s))
            self.inchannel = channels
        return nn.Sequential(*layers)

    def forward_once(self, x):
        """Embed one (batch, 3, 32, 32) image batch to (batch, 32)."""
        out = self.conv1(x)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        out = F.avg_pool2d(out, 4)        # (batch, 512, 1, 1)
        out = out.view(out.size(0), -1)   # (batch, 512)
        return F.log_softmax(self.fc(out), dim=1)

    def forward(self, input1, input2):
        return self.forward_once(input1), self.forward_once(input2)

  4. loss
import torch
import torch.nn as nn
import torch.nn.functional as F


class ContrastiveLoss(nn.Module):
    """Contrastive loss over embedding pairs.

    label == 0 marks a similar pair: the loss is the squared euclidean
    distance, pulling the embeddings together.  label == 1 marks a
    dissimilar pair: the loss is the squared hinge max(margin - d, 0)**2,
    pushing the embeddings at least `margin` apart.
    """

    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        dist = F.pairwise_distance(output1, output2, keepdim=True)
        similar_term = (1 - label) * dist.pow(2)
        hinge = torch.clamp(self.margin - dist, min=0.0)
        dissimilar_term = label * hinge.pow(2)
        return torch.mean(similar_term + dissimilar_term)

5.train

import torch
from loss import ContrastiveLoss
from net import SiameseNetwork
import torch.optim as optim
from loadData import getDataloder,getTestDataloder
from test import make_test
# Hyper-parameters for the training run.
train_number_epochs = 50
train_batch_size = 64

dataloader = getDataloder()
testdataloder = getTestDataloder()

net = SiameseNetwork()
loss_func = ContrastiveLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0005)


accuracy = .0
for epoch in range(train_number_epochs):
    total = len(dataloader)  # loop-invariant: batches per epoch, hoisted
    for i, data in enumerate(dataloader):
        # img0, img1: (batch, 3, 32, 32); label: (batch, 1) with 1.0 marking
        # a dissimilar pair (the ContrastiveLoss convention).
        img_1 = data[0].type(torch.FloatTensor)
        img_2 = data[1].type(torch.FloatTensor)
        label = data[2].type(torch.FloatTensor)

        optimizer.zero_grad()
        output_1, output_2 = net(img_1, img_2)
        loss = loss_func(output_1, output_2, label)
        loss.backward()
        optimizer.step()

        # The original guarded this print with `if i % 1 == 0`, which is
        # always true; the dead condition is removed.
        print("\rEpoch: %d, cur_epoch_progress: %d/%d, loss: %f, accuracy: %f" % (epoch, i, total, loss.item(), accuracy), end="")
        if i % 20 == 0:
            # NOTE(review): make_test runs with the net still in train mode,
            # so BatchNorm uses batch statistics; consider net.eval() /
            # net.train() around it — left unchanged to preserve behavior.
            accuracy = make_test(net)


torch.save(net, 'model.pth')

6.test

import torch
import numpy as np
import torch.nn.functional as F
from loadData import getTestDataloder
import math


def make_test(net,test_dir = './FlickerLogo32/test'):
    """Estimate pair-verification accuracy of `net` on 100 random test pairs.

    A pair is predicted "same class" when the euclidean distance between the
    two embeddings is below THRESHOLD.  Returns the fraction of the 100
    sampled pairs classified correctly.

    Fixes vs. the original: `test_dir` was silently ignored (and its default
    misspelled "FilckerLogo32"); it is now forwarded to the loader.  The
    accuracy is computed once after the loop instead of on every iteration.
    """
    test_dataloader = getTestDataloder(data_dir=test_dir)
    dataiter = iter(test_dataloader)

    THRESHOLD = 1.15  # distance cut-off; empirically chosen
    correct_pre = 0
    n_pairs = 100
    for _ in range(n_pairs):
        # label2 is 0 for a same-class pair, 1 for a different-class pair.
        x0, x1, label2, label0, label1 = next(dataiter)
        output1, output2 = net(x0.type(torch.FloatTensor), x1.type(torch.FloatTensor))
        distance = F.pairwise_distance(output1, output2).cpu()[0].detach().numpy()
        # Correct when the distance-based prediction matches the true
        # same/different label.
        if bool(distance >= THRESHOLD) == bool(label2):
            correct_pre += 1
    return correct_pre / n_pairs


# 图片相似度
def test_image_distance(img_1, img_2, net):
    img_1 = img_1.type(torch.FloatTensor)
    img_2 = img_2.type(torch.FloatTensor)
    output1, output2 = net(img_1, img_2)
    euclidean_distance = F.pairwise_distance(output1, output2).item()
    def normal_distribution(x, mean, sigma):
        return np.exp(-1 * ((x - mean) ** 2) / (2 * (sigma ** 2))) / (math.sqrt(2 * np.pi) * sigma)
    similarity = normal_distribution(euclidean_distance, 0, 1) / normal_distribution(0, 0, 1)
    return similarity
  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值