PyRetri — annotated source code notes
1. config
config.py
# -*- coding: utf-8 -*-
from typing import Optional

from yacs.config import CfgNode

from ..datasets import get_datasets_cfg
from ..evaluate import get_evaluate_cfg
from ..extract import get_extract_cfg
from ..index import get_index_cfg
from ..models import get_model_cfg
def get_defaults_cfg() -> CfgNode:
    """
    Construct the default configuration tree.

    Returns:
        cfg (CfgNode): the default configuration tree.
    """
    cfg = CfgNode()
    # Each sub-tree is produced by the corresponding package's cfg builder.
    sub_cfg_builders = {
        "datasets": get_datasets_cfg,
        "model": get_model_cfg,
        "extract": get_extract_cfg,
        "index": get_index_cfg,
        "evaluate": get_evaluate_cfg,
    }
    for section, builder in sub_cfg_builders.items():
        cfg[section] = builder()
    return cfg
def setup_cfg(cfg: CfgNode, cfg_file: str, cfg_opts: Optional[list] = None) -> CfgNode:
    """
    Load a yaml config file, merge it into this CfgNode, then freeze it.

    Args:
        cfg (CfgNode): the configuration tree with default structure.
        cfg_file (str): path to a yaml config file matching the CfgNode layout.
        cfg_opts (list, optional): config (key, value) pairs in a flat list
            (e.g. from the command line) to merge on top of the file.
            May be None or empty.

    Returns:
        cfg (CfgNode): the configuration tree with settings applied, frozen.
    """
    cfg.merge_from_file(cfg_file)
    # BUG FIX: merge_from_list(None) raises (yacs calls len() on the list);
    # the documented default is None, so only merge when options were given.
    if cfg_opts:
        cfg.merge_from_list(cfg_opts)
    cfg.freeze()
    return cfg
2. datasets: data preprocessing
- transformer:
- transformers_impl:
transformers.py
# -*- coding: utf-8 -*-
from typing import Dict, Optional, Tuple, Union

import numpy as np
import torch
from PIL import Image
from torchvision.transforms import CenterCrop as TCenterCrop
from torchvision.transforms import Resize as TResize
from torchvision.transforms import Scale as TScale
from torchvision.transforms import TenCrop as TTenCrop
from torchvision.transforms import ToTensor as TToTensor
from torchvision.transforms.functional import hflip

from ..transformers_base import TransformerBase
from ...registry import TRANSFORMERS
@TRANSFORMERS.register
class DirectResize(TransformerBase):
    """
    Directly resize an image to the target size, regardless of the h:w ratio.

    Hyper-Params
        size (sequence): desired output size.
        interpolation (int): desired interpolation.
    """
    default_hyper_params = {
        "size": (224, 224),
        "interpolation": Image.BILINEAR,
    }

    def __init__(self, hps: Optional[Dict] = None):
        """
        Args:
            hps (dict, optional): hyper parameters in a dict (keys, values).
        """
        super(DirectResize, self).__init__(hps)
        self.t_transformer = TResize(self._hyper_params["size"], self._hyper_params["interpolation"])

    def __call__(self, img: Image.Image) -> Image.Image:
        return self.t_transformer(img)
@TRANSFORMERS.register
class PadResize(TransformerBase):
    """
    Resize the image's longer edge to the target size, then pad the shorter
    edge (centered) up to the target size, producing a square image.

    Hyper-Params
        size (int): desired output size of the longer edge.
        padding_v (sequence): padding pixel value (RGB).
        interpolation (int): desired interpolation.
    """
    default_hyper_params = {
        "size": 224,
        "padding_v": [124, 116, 104],
        "interpolation": Image.BILINEAR,
    }

    def __init__(self, hps: Optional[Dict] = None):
        """
        Args:
            hps (dict, optional): hyper parameters in a dict (keys, values).
        """
        super(PadResize, self).__init__(hps)

    def __call__(self, img: Image.Image) -> Image.Image:
        target_size = self._hyper_params["size"]
        padding_v = tuple(self._hyper_params["padding_v"])
        interpolation = self._hyper_params["interpolation"]

        # Scale so the longer edge equals target_size, preserving aspect ratio
        # (PIL's resize takes (width, height)).
        w, h = img.size
        if w > h:
            img = img.resize((int(target_size), int(h * target_size * 1.0 / w)), interpolation)
        else:
            img = img.resize((int(w * target_size * 1.0 / h), int(target_size)), interpolation)

        # Paste the scaled image centered on a square canvas filled with padding_v.
        ret_img = Image.new("RGB", (target_size, target_size), padding_v)
        w, h = img.size
        st_w = int((ret_img.size[0] - w) / 2.0)
        st_h = int((ret_img.size[1] - h) / 2.0)
        ret_img.paste(img, (st_w, st_h))
        return ret_img
@TRANSFORMERS.register
class ShorterResize(TransformerBase):
    """
    Resize the image's shorter edge to the target size, keeping the h:w ratio.

    Hyper-Params
        size (int): desired output size.
        interpolation (int): desired interpolation.
    """
    default_hyper_params = {
        "size": 224,
        "interpolation": Image.BILINEAR,
    }

    def __init__(self, hps: Optional[Dict] = None):
        """
        Args:
            hps (dict, optional): hyper parameters in a dict (keys, values).
        """
        super(ShorterResize, self).__init__(hps)
        # torchvision's Resize scales the shorter edge when size is an int.
        self.t_transformer = TResize(self._hyper_params["size"], self._hyper_params["interpolation"])

    def __call__(self, img: Image.Image) -> Image.Image:
        return self.t_transformer(img)
@TRANSFORMERS.register
class CenterCrop(TransformerBase):
    """
    A wrapper of torchvision's CenterCrop, preceded by a shorter-edge resize so
    the crop never exceeds the image. See torchvision.transforms.CenterCrop.

    Hyper-Params
        size (sequence or int): desired output size.
    """
    default_hyper_params = {
        "size": 224,
    }

    def __init__(self, hps: Optional[Dict] = None):
        """
        Args:
            hps (dict, optional): hyper parameters in a dict (keys, values).
        """
        super(CenterCrop, self).__init__(hps)
        # FIX: torchvision.transforms.Scale is a deprecated alias of Resize and
        # was removed in recent torchvision; Resize behaves identically here.
        self.t_transformer_bef = TResize(self._hyper_params["size"])
        self.t_transformer = TCenterCrop(self._hyper_params["size"])

    def __call__(self, img: Image.Image) -> Image.Image:
        return self.t_transformer(self.t_transformer_bef(img))
@TRANSFORMERS.register
class ToTensor(TransformerBase):
    """
    A wrapper of torchvision's ToTensor: converts PIL image(s) to a stacked
    4-D float tensor. See torchvision.transforms.ToTensor.
    """
    default_hyper_params = dict()

    def __init__(self, hps: Optional[Dict] = None):
        """
        Args:
            hps (dict, optional): hyper parameters in a dict (keys, values).
        """
        super(ToTensor, self).__init__(hps)
        self.t_transformer = TToTensor()

    def __call__(self, imgs: Union[Image.Image, tuple]) -> torch.Tensor:
        # A single image becomes a 1-element batch; tuples (e.g. the output of
        # TenCrop) are stacked along a new leading dimension.
        if not isinstance(imgs, tuple):
            imgs = [imgs]
        ret_tensor = [self.t_transformer(img) for img in imgs]
        return torch.stack(ret_tensor, dim=0)
@TRANSFORMERS.register
class ToCaffeTensor(TransformerBase):
    """
    Create tensors for models trained in caffe: channels reordered RGB -> BGR
    and pixel values kept in the raw 0-255 range (no 1/255 scaling).
    """
    default_hyper_params = dict()

    def __init__(self, hps: Optional[Dict] = None):
        """
        Args:
            hps (dict, optional): hyper parameters in a dict (keys, values).
        """
        super(ToCaffeTensor, self).__init__(hps)

    def __call__(self, imgs: Union[Image.Image, tuple]) -> torch.Tensor:
        if not isinstance(imgs, tuple):
            imgs = [imgs]
        ret_tensor = list()
        for img in imgs:
            img = np.array(img, np.int32, copy=False)
            # RGB -> BGR channel order, as expected by caffe-trained models.
            r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
            img = np.stack([b, g, r], axis=2)
            img = torch.from_numpy(img)
            # HWC -> CHW; permute(2, 0, 1) is equivalent to the original
            # transpose(0, 1).transpose(0, 2) chain.
            img = img.permute(2, 0, 1).contiguous()
            img = img.float()
            ret_tensor.append(img)
        return torch.stack(ret_tensor, dim=0)
@TRANSFORMERS.register
class Normalize(TransformerBase):
    """
    Normalize a 4-D tensor image with mean and standard deviation.

    NOTE: the normalization is performed in place on the input tensor.

    Hyper-Params
        mean (sequence): sequence of means for each channel.
        std (sequence): sequence of standard deviations for each channel.
    """
    default_hyper_params = {
        "mean": [0.485, 0.456, 0.406],
        "std": [0.229, 0.224, 0.225],
    }

    def __init__(self, hps: Optional[Dict] = None):
        """
        Args:
            hps (dict, optional): hyper parameters in a dict (keys, values).
        """
        super(Normalize, self).__init__(hps)
        # Reshape the stats to (1, C, 1, 1) so they broadcast over (N, C, H, W).
        for v in ["mean", "std"]:
            self.__dict__[v] = np.array(self._hyper_params[v])[None, :, None, None]
            self.__dict__[v] = torch.from_numpy(self.__dict__[v]).float()

    def __call__(self, tensor: torch.Tensor) -> torch.Tensor:
        assert tensor.ndimension() == 4
        # sub_/div_ mutate the input tensor in place.
        tensor.sub_(self.mean).div_(self.std)
        return tensor
@TRANSFORMERS.register
class TenCrop(TransformerBase):
    """
    A wrapper of torchvision's TenCrop. See torchvision.transforms.TenCrop.

    Hyper-Params
        size (sequence or int): desired output size.
    """
    default_hyper_params = {
        "size": 224,
    }

    def __init__(self, hps: Optional[Dict] = None):
        """
        Args:
            hps (dict, optional): hyper parameters in a dict (keys, values).
        """
        super(TenCrop, self).__init__(hps)
        self.t_transformer = TTenCrop(self._hyper_params["size"])

    def __call__(self, img: Image.Image) -> tuple:
        # FIX: annotation — TenCrop returns a 10-tuple of crops (4 corners +
        # center, plus their horizontal flips), not a single image.
        return self.t_transformer(img)
@TRANSFORMERS.register
class TwoFlip(TransformerBase):
    """
    Return the image itself and its horizontally flipped copy.
    """
    default_hyper_params = dict()

    def __init__(self, hps: Optional[Dict] = None):
        """
        Args:
            hps (dict, optional): hyper parameters in a dict (keys, values).
        """
        super(TwoFlip, self).__init__(hps)

    def __call__(self, img: Image.Image) -> Tuple[Image.Image, Image.Image]:
        return img, hflip(img)
3. evaluate
4. extract: feature extraction
- extractor:
- extractors_impl
res_series.py
# -*- coding: utf-8 -*-
from ..extractors_base import ExtractorBase
from ...registry import EXTRACTORS
from typing import Dict
@EXTRACTORS.register
class ResSeries(ExtractorBase):
    """
    The extractors for ResNet.

    Hyper-Parameters
        extract_features (list): indicates which feature maps to output. See available_feas for available feature maps.
            If it is ["all"], then all available features will be output.
    """
    default_hyper_params = {
        "extract_features": list(),
    }

    # Names of the feature maps this extractor can hook, deepest first.
    available_feas = ["pool5", "pool4", "pool3"]

    def __init__(self, model, hps: Dict or None = None):
        """
        Args:
            model (nn.Module): the model for extracting features.
            hps (dict): default hyper parameters in a dict (keys, values).
        """
        # Hook the final ReLU of the last block of each residual stage.
        # NOTE(review): the negative indices assume model.children() ends with
        # [..., layer2, layer3, layer4, avgpool, fc] as in torchvision's
        # ResNet; confirm this ordering holds for custom backbones.
        children = list(model.children())
        feature_modules = {
            "pool5": children[-3][-1].relu,
            "pool4": children[-4][-1].relu,
            "pool3": children[-5][-1].relu
        }
        super(ResSeries, self).__init__(model, feature_modules, hps)
5. index
6. models: model construction
backbone:
__init__.py:
Add your own newly defined model here, e.g. ft_net_50
# -*- coding: utf-8 -*-
from .backbone_impl.resnet import ResNet
from .backbone_impl.vgg import VGG
from .backbone_impl.reid_baseline import ft_net,ft_net_50,ft_net_18 # ***
from .backbone_base import BackboneBase
# Public names exported by the backbone package; new backbones must be listed
# here after being imported above.
__all__ = [
    'BackboneBase',
    'ResNet', 'VGG',
    'ft_net',
    'ft_net_50',  # ***
    'ft_net_18',
]
backbone_impl:
After adding a model in reid_baseline.py, import it in
/models/backbone/__init__.py as shown above.
reid_baseline.py
import torch
import torch.nn as nn
from torch.nn import init
from torchvision import models
from torch.autograd import Variable
from ..backbone_base import BackboneBase
from ...registry import BACKBONES
######################################################################
def weights_init_kaiming(m):
    """Apply Kaiming initialization to ``m`` according to its layer type.

    Conv layers get fan_in weights (bias untouched); Linear layers get fan_out
    weights and zero bias; BatchNorm1d gets N(1.0, 0.02) weights and zero bias.
    """
    layer_name = type(m).__name__
    if 'Conv' in layer_name:
        # For old pytorch, you may use kaiming_normal.
        init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
    elif 'Linear' in layer_name:
        init.kaiming_normal_(m.weight.data, a=0, mode='fan_out')
        init.constant_(m.bias.data, 0.0)
    elif 'BatchNorm1d' in layer_name:
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0.0)
def weights_init_classifier(m):
    """Initialize classifier heads: Linear layers get small-std normal weights
    and zero bias; all other layer types are left untouched."""
    if 'Linear' in type(m).__name__:
        init.normal_(m.weight.data, std=0.001)
        init.constant_(m.bias.data, 0.0)
# Defines the new fc layer and classification layer
# |--Linear--|--bn--|--relu--|--Linear--|
class ClassBlock(nn.Module):
    """Bottleneck + classifier head: optional Linear / BatchNorm1d / LeakyReLU /
    Dropout stack followed by a Linear classifier.

    When ``return_f`` is True, forward() returns (logits, bottleneck_feature).
    """

    def __init__(self, input_dim, class_num, droprate, relu=False, bnorm=True, num_bottleneck=512, linear=True,
                 return_f=False):
        super(ClassBlock, self).__init__()
        self.return_f = return_f

        bottleneck = []
        if linear:
            bottleneck.append(nn.Linear(input_dim, num_bottleneck))
        else:
            # No projection: the bottleneck keeps the input dimensionality.
            num_bottleneck = input_dim
        if bnorm:
            bottleneck.append(nn.BatchNorm1d(num_bottleneck))
        if relu:
            bottleneck.append(nn.LeakyReLU(0.1))
        if droprate > 0:
            bottleneck.append(nn.Dropout(p=droprate))
        self.add_block = nn.Sequential(*bottleneck)
        self.add_block.apply(weights_init_kaiming)

        self.classifier = nn.Sequential(nn.Linear(num_bottleneck, class_num))
        self.classifier.apply(weights_init_classifier)

    def forward(self, x):
        x = self.add_block(x)
        if not self.return_f:
            return self.classifier(x)
        # Also hand back the pre-classifier feature when requested.
        return self.classifier(x), x
# Define the ResNet50-based Model
@BACKBONES.register
class ft_net(BackboneBase):
    """ResNet50-based re-ID model with a ClassBlock classification head."""

    def __init__(self, class_num=751, droprate=0.5, stride=2):
        """
        Args:
            class_num (int): number of identity classes.
            droprate (float): dropout rate inside the ClassBlock.
            stride (int): stride of the last stage; 1 keeps a larger feature map.
        """
        super(ft_net, self).__init__()
        model_ft = models.resnet50(pretrained=True)
        if stride == 1:
            # BUG FIX: the original referenced self.model here, before it was
            # assigned below, raising AttributeError; patch model_ft directly.
            model_ft.layer4[0].downsample[0].stride = (1, 1)
            model_ft.layer4[0].conv2.stride = (1, 1)
        # avg pooling to global pooling
        model_ft.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.model = model_ft
        self.classifier = ClassBlock(2048, class_num, droprate)

    def forward(self, x):
        """Run the ResNet trunk, global-pool, flatten, then classify."""
        x = self.model.conv1(x)
        x = self.model.bn1(x)
        x = self.model.relu(x)
        x = self.model.maxpool(x)
        x = self.model.layer1(x)
        x = self.model.layer2(x)
        x = self.model.layer3(x)
        x = self.model.layer4(x)
        x = self.model.avgpool(x)
        x = x.view(x.size(0), x.size(1))
        x = self.classifier(x)
        return x
@BACKBONES.register
class ft_net_18(BackboneBase):
    """ResNet18-based backbone (via torch.hub) with a fresh fc head."""

    def __init__(self, class_num=3094):
        """
        Args:
            class_num (int): number of output classes for the new fc head.
        """
        # BUG FIX: the original called super(ft_net_own, self) — ft_net_own is
        # undefined in this module and raised NameError at construction time.
        super(ft_net_18, self).__init__()
        model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=False)
        num_ftrs = model.fc.in_features
        # Expose each trunk stage as a direct attribute so feature extractors
        # can hook them by child order.
        self.conv1 = model.conv1
        self.bn1 = model.bn1
        self.relu = model.relu
        self.maxpool = model.maxpool
        self.layer1 = model.layer1
        self.layer2 = model.layer2
        self.layer3 = model.layer3
        self.layer4 = model.layer4
        self.avgpool = model.avgpool
        self.fc = nn.Linear(num_ftrs, class_num)

    def forward(self, x):
        """Standard ResNet forward: trunk, global pool, flatten, fc."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), x.size(1))
        x = self.fc(x)
        return x
# ---------------------------------------------------------------------------- #
# NOTE: forward() should expose at least three feature-map stages (layer2/3/4)
# so the ResSeries-style extractors can hook pool3/pool4/pool5.
@BACKBONES.register
class ft_net_50(BackboneBase):
    """ResNet50-based backbone (via torch.hub) with a fresh fc head."""

    def __init__(self, class_num=3094):
        super(ft_net_50, self).__init__()
        model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet50', pretrained=False)
        num_ftrs = model.fc.in_features
        # Expose each trunk stage as a direct attribute (child order matters
        # for extractors that hook stages by position).
        self.conv1 = model.conv1
        self.bn1 = model.bn1
        self.relu = model.relu
        self.maxpool = model.maxpool
        self.layer1 = model.layer1
        self.layer2 = model.layer2
        self.layer3 = model.layer3
        self.layer4 = model.layer4
        self.avgpool = model.avgpool
        self.fc = nn.Linear(num_ftrs, class_num)

    def forward(self, x):
        """Run the trunk stage by stage, then pool, flatten and classify."""
        trunk = (self.conv1, self.bn1, self.relu, self.maxpool,
                 self.layer1, self.layer2, self.layer3, self.layer4,
                 self.avgpool)
        for stage in trunk:
            x = stage(x)
        x = x.view(x.size(0), x.size(1))
        return self.fc(x)
#************************************************************************************************#
#************************************************************************************************#
#************************************************************************************************#
# Define the DenseNet121-based Model
class ft_net_dense(nn.Module):
    """DenseNet121-based model: global pooling plus a ClassBlock head."""

    def __init__(self, class_num, droprate=0.5):
        super().__init__()
        backbone = models.densenet121(pretrained=True)
        # Appending avgpool to the `features` Sequential makes it run last,
        # turning the fixed pooling into global pooling.
        backbone.features.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        backbone.fc = nn.Sequential()
        self.model = backbone
        # For DenseNet121, the feature dim is 1024.
        self.classifier = ClassBlock(1024, class_num, droprate)

    def forward(self, x):
        feat = self.model.features(x)
        feat = feat.view(feat.size(0), feat.size(1))
        return self.classifier(feat)
# Define the ResNet50-based Model (Middle-Concat)
# In the spirit of "The Devil is in the Middle: Exploiting Mid-level Representations for Cross-Domain Instance Matching." Yu, Qian, et al. arXiv:1711.08106 (2017).
class ft_net_middle(nn.Module):
    """ResNet50 model that concatenates pooled layer3 (mid-level) and layer4
    (final) features before classification (Yu et al., arXiv:1711.08106)."""

    def __init__(self, class_num, droprate=0.5):
        super(ft_net_middle, self).__init__()
        backbone = models.resnet50(pretrained=True)
        # avg pooling to global pooling
        backbone.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.model = backbone
        # 1024 (layer3) + 2048 (layer4) concatenated feature dims.
        self.classifier = ClassBlock(2048 + 1024, class_num, droprate)

    def forward(self, x):
        x = self.model.conv1(x)
        x = self.model.bn1(x)
        x = self.model.relu(x)
        x = self.model.maxpool(x)
        x = self.model.layer1(x)
        x = self.model.layer2(x)
        x = self.model.layer3(x)
        mid = self.model.avgpool(x)   # n x 1024 x 1 x 1
        x = self.model.layer4(x)
        deep = self.model.avgpool(x)  # n x 2048 x 1 x 1
        fused = torch.cat((mid, deep), 1)
        fused = fused.view(fused.size(0), fused.size(1))
        return self.classifier(fused)
# Part Model proposed in Yifan Sun etal. (2018)
class PCB(nn.Module):
    """Part-based Convolutional Baseline (Yifan Sun et al., 2018): cuts pool5
    into six horizontal stripes, each classified by its own ClassBlock."""

    def __init__(self, class_num):
        super(PCB, self).__init__()
        self.part = 6  # number of horizontal stripes cut from pool5
        self.model = models.resnet50(pretrained=True)
        self.avgpool = nn.AdaptiveAvgPool2d((self.part, 1))
        self.dropout = nn.Dropout(p=0.5)
        # Remove the final downsample so the last feature map stays larger.
        self.model.layer4[0].downsample[0].stride = (1, 1)
        self.model.layer4[0].conv2.stride = (1, 1)
        # One classifier per stripe: classifier0 .. classifier5.
        for idx in range(self.part):
            setattr(self, 'classifier' + str(idx),
                    ClassBlock(2048, class_num, droprate=0.5, relu=False, bnorm=True, num_bottleneck=256))

    def forward(self, x):
        x = self.model.conv1(x)
        x = self.model.bn1(x)
        x = self.model.relu(x)
        x = self.model.maxpool(x)
        x = self.model.layer1(x)
        x = self.model.layer2(x)
        x = self.model.layer3(x)
        x = self.model.layer4(x)
        x = self.avgpool(x)   # batch x 2048 x part x 1
        x = self.dropout(x)
        # Classify each stripe independently; return the list of predictions.
        predictions = []
        for idx in range(self.part):
            stripe = torch.squeeze(x[:, :, idx])
            head = getattr(self, 'classifier' + str(idx))
            predictions.append(head(stripe))
        return predictions
class PCB_test(nn.Module):
    """Inference wrapper for PCB: reuses a trained PCB's trunk and returns the
    six pooled stripe features without running the classifiers."""

    def __init__(self, model):
        super(PCB_test, self).__init__()
        self.part = 6
        self.model = model.model
        self.avgpool = nn.AdaptiveAvgPool2d((self.part, 1))
        # Remove the final downsample, matching the training-time PCB.
        self.model.layer4[0].downsample[0].stride = (1, 1)
        self.model.layer4[0].conv2.stride = (1, 1)

    def forward(self, x):
        trunk = (self.model.conv1, self.model.bn1, self.model.relu,
                 self.model.maxpool, self.model.layer1, self.model.layer2,
                 self.model.layer3, self.model.layer4)
        for stage in trunk:
            x = stage(x)
        x = self.avgpool(x)
        # Drop the trailing width dim: batch x 2048 x part.
        return x.view(x.size(0), x.size(1), x.size(2))