(Previous post: (1) GaitSet Code Analysis, Super Detailed Version, Part 1: Data Loading - CSDN Blog)
Following on from that post, we now move on to the model-loading part.
4.2 Initializing model
Let's return to the initialization function.
In the previous post we saw that, after initialize_data finishes, it prints "Data initialization complete.", meaning all data initialization is done, and it returns train_source and test_source.
Let's look at what train_source holds:
data[0] is the first sequence; the entries run from data[0] to data[8006].
data[0][0] is the first sequence's data array, with shape (num_frames × 64 × 44).
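As a quick check, one could inspect it like this. This is only a minimal sketch; it assumes the DataSet object built by initialize_data in the previous part, whose __getitem__ returns [data, frame_set, view, seq_type, label]:
# Minimal inspection sketch (assumes train_source from the previous part).
data, frame_set, view, seq_type, label = train_source[0]
print(len(train_source))   # number of training sequences, e.g. 8007 here
print(data[0].shape)       # (num_frames, 64, 44): the silhouette array of the first sequence
print(view, seq_type, label)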
def initialize_model(config, train_source, test_source):
print("Initializing model...")
data_config = config['data']
model_config = config['model']
model_param = deepcopy(model_config)
model_param['train_source'] = train_source
model_param['test_source'] = test_source
model_param['train_pid_num'] = data_config['pid_num']
batch_size = int(np.prod(model_config['batch_size']))
model_param['save_name'] = '_'.join(map(str,[
model_config['model_name'],
data_config['dataset'],
data_config['pid_num'],
data_config['pid_shuffle'],
model_config['hidden_dim'],
model_config['margin'],
batch_size,
model_config['hard_or_full_trip'],
model_config['frame_num'],
]))
m = Model(**model_param)
print("Model initialization complete.")
return m, model_param['save_name']
The next print is "Initializing model...", after which the model parameters are assembled.
m = Model(**model_param) instantiates the Model class, passing model_param to it as keyword arguments.
It then prints
"Model initialization complete."
and returns the object m together with model_param['save_name'].
So initialization first builds the data, then hands the resulting train_source and test_source to initialize_model, which returns the Model instance m plus model_param['save_name'].
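For illustration only: if the config used the values commonly seen in this repo (model_name 'GaitSet', dataset 'CASIA-B', pid_num 73, pid_shuffle False, hidden_dim 256, margin 0.2, batch_size (8, 16), hard_or_full_trip 'full', frame_num 30; check your own config.py for the real ones), the joined save_name would come out like this:
import numpy as np

# Hypothetical config values, for illustration only.
batch_size = int(np.prod((8, 16)))   # 128
save_name = '_'.join(map(str, ['GaitSet', 'CASIA-B', 73, False, 256, 0.2, batch_size, 'full', 30]))
print(save_name)   # GaitSet_CASIA-B_73_False_256_0.2_128_full_30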
The initialization function is finally done.
Back in train.py,
m receives the object returned by initialization, training begins, and the script prints
"Training START"
m.fit() calls the fit method of that object, so now we go into the Model class, in model.py.
5. The Model class
5.0 Functions and classes from other folders that Model relies on
5.0.1 The TripletSampler class
In fit: triplet_sampler = TripletSampler(self.train_source, self.batch_size)
pid_list: 8 values sampled at random, without replacement, from the label set ([0-73]).
_index: 16 values sampled with replacement from that subject's 10 × 11 entries (10 sequence types × 11 views).
random.sample is sampling without replacement: if the list has no duplicates, the drawn values have none either.
random.choices is sampling with replacement, so values may repeat; a short demo follows.
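A tiny standalone demo of the difference:
import random

pool = list(range(5))
print(random.sample(pool, 3))      # without replacement: no repeated values, e.g. [4, 0, 2]
print(random.choices(pool, k=3))   # with replacement: values may repeat, e.g. [1, 1, 3]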
(See also: The difference between yield and return, super detailed - CSDN Blog.)
Each yielded batch is 8 × 16 = 128 indices, each in the range 0-8006.
class TripletSampler(tordata.sampler.Sampler):
def __init__(self, dataset, batch_size):
self.dataset = dataset
self.batch_size = batch_size
def __iter__(self):
while (True):
sample_indices = list()
pid_list = random.sample(
list(self.dataset.label_set),
self.batch_size[0])
for pid in pid_list:
_index = self.dataset.index_dict.loc[pid, :, :].values
# read this pid's values from index_dict (seq_type x view)
_index = _index[_index > 0].flatten().tolist()
# keep indices > 0 and flatten them into a list
_index = random.choices(
_index,
k=self.batch_size[1])
# randomly pick batch_size[1] of them (16), with replacement
sample_indices += _index
yield sample_indices
def __len__(self):
return self.dataset.data_size
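A minimal usage sketch, assuming the train_source built earlier and the config's batch_size of (8, 16):
# Draw one batch of indices from the sampler (illustrative only).
sampler = TripletSampler(train_source, (8, 16))
batch_indices = next(iter(sampler))
print(len(batch_indices))   # 128 = 8 subjects x 16 sequences each
print(batch_indices[:5])    # the first five indices into train_source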
5.0.2 The collate_fn function
(Reference: Detailed explanation of the collate_fn parameter in torch - Zhihu (zhihu.com).)
Once a Dataset is written, its __getitem__ returns one item, e.g. [data, label]. collate_fn is what actually assembles these individual items into a batch (a batch is several items; we usually want all the data parts grouped together and all the labels grouped together).
In our dataset, __getitem__ returns
[data, frame_set, self.view[index], self.seq_type[index], self.label[index]]
The code repackages a batch of such items so that it fits PyTorch's DataLoader. Inside, batch = [seqs, view, seq_type, label, None], where seqs is the data from the dataset; the select_frame function fixes each sequence to 30 frames for later processing. Each field of the batch holds 8 × 16 = 128 entries, i.e. [seqs (8×16), view (8×16), seq_type (8×16), label (8×16), None].
def collate_fn(self, batch):
batch_size = len(batch)
feature_num = len(batch[0][0])
seqs = [batch[i][0] for i in range(batch_size)]
frame_sets = [batch[i][1] for i in range(batch_size)]
view = [batch[i][2] for i in range(batch_size)]
seq_type = [batch[i][3] for i in range(batch_size)]
label = [batch[i][4] for i in range(batch_size)]
batch = [seqs, view, seq_type, label, None]
def select_frame(index):
sample = seqs[index]
frame_set = frame_sets[index]
if self.sample_type == 'random':
frame_id_list = random.choices(frame_set, k=self.frame_num)
_ = [feature.loc[frame_id_list].values for feature in sample]
else:
_ = [feature.values for feature in sample]
return _
seqs = list(map(select_frame, range(len(seqs))))
if self.sample_type == 'random':
seqs = [np.asarray([seqs[i][j] for i in range(batch_size)]) for j in range(feature_num)]
else:
gpu_num = min(torch.cuda.device_count(), batch_size)
batch_per_gpu = math.ceil(batch_size / gpu_num)
batch_frames = [[
len(frame_sets[i])
for i in range(batch_per_gpu * _, batch_per_gpu * (_ + 1))
if i < batch_size
] for _ in range(gpu_num)]
if len(batch_frames[-1]) != batch_per_gpu:
for _ in range(batch_per_gpu - len(batch_frames[-1])):
batch_frames[-1].append(0)
max_sum_frame = np.max([np.sum(batch_frames[_]) for _ in range(gpu_num)])
seqs = [[
np.concatenate([
seqs[i][j]
for i in range(batch_per_gpu * _, batch_per_gpu * (_ + 1))
if i < batch_size
], 0) for _ in range(gpu_num)]
for j in range(feature_num)]
seqs = [np.asarray([
np.pad(seqs[j][_],
((0, max_sum_frame - seqs[j][_].shape[0]), (0, 0), (0, 0)),
'constant',
constant_values=0)
for _ in range(gpu_num)])
for j in range(feature_num)]
batch[4] = np.asarray(batch_frames)
batch[0] = seqs
return batch
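To see what this produces in 'random' sample mode, one could peek at a single collated batch. A sketch only: it uses the train_loader that fit() builds later (section 5.1) and assumes batch_size=(8, 16) and frame_num=30:
# Illustrative shape check on one collated batch from train_loader.
for seq, view, seq_type, label, batch_frame in train_loader:
    print(len(seq), seq[0].shape)   # e.g. 1 feature, array of shape (128, 30, 64, 44)
    print(len(view), len(label))    # 128, 128
    print(batch_frame)              # None in 'random' mode
    break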
5.0.3 The SetNet class: the core of the network
Let's look at how data flows through the forward function.
Roughly speaking, x of shape [b, f, c, h, w] goes through a series of convolutions, poolings, frame_max operations and so on, and is turned into feature.
MGP is the Multilayer Global Pipeline that collects set-level features at different depths; in the code it is the gl branch. Different layers of a convolutional network have different receptive fields: the deeper the layer, the larger the receptive field. Pixels in shallow feature maps therefore focus on local, fine-grained information, while pixels in deeper layers focus on more global, coarse-grained information. Set-level features extracted by applying Set Pooling (SP) at different layers have analogous properties. As shown in the paper's Fig. 2, the main pipeline has only one SP after the last convolutional layer; to gather set-level information at various depths, the Multilayer Global Pipeline (MGP) is introduced. It has a structure similar to the convolutional network of the main pipeline, and the set-level features from different layers are added into it. The final feature map produced by MGP is also mapped by HPM into sum_{s=1}^{S} 2^(s-1) features. Note that the HPM after MGP does not share parameters with the HPM after the main pipeline.
In the run shown here n = 64; x is expanded with a channel dimension, going from [b, f, h, w] to [b, f, c, h, w], and the network's final output is feature.
import torch
import torch.nn as nn
import numpy as np
from .basic_blocks import SetBlock, BasicConv2d
class SetNet(nn.Module):
def __init__(self, hidden_dim):
super(SetNet, self).__init__()
self.hidden_dim = hidden_dim
self.batch_frame = None
_set_in_channels = 1
_set_channels = [32, 64, 128]
self.set_layer1 = SetBlock(BasicConv2d(_set_in_channels, _set_channels[0], 5, padding=2))
self.set_layer2 = SetBlock(BasicConv2d(_set_channels[0], _set_channels[0], 3, padding=1), True)
self.set_layer3 = SetBlock(BasicConv2d(_set_channels[0], _set_channels[1], 3, padding=1))
self.set_layer4 = SetBlock(BasicConv2d(_set_channels[1], _set_channels[1], 3, padding=1), True)
self.set_layer5 = SetBlock(BasicConv2d(_set_channels[1], _set_channels[2], 3, padding=1))
self.set_layer6 = SetBlock(BasicConv2d(_set_channels[2], _set_channels[2], 3, padding=1))
_gl_in_channels = 32
_gl_channels = [64, 128]
self.gl_layer1 = BasicConv2d(_gl_in_channels, _gl_channels[0], 3, padding=1)
self.gl_layer2 = BasicConv2d(_gl_channels[0], _gl_channels[0], 3, padding=1)
self.gl_layer3 = BasicConv2d(_gl_channels[0], _gl_channels[1], 3, padding=1)
self.gl_layer4 = BasicConv2d(_gl_channels[1], _gl_channels[1], 3, padding=1)
self.gl_pooling = nn.MaxPool2d(2)
self.bin_num = [1, 2, 4, 8, 16]
self.fc_bin = nn.ParameterList([
nn.Parameter(
nn.init.xavier_uniform_(
torch.zeros(sum(self.bin_num) * 2, 128, hidden_dim)))])
for m in self.modules():
if isinstance(m, (nn.Conv2d, nn.Conv1d)):
nn.init.xavier_uniform_(m.weight.data)
elif isinstance(m, nn.Linear):
nn.init.xavier_uniform_(m.weight.data)
nn.init.constant(m.bias.data, 0.0)
elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
nn.init.normal(m.weight.data, 1.0, 0.02)
nn.init.constant(m.bias.data, 0.0)
def frame_max(self, x):
if self.batch_frame is None:
return torch.max(x, 1)
else:
_tmp = [
torch.max(x[:, self.batch_frame[i]:self.batch_frame[i + 1], :, :, :], 1)
for i in range(len(self.batch_frame) - 1)
]
max_list = torch.cat([_tmp[i][0] for i in range(len(_tmp))], 0)
arg_max_list = torch.cat([_tmp[i][1] for i in range(len(_tmp))], 0)
return max_list, arg_max_list
def frame_median(self, x):
if self.batch_frame is None:
return torch.median(x, 1)
else:
_tmp = [
torch.median(x[:, self.batch_frame[i]:self.batch_frame[i + 1], :, :, :], 1)
for i in range(len(self.batch_frame) - 1)
]
median_list = torch.cat([_tmp[i][0] for i in range(len(_tmp))], 0)
arg_median_list = torch.cat([_tmp[i][1] for i in range(len(_tmp))], 0)
return median_list, arg_median_list
def forward(self, silho, batch_frame=None):
# n: batch_size, s: frame_num, k: keypoints_num, c: channel
if batch_frame is not None:
batch_frame = batch_frame[0].data.cpu().numpy().tolist()
_ = len(batch_frame)
for i in range(len(batch_frame)):
if batch_frame[-(i + 1)] != 0:
break
else:
_ -= 1
batch_frame = batch_frame[:_]
frame_sum = np.sum(batch_frame)
if frame_sum < silho.size(1):
silho = silho[:, :frame_sum, :, :]
self.batch_frame = [0] + np.cumsum(batch_frame).tolist()
n = silho.size(0)
x = silho.unsqueeze(2)
del silho
x = self.set_layer1(x)
x = self.set_layer2(x)
gl = self.gl_layer1(self.frame_max(x)[0])
gl = self.gl_layer2(gl)
gl = self.gl_pooling(gl)
x = self.set_layer3(x)
x = self.set_layer4(x)
gl = self.gl_layer3(gl + self.frame_max(x)[0])
gl = self.gl_layer4(gl)
x = self.set_layer5(x)
x = self.set_layer6(x)
x = self.frame_max(x)[0]
gl = gl + x
feature = list()
n, c, h, w = gl.size()
for num_bin in self.bin_num:
z = x.view(n, c, num_bin, -1)
z = z.mean(3) + z.max(3)[0]
feature.append(z)
z = gl.view(n, c, num_bin, -1)
z = z.mean(3) + z.max(3)[0]
feature.append(z)
feature = torch.cat(feature, 2).permute(2, 0, 1).contiguous()
feature = feature.matmul(self.fc_bin[0])
feature = feature.permute(1, 0, 2).contiguous()
return feature, None
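As a sanity check of the shapes, one could run the network on a dummy input. This is only a sketch; it assumes the repo layout where SetNet is exported from the model/network package (as in the import at the top of model.py):
import torch
from model.network import SetNet   # assumed import path, matching the repo's model package

net = SetNet(hidden_dim=256).float()
dummy = torch.randn(2, 30, 64, 44)     # (batch, frames, height, width) silhouettes
feature, label_prob = net(dummy)
print(feature.shape)                   # torch.Size([2, 62, 256]); 62 = 2 * sum([1, 2, 4, 8, 16]) strips
print(label_prob)                      # None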
Following the program's execution order, m.fit() calls the fit function.
5.1 The fit function
What does if self.restore_iter != 0: self.load(self.restore_iter) mean?
If restore_iter is not 0, i.e. training does not start from scratch, the previous checkpoint is loaded; in the config it defaults to 0.
This is how you continue an earlier run: if you already trained for, say, 40000 iterations and want to keep training, set restore_iter to 40000.
For example, with the earlier settings 'restore_iter': 0, 'total_iter': 50000 you trained 50000 iterations; changing them to 'restore_iter': 50000, 'total_iter': 60000 makes the model resume training from iteration 50000.
How is the previous result loaded? See the load function in 5.1.1.
self.encoder.train(): what is self.encoder? Go back to the __init__ function:
self.encoder = SetNet(self.hidden_dim).float()
self.encoder = nn.DataParallel(self.encoder)
So our encoder is the SetNet class, the core of the paper! nn.DataParallel wraps it for multi-GPU data parallelism (reference: PyTorch distributed / multi-GPU training (1): Data Parallel (DP) - CSDN Blog).
SetNet is instantiated and assigned to self.encoder; the .train() in self.encoder.train() is the usual pattern: whenever you start a new training or evaluation phase you should set the model's mode explicitly, because it does not switch automatically.
The sample type is then set to 'random'.
for param_group in self.optimizer.param_groups: iterates over the optimizer's parameter groups, which hold the encoder's parameters together with the learning rate, and sets the learning rate to self.lr. The optimizer was already created with this lr in __init__ (self.optimizer = optim.Adam([{'params': self.encoder.parameters()}], lr=self.lr)); re-setting it here mainly matters when resuming, because the optimizer state restored by load() carries its own learning rate.
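A small standalone illustration of what the param_group loop does:
import torch
import torch.optim as optim

params = [torch.nn.Parameter(torch.zeros(3))]
optimizer = optim.Adam([{'params': params}], lr=1e-4)
for param_group in optimizer.param_groups:
    param_group['lr'] = 5e-5             # overrides the stored learning rate in place
print(optimizer.param_groups[0]['lr'])   # 5e-05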
triplet_sampler = TripletSampler(self.train_source, self.batch_size)
This sets the sampler to triplet sampling by instantiating the TripletSampler class from sampler.py in the utils folder.
Next the training loader train_loader is defined: the dataset is train_source, batches are drawn by triplet_sampler, and collate_fn is the class's own collate_fn method.
Understanding the TripletSampler class and the collate_fn function (sections 5.0.1 and 5.0.2) is what tells you how train_loader actually loads the data.
With import torch.utils.data as tordata, the loader is created as train_loader = tordata.DataLoader(dataset=self.train_source, batch_sampler=triplet_sampler, collate_fn=self.collate_fn, num_workers=self.num_workers). In the loop for seq, view, seq_type, label, batch_frame in train_loader:, seq holds a 128 × 30 × 64 × 44 array, i.e. one batch of 128 sequences. self.np2var converts the numpy data to a tensor, and ts2var then does return autograd.Variable(x).cuda() to move it onto the GPU.
feature, label_prob = self.encoder(*seq, batch_frame)
This line feeds the data into the network; feature is the set-level embedding and label_prob is simply None for this SetNet.
def fit(self):
if self.restore_iter != 0:
self.load(self.restore_iter)  # if restore_iter != 0, training resumes from that checkpoint
self.encoder.train()
self.sample_type = 'random'
for param_group in self.optimizer.param_groups:
param_group['lr'] = self.lr
triplet_sampler = TripletSampler(self.train_source, self.batch_size)
train_loader = tordata.DataLoader(
dataset=self.train_source,
batch_sampler=triplet_sampler,
collate_fn=self.collate_fn,
num_workers=self.num_workers)
train_label_set = list(self.train_source.label_set)
train_label_set.sort()
_time1 = datetime.now()
for seq, view, seq_type, label, batch_frame in train_loader:
self.restore_iter += 1
self.optimizer.zero_grad()
for i in range(len(seq)):
seq[i] = self.np2var(seq[i]).float()
if batch_frame is not None:
batch_frame = self.np2var(batch_frame).int()
feature, label_prob = self.encoder(*seq, batch_frame)
target_label = [train_label_set.index(l) for l in label]
target_label = self.np2var(np.array(target_label)).long()
triplet_feature = feature.permute(1, 0, 2).contiguous()
triplet_label = target_label.unsqueeze(0).repeat(triplet_feature.size(0), 1)
(full_loss_metric, hard_loss_metric, mean_dist, full_loss_num
) = self.triplet_loss(triplet_feature, triplet_label)
if self.hard_or_full_trip == 'hard':
loss = hard_loss_metric.mean()
elif self.hard_or_full_trip == 'full':
loss = full_loss_metric.mean()
self.hard_loss_metric.append(hard_loss_metric.mean().data.cpu().numpy())
self.full_loss_metric.append(full_loss_metric.mean().data.cpu().numpy())
self.full_loss_num.append(full_loss_num.mean().data.cpu().numpy())
self.dist_list.append(mean_dist.mean().data.cpu().numpy())
if loss > 1e-9:
loss.backward()
self.optimizer.step()
if self.restore_iter % 1000 == 0:
print(datetime.now() - _time1)
_time1 = datetime.now()
if self.restore_iter % 100 == 0:
self.save()
print('iter {}:'.format(self.restore_iter), end='')
print(', hard_loss_metric={0:.8f}'.format(np.mean(self.hard_loss_metric)), end='')
print(', full_loss_metric={0:.8f}'.format(np.mean(self.full_loss_metric)), end='')
print(', full_loss_num={0:.8f}'.format(np.mean(self.full_loss_num)), end='')
self.mean_dist = np.mean(self.dist_list)
print(', mean_dist={0:.8f}'.format(self.mean_dist), end='')
print(', lr=%f' % self.optimizer.param_groups[0]['lr'], end='')
print(', hard or full=%r' % self.hard_or_full_trip)
sys.stdout.flush()
self.hard_loss_metric = []
self.full_loss_metric = []
self.full_loss_num = []
self.dist_list = []
# Visualization using t-SNE
# if self.restore_iter % 500 == 0:
# pca = TSNE(2)
# pca_feature = pca.fit_transform(feature.view(feature.size(0), -1).data.cpu().numpy())
# for i in range(self.P):
# plt.scatter(pca_feature[self.M * i:self.M * (i + 1), 0],
# pca_feature[self.M * i:self.M * (i + 1), 1], label=label[self.M * i])
#
# plt.show()
if self.restore_iter == self.total_iter:
break
5.1.1 The load function
def load(self, restore_iter):
self.encoder.load_state_dict(torch.load(osp.join(
'checkpoint', self.model_name,
'{}-{:0>5}-encoder.ptm'.format(self.save_name, restore_iter))))
self.optimizer.load_state_dict(torch.load(osp.join(
'checkpoint', self.model_name,
'{}-{:0>5}-optimizer.ptm'.format(self.save_name, restore_iter))))
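So resuming is just a matter of pointing restore_iter at an existing checkpoint. An illustrative sketch, assuming a Model instance m from initialize_model and that the matching .ptm files exist under checkpoint/<model_name>/:
# Illustrative only: resume a previous run from iteration 40000.
m.restore_iter = 40000    # fit() will call self.load(40000) because restore_iter != 0
m.total_iter = 50000      # train for another 10000 iterations
m.fit()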
5.1.2 The Model class's __init__ function
The __init__ function defines many attributes:
def __init__(self,
hidden_dim,
lr,
hard_or_full_trip,
margin,
num_workers,
batch_size,
restore_iter,
total_iter,
save_name,
train_pid_num,
frame_num,
model_name,
train_source,
test_source,
img_size=64):
self.save_name = save_name
self.train_pid_num = train_pid_num
self.train_source = train_source
self.test_source = test_source
self.hidden_dim = hidden_dim
self.lr = lr
self.hard_or_full_trip = hard_or_full_trip
self.margin = margin
self.frame_num = frame_num
self.num_workers = num_workers
self.batch_size = batch_size
self.model_name = model_name
self.P, self.M = batch_size
self.restore_iter = restore_iter
self.total_iter = total_iter
self.img_size = img_size
self.encoder = SetNet(self.hidden_dim).float()
self.encoder = nn.DataParallel(self.encoder)
self.triplet_loss = TripletLoss(self.P * self.M, self.hard_or_full_trip, self.margin).float()
self.triplet_loss = nn.DataParallel(self.triplet_loss)
self.encoder.cuda()
self.triplet_loss.cuda()
self.optimizer = optim.Adam([
{'params': self.encoder.parameters()},
], lr=self.lr)
self.hard_loss_metric = []
self.full_loss_metric = []
self.full_loss_num = []
self.dist_list = []
self.mean_dist = 0.01
self.sample_type = 'all'
6. Full code of the Model class
import math
import os
import os.path as osp
import random
import sys
from datetime import datetime
import numpy as np
import torch
import torch.nn as nn
import torch.autograd as autograd
import torch.optim as optim
import torch.utils.data as tordata
from .network import TripletLoss, SetNet
from .utils import TripletSampler
class Model:
def __init__(self,
hidden_dim,
lr,
hard_or_full_trip,
margin,
num_workers,
batch_size,
restore_iter,
total_iter,
save_name,
train_pid_num,
frame_num,
model_name,
train_source,
test_source,
img_size=64):
self.save_name = save_name
self.train_pid_num = train_pid_num
self.train_source = train_source
self.test_source = test_source
self.hidden_dim = hidden_dim
self.lr = lr
self.hard_or_full_trip = hard_or_full_trip
self.margin = margin
self.frame_num = frame_num
self.num_workers = num_workers
self.batch_size = batch_size
self.model_name = model_name
self.P, self.M = batch_size
self.restore_iter = restore_iter
self.total_iter = total_iter
self.img_size = img_size
self.encoder = SetNet(self.hidden_dim).float()
self.encoder = nn.DataParallel(self.encoder)
self.triplet_loss = TripletLoss(self.P * self.M, self.hard_or_full_trip, self.margin).float()
self.triplet_loss = nn.DataParallel(self.triplet_loss)
self.encoder.cuda()
self.triplet_loss.cuda()
self.optimizer = optim.Adam([
{'params': self.encoder.parameters()},
], lr=self.lr)
self.hard_loss_metric = []
self.full_loss_metric = []
self.full_loss_num = []
self.dist_list = []
self.mean_dist = 0.01
self.sample_type = 'all'
def collate_fn(self, batch):
batch_size = len(batch)
feature_num = len(batch[0][0])
seqs = [batch[i][0] for i in range(batch_size)]
frame_sets = [batch[i][1] for i in range(batch_size)]
view = [batch[i][2] for i in range(batch_size)]
seq_type = [batch[i][3] for i in range(batch_size)]
label = [batch[i][4] for i in range(batch_size)]
batch = [seqs, view, seq_type, label, None]
def select_frame(index):
sample = seqs[index]
frame_set = frame_sets[index]
if self.sample_type == 'random':
frame_id_list = random.choices(frame_set, k=self.frame_num)
_ = [feature.loc[frame_id_list].values for feature in sample]
else:
_ = [feature.values for feature in sample]
return _
seqs = list(map(select_frame, range(len(seqs))))
if self.sample_type == 'random':
seqs = [np.asarray([seqs[i][j] for i in range(batch_size)]) for j in range(feature_num)]
else:
gpu_num = min(torch.cuda.device_count(), batch_size)
batch_per_gpu = math.ceil(batch_size / gpu_num)
batch_frames = [[
len(frame_sets[i])
for i in range(batch_per_gpu * _, batch_per_gpu * (_ + 1))
if i < batch_size
] for _ in range(gpu_num)]
if len(batch_frames[-1]) != batch_per_gpu:
for _ in range(batch_per_gpu - len(batch_frames[-1])):
batch_frames[-1].append(0)
max_sum_frame = np.max([np.sum(batch_frames[_]) for _ in range(gpu_num)])
seqs = [[
np.concatenate([
seqs[i][j]
for i in range(batch_per_gpu * _, batch_per_gpu * (_ + 1))
if i < batch_size
], 0) for _ in range(gpu_num)]
for j in range(feature_num)]
seqs = [np.asarray([
np.pad(seqs[j][_],
((0, max_sum_frame - seqs[j][_].shape[0]), (0, 0), (0, 0)),
'constant',
constant_values=0)
for _ in range(gpu_num)])
for j in range(feature_num)]
batch[4] = np.asarray(batch_frames)
batch[0] = seqs
return batch
def fit(self):
if self.restore_iter != 0:
self.load(self.restore_iter)
self.encoder.train()
self.sample_type = 'random'
for param_group in self.optimizer.param_groups:
param_group['lr'] = self.lr
triplet_sampler = TripletSampler(self.train_source, self.batch_size)
train_loader = tordata.DataLoader(
dataset=self.train_source,
batch_sampler=triplet_sampler,
collate_fn=self.collate_fn,
num_workers=self.num_workers)
train_label_set = list(self.train_source.label_set)
train_label_set.sort()
_time1 = datetime.now()
for seq, view, seq_type, label, batch_frame in train_loader:
self.restore_iter += 1
self.optimizer.zero_grad()
for i in range(len(seq)):
seq[i] = self.np2var(seq[i]).float()
if batch_frame is not None:
batch_frame = self.np2var(batch_frame).int()
feature, label_prob = self.encoder(*seq, batch_frame)
target_label = [train_label_set.index(l) for l in label]
target_label = self.np2var(np.array(target_label)).long()
triplet_feature = feature.permute(1, 0, 2).contiguous()
triplet_label = target_label.unsqueeze(0).repeat(triplet_feature.size(0), 1)
(full_loss_metric, hard_loss_metric, mean_dist, full_loss_num
) = self.triplet_loss(triplet_feature, triplet_label)
if self.hard_or_full_trip == 'hard':
loss = hard_loss_metric.mean()
elif self.hard_or_full_trip == 'full':
loss = full_loss_metric.mean()
self.hard_loss_metric.append(hard_loss_metric.mean().data.cpu().numpy())
self.full_loss_metric.append(full_loss_metric.mean().data.cpu().numpy())
self.full_loss_num.append(full_loss_num.mean().data.cpu().numpy())
self.dist_list.append(mean_dist.mean().data.cpu().numpy())
if loss > 1e-9:
loss.backward()
self.optimizer.step()
if self.restore_iter % 1000 == 0:
print(datetime.now() - _time1)
_time1 = datetime.now()
if self.restore_iter % 100 == 0:
self.save()
print('iter {}:'.format(self.restore_iter), end='')
print(', hard_loss_metric={0:.8f}'.format(np.mean(self.hard_loss_metric)), end='')
print(', full_loss_metric={0:.8f}'.format(np.mean(self.full_loss_metric)), end='')
print(', full_loss_num={0:.8f}'.format(np.mean(self.full_loss_num)), end='')
self.mean_dist = np.mean(self.dist_list)
print(', mean_dist={0:.8f}'.format(self.mean_dist), end='')
print(', lr=%f' % self.optimizer.param_groups[0]['lr'], end='')
print(', hard or full=%r' % self.hard_or_full_trip)
sys.stdout.flush()
self.hard_loss_metric = []
self.full_loss_metric = []
self.full_loss_num = []
self.dist_list = []
# Visualization using t-SNE
# if self.restore_iter % 500 == 0:
# pca = TSNE(2)
# pca_feature = pca.fit_transform(feature.view(feature.size(0), -1).data.cpu().numpy())
# for i in range(self.P):
# plt.scatter(pca_feature[self.M * i:self.M * (i + 1), 0],
# pca_feature[self.M * i:self.M * (i + 1), 1], label=label[self.M * i])
#
# plt.show()
if self.restore_iter == self.total_iter:
break
def ts2var(self, x):
return autograd.Variable(x).cuda()
def np2var(self, x):
return self.ts2var(torch.from_numpy(x))
def transform(self, flag, batch_size=1):
self.encoder.eval()
source = self.test_source if flag == 'test' else self.train_source
self.sample_type = 'all'
data_loader = tordata.DataLoader(
dataset=source,
batch_size=batch_size,
sampler=tordata.sampler.SequentialSampler(source),
collate_fn=self.collate_fn,
num_workers=self.num_workers)
feature_list = list()
view_list = list()
seq_type_list = list()
label_list = list()
for i, x in enumerate(data_loader):
seq, view, seq_type, label, batch_frame = x
for j in range(len(seq)):
seq[j] = self.np2var(seq[j]).float()
if batch_frame is not None:
batch_frame = self.np2var(batch_frame).int()
# print(batch_frame, np.sum(batch_frame))
feature, _ = self.encoder(*seq, batch_frame)
n, num_bin, _ = feature.size()
feature_list.append(feature.view(n, -1).data.cpu().numpy())
view_list += view
seq_type_list += seq_type
label_list += label
return np.concatenate(feature_list, 0), view_list, seq_type_list, label_list
def save(self):
os.makedirs(osp.join('checkpoint', self.model_name), exist_ok=True)
torch.save(self.encoder.state_dict(),
osp.join('checkpoint', self.model_name,
'{}-{:0>5}-encoder.ptm'.format(
self.save_name, self.restore_iter)))
torch.save(self.optimizer.state_dict(),
osp.join('checkpoint', self.model_name,
'{}-{:0>5}-optimizer.ptm'.format(
self.save_name, self.restore_iter)))
# restore_iter: iteration index of the checkpoint to load
def load(self, restore_iter):
self.encoder.load_state_dict(torch.load(osp.join(
'checkpoint', self.model_name,
'{}-{:0>5}-encoder.ptm'.format(self.save_name, restore_iter))))
self.optimizer.load_state_dict(torch.load(osp.join(
'checkpoint', self.model_name,
'{}-{:0>5}-optimizer.ptm'.format(self.save_name, restore_iter))))
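After training, the transform method shown above extracts features for evaluation. A minimal usage sketch, assuming a trained Model instance m:
# Extract flattened features for every test sequence (illustrative).
feature, view_list, seq_type_list, label_list = m.transform('test', batch_size=1)
print(feature.shape)   # (num_test_sequences, 62 * hidden_dim), one row per sequence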
7. Summary
After an afternoon plus an evening of reading code, I understood many points that had confused me before; the details of the loss computation are not written up yet, and I will add them when I have time. From the TripletSampler class and what batch_size = (8, 16) really means, to how collate_fn packs the output batch and how a fixed 30 frames are drawn from all frames, to how the SetNet architecture is implemented and how fit loads data and trains the model: some Python and PyTorch idioms still need more practice and familiarity, gradually forming my own mental representation of the code. Patience really is the most important quality; you have to work through it bit by bit, line by line.