# Random samplers
# (IDM-main/idm/utils/data/sampler.py)
from __future__ import absolute_import
from collections import defaultdict
import numpy as np
import random
import torch
from torch.utils.data.sampler import Sampler
def No_index(a, b):
    """Return the positions in list ``a`` whose element is not equal to ``b``.

    ``a`` must be a ``list``; anything else trips the assertion below.
    """
    assert isinstance(a, list)
    positions = []
    for pos, item in enumerate(a):
        if item != b:
            positions.append(pos)
    return positions
class RandomIdentitySampler(Sampler):
    """Yield ``num_instances`` image indices for every person ID.

    Identities are visited in a freshly shuffled order on each iteration, and
    the indices belonging to one identity are emitted consecutively.

    Args:
        data_source: iterable of ``(_, pid, _)`` triples; the position of a
            triple in the iteration is used as its dataset index.
        num_instances: how many indices to draw per identity.
    """

    def __init__(self, data_source, num_instances):
        self.data_source = data_source  # the dataset being sampled
        self.num_instances = num_instances  # images drawn per identity
        # Maps each person ID to the dataset positions of its images:
        # {pid: [index1, index2, ...]}
        self.index_dic = defaultdict(list)
        for position, record in enumerate(data_source):
            _, pid, _ = record
            self.index_dic[pid].append(position)
        self.pids = list(self.index_dic.keys())  # all person IDs seen
        self.num_samples = len(self.pids)  # number of distinct identities

    def __len__(self):
        # Example: 751 training identities x 4 images each = 3004 indices.
        return self.num_instances * self.num_samples

    def __iter__(self):
        sampled = []  # flat list of chosen dataset indices
        # Walk the identities in a random order.
        for pid_pos in torch.randperm(self.num_samples).tolist():
            candidates = self.index_dic[self.pids[pid_pos]]
            # Repeats are only allowed when the identity has too few images.
            with_replacement = len(candidates) < self.num_instances
            picked = np.random.choice(
                candidates, size=self.num_instances, replace=with_replacement
            )
            sampled.extend(picked)
        return iter(sampled)
class RandomMultipleGallerySampler(Sampler):
    """Identity-balanced sampler that prefers images from other cameras.

    On each iteration the identities are visited in a random order. For every
    identity one anchor image is drawn, followed by ``num_instances - 1``
    further images of the same identity. The extra images are taken from
    cameras different from the anchor's camera when such images exist,
    otherwise from the identity's remaining images.

    Args:
        data_source: collection of ``(_, pid, cam)`` triples that supports
            both iteration and integer indexing. Entries with a negative
            ``pid`` are ignored.
        num_instances: number of indices to emit per identity.
    """

    def __init__(self, data_source, num_instances=4):
        self.data_source = data_source
        self.num_instances = num_instances
        self.index_pid = defaultdict(int)   # dataset index -> pid
        self.pid_cam = defaultdict(list)    # pid -> camera of each of its images
        self.pid_index = defaultdict(list)  # pid -> dataset index of each image
        for index, (_, pid, cam) in enumerate(data_source):
            if pid < 0:
                # Negative pids are skipped and therefore never sampled.
                continue
            self.index_pid[index] = pid
            self.pid_cam[pid].append(cam)
            self.pid_index[pid].append(index)
        self.pids = list(self.pid_index)
        self.num_samples = len(self.pids)

    def __len__(self):
        """Return ``number of identities * num_instances``.

        Note that ``__iter__`` emits only the anchor for an identity that has
        a single image, so the iterator can be shorter than this value.
        """
        return self.num_samples * self.num_instances

    def _draw_extra(self, candidates):
        """Draw ``num_instances - 1`` entries from the non-empty *candidates*.

        Sampling is done without replacement whenever there are enough
        candidates, and with replacement only when there are too few.
        """
        needed = self.num_instances - 1
        return np.random.choice(
            candidates, size=needed, replace=len(candidates) < needed
        )

    def __iter__(self):
        ret = []
        for kid in torch.randperm(len(self.pids)).tolist():
            pid = self.pids[kid]
            index = self.pid_index[pid]  # dataset indices of this identity
            cams = self.pid_cam[pid]     # camera of each entry in ``index``

            anchor = random.choice(index)
            _, _, anchor_cam = self.data_source[anchor]
            ret.append(anchor)

            # Positions (into ``index``/``cams``) of images shot by a camera
            # other than the anchor's.
            candidates = [
                pos for pos, cam in enumerate(cams) if cam != anchor_cam
            ]
            if not candidates:
                # Every image shares the anchor's camera: fall back to any
                # image of this identity other than the anchor itself.
                candidates = [
                    pos for pos, idx in enumerate(index) if idx != anchor
                ]
                if not candidates:
                    # Single-image identity: only the anchor is emitted.
                    continue

            ret.extend(index[pos] for pos in self._draw_extra(candidates))
        return iter(ret)