pyskl官方代码是在Linux环境下运行的,作者在issues里也没有提供Windows下的运行修改方案,但是给出了PyTorch分布式训练的参考修改方法。本文记录一下修改过程:
代码下载
PYSKL Release v0.2:https://github.com/kennymckormick/pyskl/releases/tag/v0.2
按官方说明安装pyskl:
git clone https://github.com/kennymckormick/pyskl.git
cd pyskl
# This command runs well with conda 22.9.0, if you are running an early conda version and got some errors, try to update your conda first
conda env create -f pyskl.yaml
conda activate pyskl
pip install -e .
下载需要的包
测试成功的安装包版本如下:
修改pycharm本地运行
在config前面加‘--’
def parse_args():
    """Build the command-line interface of the training script and parse it.

    Every option is optional. ``--config`` falls back to a default config
    path, so the script can be started without any arguments (for example
    from an IDE run configuration).

    Returns:
        argparse.Namespace: Parsed options ``config``, ``validate``,
        ``test_last``, ``test_best``, ``seed``, ``deterministic`` and
        ``launcher``.
    """
    # (flag, keyword arguments) pairs. They are registered in exactly this
    # order so the generated ``--help`` output keeps its ordering.
    option_table = (
        ('--config', dict(
            default='../configs/posec3d/c3d_light_gym/joint.py',
            help='train config file path')),
        ('--validate', dict(
            action='store_true',
            help='whether to evaluate the checkpoint during training')),
        ('--test-last', dict(
            action='store_true',
            help='whether to test the checkpoint after training')),
        ('--test-best', dict(
            action='store_true',
            help='whether to test the best checkpoint (if applicable) after training')),
        ('--seed', dict(type=int, default=None, help='random seed')),
        ('--deterministic', dict(
            action='store_true',
            help='whether to set deterministic options for CUDNN backend.')),
        ('--launcher', dict(
            choices=['pytorch', 'slurm'],
            default='pytorch',
            help='job launcher')),
    )

    arg_parser = argparse.ArgumentParser(description='Train a recognizer')
    for flag, options in option_table:
        arg_parser.add_argument(flag, **options)

    # The ``--local_rank`` option and the LOCAL_RANK environment export are
    # deliberately left disabled in this non-distributed version:
    # parser.add_argument('--local_rank', type=int, default=0)
    # if 'LOCAL_RANK' not in os.environ:
    #     os.environ['LOCAL_RANK'] = str(args.local_rank)
    return arg_parser.parse_args()
修改分布式训练代码
tools/train.py
修改注释的代码:
# 第47行,注释
parser.add_argument('--local_rank', type=int, default=0)
# 第49、50行,注释
if 'LOCAL_RANK' not in os.environ:
os.environ['LOCAL_RANK'] = str(args.local_rank)
# 第70-74行,注释
if not hasattr(cfg, 'dist_params'):
cfg.dist_params = dict(backend='nccl')
init_dist(args.launcher, **cfg.dist_params)
rank, world_size = get_dist_info()
# 修改第75行
# cfg.gpu_ids = range(world_size)
cfg.gpu_ids =[0]
#修改第134行
# if rank == 0 and memcached:
if memcached:
#修改第153行
# if rank == 0 and memcached:
if memcached:
# 注释所有dist.barrier(),148行,151行
下面是修改完成后的代码,替换pyskl中的train.py即可
# Copyright (c) OpenMMLab. All rights reserved.
# flake8: noqa: E722
import argparse
import os
import os.path as osp
import time
import mmcv
import torch
import torch.distributed as dist
from mmcv import Config
from mmcv.runner import get_dist_info, init_dist, set_random_seed
from mmcv.utils import get_git_hash
from pyskl import __version__
from pyskl.apis import init_random_seed, train_model
from pyskl.datasets import build_dataset
from pyskl.models import build_model
from pyskl.utils import collect_env, get_root_logger, mc_off, mc_on, test_port
def parse_args():
    """Build the command-line interface of the training script and parse it.

    Every option is optional. ``--config`` falls back to a default config
    path, so the script can be started without any arguments (for example
    from an IDE run configuration).

    Returns:
        argparse.Namespace: Parsed options ``config``, ``validate``,
        ``test_last``, ``test_best``, ``seed``, ``deterministic`` and
        ``launcher``.
    """
    # (flag, keyword arguments) pairs. They are registered in exactly this
    # order so the generated ``--help`` output keeps its ordering.
    option_table = (
        ('--config', dict(
            default='../configs/posec3d/c3d_light_gym/joint.py',
            help='train config file path')),
        ('--validate', dict(
            action='store_true',
            help='whether to evaluate the checkpoint during training')),
        ('--test-last', dict(
            action='store_true',
            help='whether to test the checkpoint after training')),
        ('--test-best', dict(
            action='store_true',
            help='whether to test the best checkpoint (if applicable) after training')),
        ('--seed', dict(type=int, default=None, help='random seed')),
        ('--deterministic', dict(
            action='store_true',
            help='whether to set deterministic options for CUDNN backend.')),
        ('--launcher', dict(
            choices=['pytorch', 'slurm'],
            default='pytorch',
            help='job launcher')),
    )

    arg_parser = argparse.ArgumentParser(description='Train a recognizer')
    for flag, options in option_table:
        arg_parser.add_argument(flag, **options)

    # The ``--local_rank`` option and the LOCAL_RANK environment export are
    # deliberately left disabled in this non-distributed version:
    # parser.add_argument('--local_rank', type=int, default=0)
    # if 'LOCAL_RANK' not in os.environ:
    #     os.environ['LOCAL_RANK'] = str(args.local_rank)
    return arg_parser.parse_args()
def main():
    """Train a recognizer on one GPU without distributed initialisation.

    Steps, in order: parse the command line, load the config, resolve and
    create the work directory, dump the config into it, set up the logger
    and the random seed, build the model and the training dataset,
    optionally bring up memcached via ``mc_on``, run ``train_model`` and
    finally call ``mc_off`` when memcached was requested.

    Raises:
        AssertionError: If ``cfg.workflow`` does not have exactly one entry,
            if ``mc_cfg`` is not a tuple whose first item is
            ``'localhost'``, or if the memcached port is still unreachable
            after all retries.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority:
    # config file > default (base filename)
    if cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0])

    # Distributed initialisation is disabled in this version:
    # if not hasattr(cfg, 'dist_params'):
    #     cfg.dist_params = dict(backend='nccl')
    #
    # init_dist(args.launcher, **cfg.dist_params)
    # rank, world_size = get_dist_info()
    # Fixed single GPU id; this replaces ``range(world_size)``.
    cfg.gpu_ids = [0]

    # Resume from <work_dir>/latest.pth when it exists, unless the config
    # names an explicit checkpoint or switches auto_resume off.
    auto_resume = cfg.get('auto_resume', True)
    if auto_resume and cfg.get('resume_from', None) is None:
        resume_pth = osp.join(cfg.work_dir, 'latest.pth')
        if osp.exists(resume_pth):
            cfg.resume_from = resume_pth

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Config: {cfg.pretty_text}')

    # set random seeds
    seed = init_random_seed(args.seed)
    logger.info(f'Set random seed to {seed}, deterministic: {args.deterministic}')
    set_random_seed(seed, deterministic=args.deterministic)

    cfg.seed = seed
    meta['seed'] = seed
    meta['config_name'] = osp.basename(args.config)
    # Strip both kinds of trailing separators before taking the basename.
    meta['work_dir'] = osp.basename(cfg.work_dir.rstrip('/\\'))

    model = build_model(cfg.model)

    datasets = [build_dataset(cfg.data.train)]

    cfg.workflow = cfg.get('workflow', [('train', 1)])
    assert len(cfg.workflow) == 1
    if cfg.checkpoint_config is not None:
        # save pyskl version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            pyskl_version=__version__ + get_git_hash(digits=7),
            config=cfg.pretty_text)

    test_option = dict(test_last=args.test_last, test_best=args.test_best)

    default_mc_cfg = ('localhost', 22077)
    memcached = cfg.get('memcached', False)

    # if rank == 0 and memcached:
    if memcached:
        # mc_list is a list of pickle files you want to cache in memory.
        # Basically, each pickle file is a dictionary.
        mc_cfg = cfg.get('mc_cfg', default_mc_cfg)
        assert isinstance(mc_cfg, tuple) and mc_cfg[0] == 'localhost'
        if not test_port(mc_cfg[0], mc_cfg[1]):
            mc_on(port=mc_cfg[1], launcher=args.launcher)
        retry = 3
        while not test_port(mc_cfg[0], mc_cfg[1]) and retry > 0:
            time.sleep(5)
            retry -= 1
        # The loop never drives ``retry`` below zero, so the previous
        # ``retry >= 0`` check could not fail; test the port itself instead.
        assert test_port(mc_cfg[0], mc_cfg[1]), 'Failed to launch memcached. '

    # dist.barrier()

    train_model(model, datasets, cfg, validate=args.validate, test=test_option, timestamp=timestamp, meta=meta)
    # dist.barrier()

    # if rank == 0 and memcached:
    if memcached:
        mc_off()


if __name__ == '__main__':
    main()
pyskl/apis/train.py
这个文件才是训练(train)的主要代码,其中使用了mmcv的分布式训练封装(MMDistributedDataParallel),需要替换为单机版本的MMDataParallel
# 第10行注释,替换
# from mmcv.parallel import MMDistributedDataParallel
from mmcv.parallel import MMDistributedDataParallel, MMDataParallel
# 第94-98行注释,替换
# model = MMDistributedDataParallel(
#     model.cuda(),
#     device_ids=[torch.cuda.current_device()],
#     broadcast_buffers=False,
#     find_unused_parameters=find_unused_parameters)
model = MMDataParallel(model.cuda())
# 注释第147行dist.barrier()
# dist.barrier()
完整代码(注意:下面贴出的实际上是修改后的采样流水线代码,即后文"修改sample.py"一节对应的文件,包含UniformSampleFrames、SampleFrames等类):
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
import numpy as np
from ..builder import PIPELINES
@PIPELINES.register_module()
class UniformSampleFrames:
    """Uniformly sample frames from the video.

    To sample an n-frame clip from the video. UniformSampleFrames basically
    divide the video into n segments of equal length and randomly sample one
    frame from each segment. To make the testing results reproducible, a
    random seed is set during testing, to make the sampling results
    deterministic.

    Required keys are "total_frames", "start_index" , added or modified keys
    are "frame_inds", "clip_len", "frame_interval" and "num_clips".

    Args:
        clip_len (int): Frames of each sampled output clip.
        num_clips (int): Number of clips to be sampled. Default: 1.
        test_mode (bool): Store True when building test or validation dataset.
            Default: False.
        float_ok (bool): When True, training clips are sampled as float32
            (fractional) indices and "frame_inds" is stored without being
            cast to int. Default: False.
        p_interval (float | tuple[float]): Range ``(low, high)`` from which
            the fraction of the video used for one clip is drawn; a single
            number ``p`` is treated as ``(p, p)``. Default: 1.
        seed (int): The random seed used during test time. Default: 255.
    """

    def __init__(self,
                 clip_len,
                 num_clips=1,
                 test_mode=False,
                 float_ok=False,
                 p_interval=1,
                 seed=255):

        self.clip_len = clip_len
        self.num_clips = num_clips
        self.test_mode = test_mode
        self.float_ok = float_ok
        self.seed = seed
        self.p_interval = p_interval
        # Normalise a scalar into a degenerate (low, high) range.
        if not isinstance(p_interval, tuple):
            self.p_interval = (p_interval, p_interval)

        if self.float_ok:
            warnings.warn('When float_ok == True, there will be no loop.')

    def _get_train_clips(self, num_frames, clip_len):
        """Uniformly sample indices for training clips.

        Args:
            num_frames (int): The number of frames.
            clip_len (int): The length of the clip.

        Returns:
            np.ndarray: Indices of all clips concatenated into one 1-D array.
        """
        allinds = []
        for _ in range(self.num_clips):
            # Draw the sub-window for this clip: a fraction ``ratio`` of the
            # video, starting at a random offset ``off``.
            old_num_frames = num_frames
            pi = self.p_interval
            ratio = np.random.rand() * (pi[1] - pi[0]) + pi[0]
            num_frames = int(ratio * num_frames)
            off = np.random.randint(old_num_frames - num_frames + 1)

            if self.float_ok:
                # One fractional index per equal-width segment.
                interval = (num_frames - 1) / clip_len
                offsets = np.arange(clip_len) * interval
                inds = np.random.rand(clip_len) * interval + offsets
                inds = inds.astype(np.float32)
            elif num_frames < clip_len:
                # Too few frames: take consecutive indices from a random
                # start; they may run past ``num_frames``.
                start = np.random.randint(0, num_frames)
                inds = np.arange(start, start + clip_len)
            elif clip_len <= num_frames < 2 * clip_len:
                # Take consecutive indices and insert ``num_frames - clip_len``
                # single-frame skips at random positions.
                basic = np.arange(clip_len)
                inds = np.random.choice(
                    clip_len + 1, num_frames - clip_len, replace=False)
                offset = np.zeros(clip_len + 1, dtype=np.int64)
                offset[inds] = 1
                offset = np.cumsum(offset)
                inds = basic + offset[:-1]
            else:
                # Split into ``clip_len`` segments and pick one random frame
                # from each segment.
                bids = np.array(
                    [i * num_frames // clip_len for i in range(clip_len + 1)])
                bsize = np.diff(bids)
                bst = bids[:clip_len]
                offset = np.random.randint(bsize)
                inds = bst + offset

            inds = inds + off
            # Restore the full length for the next clip.
            num_frames = old_num_frames

            allinds.append(inds)

        return np.concatenate(allinds)

    def _get_test_clips(self, num_frames, clip_len):
        """Uniformly sample indices for testing clips.

        The global NumPy RNG is re-seeded with ``self.seed`` on every call.

        Args:
            num_frames (int): The number of frames.
            clip_len (int): The length of the clip.

        Returns:
            np.ndarray: Indices of all clips concatenated into one 1-D array.
        """
        np.random.seed(self.seed)
        if self.float_ok:
            # NOTE(review): this result is never used, because ``inds`` is
            # reassigned in every branch of the loop below. It is kept since
            # it advances the RNG state, so removing it would change the
            # indices sampled when float_ok is True.
            interval = (num_frames - 1) / clip_len
            offsets = np.arange(clip_len) * interval
            inds = np.concatenate([
                np.random.rand(clip_len) * interval + offsets
                for i in range(self.num_clips)
            ]).astype(np.float32)

        all_inds = []

        for i in range(self.num_clips):
            # Same sub-window draw as in training, made repeatable by the
            # seed set above.
            old_num_frames = num_frames
            pi = self.p_interval
            ratio = np.random.rand() * (pi[1] - pi[0]) + pi[0]
            num_frames = int(ratio * num_frames)
            off = np.random.randint(old_num_frames - num_frames + 1)

            if num_frames < clip_len:
                start_ind = i if num_frames < self.num_clips else i * num_frames // self.num_clips
                inds = np.arange(start_ind, start_ind + clip_len)
            elif clip_len <= num_frames < clip_len * 2:
                basic = np.arange(clip_len)
                inds = np.random.choice(clip_len + 1, num_frames - clip_len, replace=False)
                # Fixed: the bare name ``int64`` is undefined here and raised
                # NameError; use ``np.int64`` as the training path does.
                offset = np.zeros(clip_len + 1, dtype=np.int64)
                offset[inds] = 1
                offset = np.cumsum(offset)
                inds = basic + offset[:-1]
            else:
                bids = np.array([i * num_frames // clip_len for i in range(clip_len + 1)])
                bsize = np.diff(bids)
                bst = bids[:clip_len]
                offset = np.random.randint(bsize)
                inds = bst + offset

            all_inds.append(inds + off)
            num_frames = old_num_frames

        return np.concatenate(all_inds)

    def __call__(self, results):
        """Sample frame indices and store them in ``results``.

        Args:
            results (dict): Must contain "total_frames" and "start_index";
                "keypoint" is optional.

        Returns:
            dict: The same dict with "frame_inds", "clip_len",
            "frame_interval" (always None) and "num_clips" set.
        """
        num_frames = results['total_frames']

        if self.test_mode:
            inds = self._get_test_clips(num_frames, self.clip_len)
        else:
            inds = self._get_train_clips(num_frames, self.clip_len)

        # Wrap indices that ran past the end, then shift by the start index.
        inds = np.mod(inds, num_frames)
        start_index = results['start_index']
        inds = inds + start_index

        if 'keypoint' in results:
            kp = results['keypoint']
            # Axis 0 is the person slot, axis 1 the frame.
            assert num_frames == kp.shape[1]
            num_person = kp.shape[0]
            num_persons = [num_person] * num_frames
            for i in range(num_frames):
                # Per-frame person count: index of the last slot that has
                # any value with magnitude >= 1e-5, plus one.
                j = num_person - 1
                while j >= 0 and np.all(np.abs(kp[j, i]) < 1e-5):
                    j -= 1
                num_persons[i] = j + 1
            # Mark frames on either side of a change in the person count.
            transitional = [False] * num_frames
            for i in range(1, num_frames - 1):
                if num_persons[i] != num_persons[i - 1]:
                    transitional[i] = transitional[i - 1] = True
                if num_persons[i] != num_persons[i + 1]:
                    transitional[i] = transitional[i + 1] = True
            # On marked frames use the truncated integer index instead of
            # the fractional one.
            inds_int = inds.astype(int)
            coeff = np.array([transitional[i] for i in inds_int])
            inds = (coeff * inds_int + (1 - coeff) * inds).astype(np.float32)

        results['frame_inds'] = inds if self.float_ok else inds.astype(int)
        results['clip_len'] = self.clip_len
        results['frame_interval'] = None
        results['num_clips'] = self.num_clips
        return results

    def __repr__(self):
        """Return a string showing clip_len, num_clips, test_mode and seed."""
        repr_str = (f'{self.__class__.__name__}('
                    f'clip_len={self.clip_len}, '
                    f'num_clips={self.num_clips}, '
                    f'test_mode={self.test_mode}, '
                    f'seed={self.seed})')
        return repr_str
@PIPELINES.register_module()
class UniformSample(UniformSampleFrames):
    """Same transform as ``UniformSampleFrames``, registered under a second name."""
@PIPELINES.register_module()
class SampleFrames:
    """Pick frame indices for one or more fixed-length clips of a video.

    Reads "total_frames" and "start_index" from the results dict and writes
    "frame_inds", "clip_len", "frame_interval" and "num_clips".

    Args:
        clip_len (int): Number of frames in each sampled clip.
        frame_interval (int): Temporal stride between neighbouring sampled
            frames. Default: 1.
        num_clips (int): How many clips to sample. Default: 1.
        temporal_jitter (bool): Add a random per-frame offset below
            ``frame_interval``. Default: False.
        twice_sample (bool): In test mode, additionally sample the clips
            without the fixed half-interval shift (commonly used for testing
            the TSM model). Default: False.
        out_of_bound_opt (str): Policy for indices past the last frame,
            either 'loop' or 'repeat_last'. Default: 'loop'.
        test_mode (bool): Store True when building test or validation dataset.
            Default: False.
        start_index (None): Deprecated; it moved to the dataset class
            (``BaseDataset``, ``VideoDatset``, ``RawframeDataset``, etc),
            see https://github.com/open-mmlab/mmaction2/pull/89.
        keep_tail_frames (bool): Whether to keep tail frames when sampling.
            Default: False.
    """

    def __init__(self,
                 clip_len,
                 frame_interval=1,
                 num_clips=1,
                 temporal_jitter=False,
                 twice_sample=False,
                 out_of_bound_opt='loop',
                 test_mode=False,
                 start_index=None,
                 keep_tail_frames=False):
        # Clip geometry.
        self.clip_len = clip_len
        self.frame_interval = frame_interval
        self.num_clips = num_clips
        # Sampling behaviour switches.
        self.temporal_jitter = temporal_jitter
        self.twice_sample = twice_sample
        self.out_of_bound_opt = out_of_bound_opt
        self.test_mode = test_mode
        self.keep_tail_frames = keep_tail_frames
        assert self.out_of_bound_opt in ['loop', 'repeat_last']

        if start_index is None:
            return
        warnings.warn('No longer support "start_index" in "SampleFrames", '
                      'it should be set in dataset class, see this pr: '
                      'https://github.com/open-mmlab/mmaction2/pull/89')

    def _get_train_clips(self, num_frames):
        """Compute randomised clip start offsets for training.

        The valid start range is divided evenly among the clips and each
        start is shifted randomly inside its share. When the video is too
        short for that, the method falls back to sorted random starts, evenly
        rounded starts, or all zeros.

        Args:
            num_frames (int): Total number of frame in the video.

        Returns:
            np.ndarray: One start offset per clip.
        """
        n_clips = self.num_clips
        span = self.clip_len * self.frame_interval

        if self.keep_tail_frames:
            step = (num_frames - span + 1) / float(n_clips)
            if num_frames <= span - 1:
                return np.zeros((n_clips, ), dtype=int)
            starts = np.arange(n_clips) * step
            jitter = np.random.uniform(0, step, n_clips)
            return (starts + jitter).astype(int)

        step = (num_frames - span + 1) // n_clips
        if step > 0:
            starts = np.arange(n_clips) * step
            return starts + np.random.randint(step, size=n_clips)
        if num_frames > max(n_clips, span):
            picks = np.random.randint(num_frames - span + 1, size=n_clips)
            return np.sort(picks)
        if step == 0:
            ratio = (num_frames - span + 1.0) / n_clips
            return np.around(np.arange(n_clips) * ratio)
        return np.zeros((n_clips, ), dtype=int)

    def _get_test_clips(self, num_frames):
        """Compute deterministic clip start offsets for testing.

        Each start is moved to the middle of its share of the valid start
        range. With ``twice_sample`` the unshifted starts are appended as
        well. If the video is shorter than one clip, all offsets are zero.

        Args:
            num_frames (int): Total number of frame in the video.

        Returns:
            np.ndarray: Start offsets of the clips.
        """
        n_clips = self.num_clips
        span = self.clip_len * self.frame_interval
        step = (num_frames - span + 1) / float(n_clips)

        if num_frames <= span - 1:
            return np.zeros((n_clips, ), dtype=int)

        starts = np.arange(n_clips) * step
        centred = (starts + step / 2.0).astype(int)
        if not self.twice_sample:
            return centred
        return np.concatenate([centred, starts])

    def _sample_clips(self, num_frames):
        """Dispatch to the test or train offset sampler.

        Args:
            num_frames (int): Total number of frame in the video.

        Returns:
            np.ndarray: Start offsets of the clips.
        """
        sampler = self._get_test_clips if self.test_mode else self._get_train_clips
        return sampler(num_frames)

    def __call__(self, results):
        """Compute the frame indices and store them in ``results``.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: The same dict with the sampling keys filled in.
        """
        total_frames = results['total_frames']

        starts = self._sample_clips(total_frames)
        within_clip = np.arange(self.clip_len)[None, :] * self.frame_interval
        frame_inds = np.concatenate(starts[:, None] + within_clip)

        if self.temporal_jitter:
            frame_inds += np.random.randint(
                self.frame_interval, size=len(frame_inds))

        # One row per clip, so out-of-range handling works clip by clip.
        frame_inds = frame_inds.reshape((-1, self.clip_len))
        policy = self.out_of_bound_opt
        if policy == 'loop':
            frame_inds = np.mod(frame_inds, total_frames)
        elif policy == 'repeat_last':
            in_range = frame_inds < total_frames
            overflow = 1 - in_range
            # Largest in-range index of each clip replaces its overflow.
            last_valid = np.max(in_range * frame_inds, axis=1)
            frame_inds = in_range * frame_inds + overflow * last_valid[:, None]
        else:
            raise ValueError('Illegal out_of_bound option.')

        start_index = results['start_index']
        flat_inds = np.concatenate(frame_inds) + start_index
        results.update(
            frame_inds=flat_inds.astype(int),
            clip_len=self.clip_len,
            frame_interval=self.frame_interval,
            num_clips=self.num_clips)
        return results

    def __repr__(self):
        """Return the class name followed by the main sampling settings."""
        shown = ('clip_len', 'frame_interval', 'num_clips', 'temporal_jitter',
                 'twice_sample', 'out_of_bound_opt', 'test_mode')
        settings = ', '.join(f'{key}={getattr(self, key)}' for key in shown)
        return f'{self.__class__.__name__}({settings})'
重装yapf包
此时运行train.py可能会遇到
TypeError: FormatCode() got an unexpected keyword argument 'verify'
这是因为yapf包版本太新了,降低为0.40.1
pip uninstall yapf
pip install yapf==0.40.1 -i https://pypi.tuna.tsinghua.edu.cn/simple
修改mmcv-full1.5.0代码
安装mmcv-full1.5.0版本运行过程会遇到如下问题
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd3 in position 0: invalid continuation byte
这是因为编码格式的问题,修改C:\ProgramData\Anaconda3\envs\pyskl\lib\site-packages\mmcv\utils\env.py文件
# 第91行env_info['MSVC'] = cc.decode(encoding).partition('\n')[0].strip()修改为
env_info['MSVC'] = cc.decode(encoding, 'ignore').partition('\n')[0].strip()
修改sample.py
由于numpy版本的原因,会存在冲突:np.int在最新的版本中已被移除(NumPy 1.20起弃用,1.24起删除),需要改用Python内置的int。降低numpy版本又会导致与scipy冲突,因此建议将sample.py中所有的np.int替换为int;注意np.int64无需更改,批量替换时不要把np.int64误改成int64(否则会报NameError)
训练: