昇腾平台模型训练
# Working directories handed to the nnUNet_* commands through the environment:
# raw data, preprocessed data, and trained-model output.
nnUNet_raw_data_base="/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnUNetFrame/DATASET/nnUNet_raw"
nnUNet_preprocessed="/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnUNetFrame/DATASET/nnUNet_preprocessed"
RESULTS_FOLDER="/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnUNetFrame/DATASET/nnUNet_trained_models"
# Mark all three for export in one statement so child processes inherit them.
export nnUNet_raw_data_base nnUNet_preprocessed RESULTS_FOLDER
# Convert the decathlon-style dataset folder given with -i into nnU-Net's raw-data layout.
# NOTE(review): this folder is Task02_BraTS2021, while the commands below use
# task ids 2, 1 and 501 — confirm which task id each step is meant to target.
nnUNet_convert_decathlon_task -i /home/ma-user/work/nnunet/nnUNet-nnunetv1/nnUNetFrame/DATASET/nnUNet_raw/nnUNet_raw_data/Task02_BraTS2021
# Plan and preprocess task 2, with the dataset integrity check enabled.
nnUNet_plan_and_preprocess -t 2 --verify_dataset_integrity
# Training runs; arguments are presumably <configuration> <trainer class> <task id> <fold>
# (verify against the nnU-Net v1 usage text). These run one after another in the
# foreground if the lines are executed as a script.
nnUNet_train 3d_fullres nnUNetTrainerV2 1 0 --npz
nnUNet_train 3d_fullres nnUNetTrainer 1 0 --npz
nnUNet_train 2d nnUNetTrainerV2 501 0
# TODO: the hiddenlayer package is not installed yet — remember to install it
# (presumably the optional Python dependency of that name; confirm).
# Run the training in the background so it survives closing the terminal.
# nohup by itself only makes the job ignore SIGHUP; the trailing '&' is what
# actually backgrounds it. stdout and stderr are both written to the log file.
nohup nnUNet_train 2d nnUNetTrainerV2 501 0 > log_8_24_test.txt 2>&1 &
代码迁移
# Enter the Ascend toolkit directory that holds the ms_fmk_transplt migration tool.
cd /usr/local/Ascend/ascend-toolkit/latest/tools/ms_fmk_transplt/
# Run the Python source conversion script (-i input tree, -o output dir, -v 1.11.0).
# NOTE(review): the second run converts only the training/ subfolder but writes to
# the same output directory as the first — confirm that this is intended.
python ms_fmk_transplt.py -i /home/ma-user/work/nnunet/nnUNet-nnunetv1/nnunet/ -o /home/ma-user/work/nnunet/nnUNet-nnunetv1/nnunet_ascend/ -v 1.11.0
python ms_fmk_transplt.py -i /home/ma-user/work/nnunet/nnUNet-nnunetv1/nnunet/training/ -o /home/ma-user/work/nnunet/nnUNet-nnunetv1/nnunet_ascend/ -v 1.11.0
# Check NPU utilisation.
npu-smi info
# Kill the running processes.
# WARNING: this sends SIGKILL to every process whose name matches "python",
# not only the training job, and gives them no chance to clean up.
pkill -9 python
# Run inference with the 2d model of task 501, fold 0: read images from -i, write results to -o.
# NOTE(review): input is the imagesTr folder of Task002_BraTS2021 while -t is 501 —
# confirm the task id / folder pairing and that predicting on training images is intended.
nnUNet_predict -i /home/ma-user/work/nnunet/nnUNet-nnunetv1/nnUNetFrame/DATASET/nnUNet_raw/nnUNet_raw_data/Task002_BraTS2021/imagesTr/ -o /home/ma-user/work/nnunet/nnUNet-nnunetv1/nnUNetFrame/DATASET/nnUNet_raw/nnUNet_raw_data/Task002_BraTS2021/inferTs/ -t 501 -m 2d -f 0
# PyTorch-to-MindSpore code migration.
# Enter the x2mindspore tool directory and run it on the already-migrated source
# tree (-i input, -o output, -f pytorch as the source framework).
cd /usr/local/Ascend/ascend-toolkit/latest/tools/x2mindspore
./run_x2mindspore.sh -i /home/ma-user/work/nnunet_ascend/nnUNet-nnunetv1_msft/ -o /home/ma-user/work/nnunet_ascend/ -f pytorch
pytorch2.0
# Activate the PyTorch conda environment.
source activate /home/ma-user/anaconda3/envs/pytorch_1_11
# Enter the Ascend toolkit directory that holds the ms_fmk_transplt migration tool.
cd /usr/local/Ascend/ascend-toolkit/latest/tools/ms_fmk_transplt/
# Run the Python source conversion script on the whole repository
# (no -v version flag is passed in this invocation).
python ms_fmk_transplt.py -i /home/ma-user/work/nnUNet-nnunetv1/ -o /home/ma-user/work/nnunet_ascend/
# Pack the migrated folder.
# NOTE(review): -z produces a gzip-compressed archive although the file is named
# .tar; the name is kept as-is here because other steps may refer to it.
tar -zcvf /home/ma-user/work/nnunet/nnUNet-nnunetv1_msft.tar /home/ma-user/work/nnunet/nnUNet-nnunetv1_msft/
# Unpack archives (the first two are gzip-compressed, the dataset archive is a plain tar).
tar -zxvf nnunet_msft.tar
tar -zxvf nnUNet-nnunetv1_ascend.tar
tar -xvf BraTS2021_Data.tar
# If the wrong tar command was started, terminate it.
# Note: this signals every process named tar, not just the most recent one.
pkill tar
# Normalise the dataset format; the paths inside the script must be edited first.
python Task500_BraTS_2021.py
# Dataset conversion.
nnUNet_convert_decathlon_task -i /home/ma-user/work/nnunet/nnUNet-nnunetv1/nnUNetFrame/DATASET/nnUNet_raw/nnUNet_raw_data/Task01_BraTS2021
修改后的代码报错(np.load 加了 allow_pickle=True)
Failed to interpret file '/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnUNetFrame/DATASET/nnUNet_preprocessed/Task001_BraTS2021/nnUNetData_plans_v2.1_stage0/BraTS2021_01351.npy' as a pickle
Traceback (most recent call last):
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/lib/python3.7/site-packages/numpy/lib/npyio.py", line 448, in load
return pickle.load(fid, **pickle_kwargs)
EOFError: Ran out of input
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/lib/python3.7/site-packages/batchgenerators/dataloading/multi_threaded_augmenter.py", line 46, in producer
item = next(data_loader)
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/lib/python3.7/site-packages/batchgenerators/dataloading/data_loader.py", line 126, in __next__
return self.generate_train_batch()
File "/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnunet/training/dataloading/dataset_loading.py", line 245, in generate_train_batch
case_all_data = np.load(self._data[i]['data_file'][:-4] + ".npy", self.memmap_mode, allow_pickle=True)
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/lib/python3.7/site-packages/numpy/lib/npyio.py", line 451, in load
"Failed to interpret file %s as a pickle" % repr(file)) from e
OSError: Failed to interpret file '/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnUNetFrame/DATASET/nnUNet_preprocessed/Task001_BraTS2021/nnUNetData_plans_v2.1_stage0/BraTS2021_01351.npy' as a pickle
Traceback (most recent call last):
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/bin/nnUNet_train", line 33, in <module>
sys.exit(load_entry_point('nnunet', 'console_scripts', 'nnUNet_train')())
File "/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnunet/run/run_training.py", line 182, in main
trainer.run_training()
File "/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnunet/training/network_training/nnUNetTrainerV2.py", line 440, in run_training
ret = super().run_training()
File "/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnunet/training/network_training/nnUNetTrainer.py", line 318, in run_training
super(nnUNetTrainer, self).run_training()
File "/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnunet/training/network_training/network_trainer.py", line 418, in run_training
_ = self.tr_gen.next()
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/lib/python3.7/site-packages/batchgenerators/dataloading/multi_threaded_augmenter.py", line 181, in next
return self.__next__()
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/lib/python3.7/site-packages/batchgenerators/dataloading/multi_threaded_augmenter.py", line 204, in __next__
item = self.__get_next_item()
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/lib/python3.7/site-packages/batchgenerators/dataloading/multi_threaded_augmenter.py", line 189, in __get_next_item
raise RuntimeError("One or more background workers are no longer alive. Exiting. Please check the "
RuntimeError: One or more background workers are no longer alive. Exiting. Please check the print statements above for the actual error message
THPModule_npu_shutdown success.
未修改的代码报错
Exception in background worker 4:
Cannot load file containing pickled data when allow_pickle=False
Traceback (most recent call last):
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/lib/python3.7/site-packages/batchgenerators/dataloading/multi_threaded_augmenter.py", line 46, in producer
item = next(data_loader)
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/lib/python3.7/site-packages/batchgenerators/dataloading/data_loader.py", line 126, in __next__
return self.generate_train_batch()
File "/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnunet/training/dataloading/dataset_loading.py", line 246, in generate_train_batch
case_all_data = np.load(self._data[i]['data_file'][:-4] + ".npy", self.memmap_mode)
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/lib/python3.7/site-packages/numpy/lib/npyio.py", line 445, in load
raise ValueError("Cannot load file containing pickled data "
ValueError: Cannot load file containing pickled data when allow_pickle=False
Traceback (most recent call last):
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/bin/nnUNet_train", line 33, in <module>
sys.exit(load_entry_point('nnunet', 'console_scripts', 'nnUNet_train')())
File "/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnunet/run/run_training.py", line 182, in main
trainer.run_training()
File "/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnunet/training/network_training/nnUNetTrainerV2.py", line 440, in run_training
ret = super().run_training()
File "/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnunet/training/network_training/nnUNetTrainer.py", line 318, in run_training
super(nnUNetTrainer, self).run_training()
File "/home/ma-user/work/nnunet/nnUNet-nnunetv1/nnunet/training/network_training/network_trainer.py", line 418, in run_training
_ = self.tr_gen.next()
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/lib/python3.7/site-packages/batchgenerators/dataloading/multi_threaded_augmenter.py", line 181, in next
return self.__next__()
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/lib/python3.7/site-packages/batchgenerators/dataloading/multi_threaded_augmenter.py", line 204, in __next__
item = self.__get_next_item()
File "/home/ma-user/anaconda3/envs/Pytorch-1.11.0/lib/python3.7/site-packages/batchgenerators/dataloading/multi_threaded_augmenter.py", line 189, in __get_next_item
raise RuntimeError("One or more background workers are no longer alive. Exiting. Please check the "
RuntimeError: One or more background workers are no longer alive. Exiting. Please check the print statements above for the actual error message
THPModule_npu_shutdown success.
问题
预处理步骤出现问题:部分 .npy 文件大小为 0(如上面报错中的 BraTS2021_01351.npy),导致训练时 np.load 读取失败。