1、准备环境
# run_docker.sh — create the GroundingDino MLU container on first run,
# or start/attach to it on subsequent runs.
export MY_CONTAINER="GroundingDino"
# Count containers (running or stopped) whose name matches.
num=$(docker ps -a | grep -c "$MY_CONTAINER")
echo "$num"
echo "$MY_CONTAINER"
if [ "$num" -eq 0 ]; then
  # First run: create the container with host networking, Cambricon MLU
  # devices, and a large shared-memory segment for PyTorch dataloaders.
  # NOTE: the trailing '\' after the image name is required — without it
  # '/bin/bash' would be executed as a separate host command instead of
  # being the container's entry command.
  docker run -dit \
    --privileged=true \
    --pid=host \
    --net=host \
    --shm-size 10g \
    --ulimit memlock=-1 \
    --device /dev/cambricon_dev0 \
    --device /dev/cambricon_ctl \
    --device /dev/cambricon_ipcm0 \
    --name "$MY_CONTAINER" \
    -v /usr/bin/cnmon:/usr/bin/cnmon \
    yellow.hub.cambricon.com/cambricon_pytorch_container/cambricon_pytorch_container:v24.02.1-torch2.1.0-ubuntu22.04-py310 \
    /bin/bash
else
  # Container already exists: make sure it is running, then attach a shell.
  docker start "$MY_CONTAINER"
  docker exec --privileged=true -ti --workdir=/workspace "$MY_CONTAINER" /bin/bash
fi
下载代码、安装依赖
git clone https://github.com/longzw1997/Open-GroundingDino.git
python /torch/src/catch/tools/torch_gpu2mlu/torch_gpu2mlu.py -i Open-GroundingDino/
cd Open-GroundingDino_mlu
# 注释掉requirements.txt中的torch、torchvision、transformers,transformers需要转换安装
pip install -r requirements.txt
git clone -b v4.39.1 https://github.com/huggingface/transformers.git
python /torch/src/catch/tools/torch_gpu2mlu/torch_gpu2mlu.py -i transformers/
pip install -e ./transformers_mlu/
2、准备数据集和预训练模型
# 下载预训练模型
mkdir weights && cd weights
# groundingdino_swint_ogc.pth与bert-base-uncased
wget -q https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth
pip install -U huggingface_hub
export HF_ENDPOINT=https://hf-mirror.com
huggingface-cli download --resume-download google-bert/bert-base-uncased --local-dir bert-base-uncased
# 下载数据集
# 示例使用COCO2017,更多数据集参考https://github.com/longzw1997/Open-GroundingDino/blob/main/config/datasets_mixed_odvg.json
mkdir datasets && cd datasets
wget http://images.cocodataset.org/zips/train2017.zip
wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
wget http://images.cocodataset.org/zips/val2017.zip
wget http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip
wget http://images.cocodataset.org/zips/test2017.zip
wget http://images.cocodataset.org/annotations/image_info_test2017.zip
unzip train2017.zip
unzip val2017.zip
# 标注文件(instances_train2017.json等)在后续转换步骤中需要,也要解压
unzip annotations_trainval2017.zip
数据集转换ODVG格式
cd tools
# 执行coco2odvg.py转换脚本
python coco2odvg.py --input /workspace/datasets/coco/annotations/instances_train2017.json --output /workspace/datasets/coco/annotations/coco2017_train_odvg.jsonl
3、部分代码修改
vim /workspace/code/small-model/Open-GroundingDino_mlu/config/datasets_mixed_odvg.json
# datasets_mixed_odvg.json
{
"train": [
{
"root": "/workspace/datasets/coco/train2017",
"anno": "/workspace/datasets/coco/annotations/coco2017_train_odvg.jsonl",
"label_map": "/workspace/code/small-model/Open-GroundingDino_mlu/config/coco2017_label_map.json",
"dataset_mode": "odvg"
}
],
"val": [
{
"root": "/workspace/datasets/coco/val2017",
"anno": "config/instances_val2017.json",
"label_map": null,
"dataset_mode": "coco"
}
]
}
vim /workspace/code/small-model/Open-GroundingDino_mlu/models/GroundingDINO/ms_deform_attn.py
# 342行,注释掉
if torch.mlu.is_available() and value.is_mlu:
halffloat = False
if value.dtype == torch.float16:
halffloat = True
value = value.float()
sampling_locations = sampling_locations.float()
attention_weights = attention_weights.float()
output = MultiScaleDeformableAttnFunction.apply(
value,
spatial_shapes,
level_start_index,
sampling_locations,
attention_weights,
self.im2col_step,
)
if halffloat:
output = output.half()
# 361行 去掉else
output = multi_scale_deformable_attn_pytorch(
value, spatial_shapes, sampling_locations, attention_weights
)
4、运行训练
cd /workspace/code/small-model/Open-GroundingDino_mlu/
# train_dist.sh 脚本内容如下:
# Positional arguments: devices per node, model config, dataset config, log dir.
GPU_NUM=$1
CFG=$2
DATASETS=$3
OUTPUT_DIR=$4
# Multi-node settings; all default to single-node training and can be
# overridden from the environment (NNODES=2 NODE_RANK=1 bash train_dist.sh ...).
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
# Change ``pretrain_model_path`` to use a different pretrain.
# (e.g. GroundingDINO pretrain, DINO pretrain, Swin Transformer pretrain.)
# If you don't want to use any pretrained model, just ignore this parameter.
# Launch one main.py process per MLU device; text_encoder_type points at the
# locally downloaded bert-base-uncased directory (see section 2).
python -m torch.distributed.launch --nproc_per_node=${GPU_NUM} main.py \
        --output_dir ${OUTPUT_DIR} \
        -c ${CFG} \
        --datasets ${DATASETS} \
        --pretrain_model_path /workspace/code/small-model/Open-GroundingDino_mlu/weights/groundingdino_swint_ogc.pth \
        --options text_encoder_type=/workspace/code/small-model/Open-GroundingDino_mlu/weights/bert-base-uncased
执行训练命令
bash train_dist.sh 2 /workspace/code/small-model/Open-GroundingDino_mlu/config/cfg_odvg.py /workspace/code/small-model/Open-GroundingDino_mlu/config/datasets_mixed_odvg.json ./logs
5、训练结果
Epoch: [0] [ 50/14786] eta: 11:00:19 lr: 0.000100 loss: 7.4870 (8.0138) loss_bbox: 0.1199 (0.1331) loss_bbox_0: 0.1364 (0.1460) loss_bbox_1: 0.1312 (0.1400) loss_bbox_2: 0.1352 (0.1364) loss_bbox_3: 0.1191 (0.1348) loss_bbox_4: 0.1164 (0.1347) loss_bbox_interm: 0.1383 (0.1567) loss_ce: 0.6004 (0.6628) loss_ce_0: 0.6271 (0.7087) loss_ce_1: 0.6154 (0.6794) loss_ce_2: 0.5807 (0.6706) loss_ce_3: 0.5743 (0.6616) loss_ce_4: 0.6049 (0.6594) loss_ce_interm: 0.6382 (0.7235) loss_giou: 0.2998 (0.3146) loss_giou_0: 0.3225 (0.3297) loss_giou_1: 0.3209 (0.3219) loss_giou_2: 0.3072 (0.3180) loss_giou_3: 0.3011 (0.3163) loss_giou_4: 0.2926 (0.3126) loss_giou_interm: 0.3378 (0.3530) loss_bbox_unscaled: 0.0240 (0.0266) loss_bbox_0_unscaled: 0.0273 (0.0292) loss_bbox_1_unscaled: 0.0262 (0.0280) loss_bbox_2_unscaled: 0.0270 (0.0273) loss_bbox_3_unscaled: 0.0238 (0.0270) loss_bbox_4_unscaled: 0.0233 (0.0269) loss_bbox_interm_unscaled: 0.0277 (0.0313) loss_ce_unscaled: 0.3002 (0.3314) loss_ce_0_unscaled: 0.3135 (0.3544) loss_ce_1_unscaled: 0.3077 (0.3397) loss_ce_2_unscaled: 0.2903 (0.3353) loss_ce_3_unscaled: 0.2872 (0.3308) loss_ce_4_unscaled: 0.3024 (0.3297) loss_ce_interm_unscaled: 0.3191 (0.3618) loss_giou_unscaled: 0.1499 (0.1573) loss_giou_0_unscaled: 0.1613 (0.1648) loss_giou_1_unscaled: 0.1604 (0.1610) loss_giou_2_unscaled: 0.1536 (0.1590) loss_giou_3_unscaled: 0.1506 (0.1581) loss_giou_4_unscaled: 0.1463 (0.1563) loss_giou_interm_unscaled: 0.1689 (0.1765) loss_hw_unscaled: 0.0151 (0.0177) loss_hw_0_unscaled: 0.0190 (0.0196) loss_hw_1_unscaled: 0.0173 (0.0189) loss_hw_2_unscaled: 0.0163 (0.0183) loss_hw_3_unscaled: 0.0164 (0.0181) loss_hw_4_unscaled: 0.0147 (0.0180) loss_hw_interm_unscaled: 0.0190 (0.0212) loss_xy_unscaled: 0.0093 (0.0089) loss_xy_0_unscaled: 0.0093 (0.0096) loss_xy_1_unscaled: 0.0089 (0.0091) loss_xy_2_unscaled: 0.0084 (0.0090) loss_xy_3_unscaled: 0.0086 (0.0089) loss_xy_4_unscaled: 0.0081 (0.0089) loss_xy_interm_unscaled: 0.0104 (0.0102) time: 2.7235 data: 
0.0132 max mem: 15629