1、下载代码
git clone https://github.com/microsoft/GLIP.git
python /torch/src/catch/tools/torch_gpu2mlu/torch_gpu2mlu.py -i GLIP/
pip install -e ./GLIP_mlu/
2、下载依赖
pip install pycocotools opencv-python yacs
3、准备数据集
#需要准备 Objects365, Flickr30K, MixedGrounding, coco/train2014
cd ./GLIP_mlu
# 创建数据地址
mkdir DATASET && cd DATASET
# 创建目录
mkdir coco flickr30k_images gqa mdetr_annotations Objects365
wget https://penzhanwu2bbs.blob.core.windows.net/data/GLIPv1_Open/mdetr_annotations/final_mixed_train_no_coco.json -O ./mdetr_annotations/final_mixed_train_no_coco.json
创建用于下载数据集的脚本:touch download.sh
Flickr30K 数据集请自行前往 https://shannon.cs.illinois.edu/DenotationGraph/ 下载
```touch download.sh
cd coco/
wget http://images.cocodataset.org/zips/train2014.zip
unzip train2014.zip
wget http://images.cocodataset.org/zips/val2017.zip
unzip val2017.zip
cd ../gqa/
wget https://nlp.stanford.edu/data/gqa/images.zip
unzip images.zip
cd ../mdetr_annotations/
wget https://penzhanwu2bbs.blob.core.windows.net/data/GLIPv1_Open/mdetr_annotations/final_mixed_train_no_coco.json
wget https://zenodo.org/record/4729015/files/mdetr_annotations.tar.gz?download=1
tar -zxvf 'mdetr_annotations.tar.gz?download=1'
cp ./OpenSource/final_flickr_separateGT_* ../
cd ../Objects365
# 如果下方链接无法访问,可到 https://huggingface.co/GLIPModel/GLIP/tree/main/mdetr_annotations 下载配置文件
# 注意:github.com 的 /blob/ 链接用 wget 下载到的是 HTML 页面而非文件本身,如下载结果不对,请将链接中的 /blob/ 替换为 /raw/
wget https://github.com/microsoft/scene_graph_benchmark/blob/main/tools/mini_tsv/data/train.hw.lineidx
wget https://github.com/microsoft/scene_graph_benchmark/blob/main/tools/mini_tsv/data/train.hw.tsv
wget https://github.com/microsoft/scene_graph_benchmark/blob/main/tools/mini_tsv/data/train.label.lineidx
wget https://github.com/microsoft/scene_graph_benchmark/blob/main/tools/mini_tsv/data/train.label.tsv
wget https://github.com/microsoft/scene_graph_benchmark/blob/main/tools/mini_tsv/data/train.labelmap.tsv
wget https://github.com/microsoft/scene_graph_benchmark/blob/main/tools/mini_tsv/data/train.lineidx
wget https://github.com/microsoft/scene_graph_benchmark/blob/main/tools/mini_tsv/data/train.linelist.tsv
wget https://github.com/microsoft/scene_graph_benchmark/blob/main/tools/mini_tsv/data/train.tsv
wget https://github.com/microsoft/scene_graph_benchmark/blob/main/tools/mini_tsv/data/train.yaml
执行下载,需要等待一些时间(两三个小时)
bash download.sh
下载 Objects365 数据集(以下命令使用相对路径 DATASET/Objects365/,需先回到 GLIP_mlu 根目录再执行)
wget https://penzhanwu2bbs.blob.core.windows.net/data/GLIPv1_Open/Objects365/objects365_train_vgoiv6.cas2000.yaml -O DATASET/Objects365/objects365_train_vgoiv6.cas2000.yaml
wget https://penzhanwu2bbs.blob.core.windows.net/data/GLIPv1_Open/Objects365/train.label.tsv -O DATASET/Objects365/train.label.tsv
wget https://penzhanwu2bbs.blob.core.windows.net/data/GLIPv1_Open/Objects365/train.label.linelist.cas.2000.tsv -O DATASET/Objects365/train.label.linelist.cas.2000.tsv
wget https://penzhanwu2bbs.blob.core.windows.net/data/GLIPv1_Open/Objects365/train.label.lineidx -O DATASET/Objects365/train.label.lineidx
wget https://penzhanwu2bbs.blob.core.windows.net/data/GLIPv1_Open/Objects365/train.hw.tsv -O DATASET/Objects365/train.hw.tsv
wget https://penzhanwu2bbs.blob.core.windows.net/data/GLIPv1_Open/Objects365/train.hw.lineidx -O DATASET/Objects365/train.hw.lineidx
wget https://penzhanwu2bbs.blob.core.windows.net/data/GLIPv1_Open/Objects365/object365_vgoiv6_class2ind.json -O DATASET/Objects365/object365_vgoiv6_class2ind.json
4、部分代码修改
```./maskrcnn_benchmark/utils/model_zoo.py
# 将第6行try的内容
from torch.hub import _download_url_to_file # 修改为=》
from torch.hub import download_url_to_file
vim /workspace/code/small-model/GLIP_mlu/maskrcnn_benchmark/utils/imports.py
# 将第5行
if torch._six.PY37:
import importlib
import importlib.util
import sys
# 修改为=》
if False:
import importlib
import importlib.util
import sys
```./maskrcnn_benchmark/modeling/rpn/anchor_generator.py
# 将第364行
return _generate_anchors(
stride,
np.array(sizes, dtype=np.float) / stride,
np.array(aspect_ratios, dtype=np.float),
)
# 修改为=》
return _generate_anchors(
stride,
np.array(sizes, dtype=np.float32) / stride,
np.array(aspect_ratios, dtype=np.float32),
)
# 将第375行
anchor = np.array([1, 1, base_size, base_size], dtype=np.float) - 1
# 修改为=》
anchor = np.array([1, 1, base_size, base_size], dtype=np.float32) - 1
vim /workspace/code/small-model/GLIP_mlu/maskrcnn_benchmark/utils/fuse_helper.py
# 265行
attention_mask = attention_mask.masked_fill(attention_mask == 0, -9e15)
# 修改为
attention_mask = attention_mask.masked_fill(attention_mask == 0, -1e4)
```./maskrcnn_benchmark/modeling/rpn/loss.py:783
# 添加:anchors_to_gt_indexs.to('mlu')
下载mmcv
wget https://sdk.cambricon.com/static/independent/MMCV/1.7.1_1.1.0_2.1_python3.10_pip/mmcv_full-1.7.1_v1.1.0_pt21-cp310-cp310-linux_x86_64.whl
pip install mmcv_full-1.7.1_v1.1.0_pt21-cp310-cp310-linux_x86_64.whl
代码修改
```./maskrcnn_benchmark/layers/deform_conv.py
# 注释掉第11行的 from maskrcnn_benchmark import _C,并添加以下内容(需先导入:from mmcv.utils import ext_loader)
ext_module = ext_loader.load_ext(
'_ext',
['modulated_deform_conv_forward', 'modulated_deform_conv_backward',
'deform_conv_forward', 'deform_conv_backward_input', 'deform_conv_backward_parameters'])
# 将deform_conv.py中的所有 _C 替换为ext_module,对应的函数名也相应修改为引用的名字。
# 相应地,/workspace_bf16/GLIP_mlu/maskrcnn_benchmark/layers/sigmoid_focal_loss.py 也做同样的处理
```./maskrcnn_benchmark/layers/sigmoid_focal_loss.py
# 23-45行:forward
class _SigmoidFocalLoss(Function):
@staticmethod
def forward(ctx, logits, targets, gamma, alpha, reduction: str = 'mean'):
ctx.save_for_backward(logits, targets)
num_classes = logits.shape[1]
ctx.num_classes = num_classes
ctx.gamma = gamma
ctx.alpha = alpha
ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2}
ctx.reduction = ctx.reduction_dict[reduction]
# num_classes = torch.tensor(num_classes, dtype=torch.float32)
# num_classes = torch.tensor([num_classes]).to('mlu')
# print("num_classes::::::::",type(num_classes))
loss = logits.new_zeros(logits.size())
weight = logits.new_empty(0)
ext_module.sigmoid_focal_loss_forward(
logits, targets, weight, loss, gamma, alpha
)
if ctx.reduction == ctx.reduction_dict['mean']:
loss = loss.sum() / logits.size(0)
elif ctx.reduction == ctx.reduction_dict['sum']:
loss = loss.sum()
ctx.save_for_backward(logits, targets, weight)
# print("loss:::::::::::::::::::",loss)
return loss
# backward
def backward(ctx, d_loss):
logits, targets, weight = ctx.saved_tensors
num_classes = ctx.num_classes
gamma = ctx.gamma
alpha = ctx.alpha
d_loss = d_loss.contiguous()
# print("d_loss::::::::::::",d_loss)
grad_input = logits.new_zeros(logits.size())
ext_module.sigmoid_focal_loss_backward(
logits, targets, weight, grad_input, gamma=gamma,alpha=alpha
)
# grad_input *= d_loss
if ctx.reduction == ctx.reduction_dict['mean']:
grad_input /= logits.size(0)
return grad_input, None, None, None, None
修改配置文件
```./configs/pretrain/glip_Swin_T_O365_GoldG.yaml
# 修改为自己使用的数据集,查看./maskrcnn_benchmark/config/paths_catalog.py
DATASETS:
TRAIN: ("mixed_train_no_coco", )
TEST: ("coco_2017_val", )
SOLVER:
FIND_UNUSED_PARAMETERS: True
TEST_WITH_INFERENCE: False
5、执行训练
MLU_VISIBLE_DEVICES=6,7 TORCH_DISTRIBUTED_DEBUG=DETAIL python -m torch.distributed.launch --nnodes 1 --nproc_per_node=2 --use_env --master_port=12346 tools/train_net.py \
--config-file configs/pretrain/glip_Swin_T_O365_GoldG.yaml \
--skip-test --use-tensorboard --override_output_dir output
model.safetensors: 83%|██████████████████████████████████████████████████████████████████████████████████████████████▏ | 367M/440M [27:28<03:59, 306kB/s]
eta: 90 days, 14:06:11 iter: 20 loss: 4.0815 (4.1118) loss_centerness: 0.7034 (0.7046) loss_cls: 0.0000 (0.0000) loss_dot_product_token: 2.2535 (2.3103) loss_reg: 1.0791 (1.0969) time: 1.5451 (1.6809) data: 0.0211 (0.1391) lr: 0.000001 wd: 0.000100 max mem: 8943
eta: 87 days, 19:33:08 iter: 40 loss: 3.7911 (3.9735) loss_centerness: 0.7023 (0.7031) loss_cls: 0.0000 (0.0000) loss_dot_product_token: 2.0421 (2.1845) loss_reg: 1.0447 (1.0858) time: 1.5403 (1.6295) data: 0.0212 (0.0804) lr: 0.000002 wd: 0.000100 max mem: 8943