Reproducing PersFormer_3DLane


Preface

Project: https://github.com/OpenDriveLab/PersFormer_3DLane/tree/main


Note: this walkthrough uses only the OpenLane dataset.

I. Creating the Environment

The experiment runs on an AutoDL server:
4× RTX 3080 GPUs
PyTorch 1.11.0
Python 3.8 (Ubuntu 20.04)
CUDA 11.3
Install by following the official instructions:

pip3 install -r requirements.txt
cd models/nms/
python setup.py install

cd ../ops/
bash make.sh
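
Before moving on, it is worth a quick check that PyTorch sees the CUDA runtime and all four GPUs (an optional sanity check, not part of the official instructions):

import torch
print(torch.__version__)          # expect 1.11.0
print(torch.version.cuda)         # expect 11.3
print(torch.cuda.is_available())  # expect True
print(torch.cuda.device_count())  # expect 4 on this setup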

II. Preparing the Dataset

Prepare the OpenLane dataset according to the official requirements.
The overall directory layout is shown below:
[Screenshot: OpenLane dataset directory tree]

1. After preparing the dataset, edit the dataset paths in persformer_openlane.py, which lives in the autodl-tmp/PersFormer_3DLane/config folder.
[Screenshot: persformer_openlane.py after editing the paths]
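
For reference, the edited section might look like the following (a minimal sketch: dataset_dir and data_dir are how Gen-LaneNet-style configs in this codebase usually name the image and annotation roots, and the /root/autodl-tmp/... paths are assumptions for this AutoDL setup; check the actual variable names in your copy of the file):

# config/persformer_openlane.py (excerpt) -- hypothetical paths
args.dataset_name = 'openlane'
args.dataset_dir = '/root/autodl-tmp/openlane/images/'    # raw camera images
args.data_dir = '/root/autodl-tmp/openlane/lane3d_1000/'  # 3D lane annotations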

2. Manually download the pretrained backbone weights from https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ns-1dbc32de.pth on your local machine.
Once the download finishes, upload the file to autodl-tmp, then copy it into the torch cache:

mkdir -p ~/.cache/torch/hub/checkpoints/
cp tf_efficientnet_b7_ns-1dbc32de.pth ~/.cache/torch/hub/checkpoints/

Note: run the cp command from the autodl-tmp directory so that the relative path to the .pth file resolves.
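
To double-check where torch will look for the file, you can print the hub cache root (torch.hub.get_dir() returns the hub directory; downloaded checkpoints are expected under its checkpoints/ subfolder):

import torch
# Default hub dir is ~/.cache/torch/hub; weights go in <hub_dir>/checkpoints/
print(torch.hub.get_dir())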

3. Because this experiment runs distributed training across four 3080s, ddp.py needs a modification. The key change is in setup_distributed: the process group must be initialized exactly once, guarded by dist.is_initialized(). The revised ddp.py:

import torch
import torch.distributed as dist
import torch.multiprocessing as mp
import os
import subprocess
import numpy as np
import random

def setup_dist_launch(args):
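    # Launched via torch.distributed.launch: each process receives --local_rank,
    # from which we derive the rank and env vars that init_process_group (env://) reads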
    args.proc_id = args.local_rank
    world_size = int(os.getenv('WORLD_SIZE', 1))*args.nodes
    print("proc_id: " + str(args.proc_id))
    print("world size: " + str(world_size))
    print("local_rank: " + str(args.local_rank))

    os.environ['WORLD_SIZE'] = str(world_size)
    os.environ['RANK'] = str(args.proc_id)
    os.environ['LOCAL_RANK'] = str(args.local_rank)

def setup_slurm(args):
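    # Launched under SLURM (srun): derive ranks from the SLURM_* environment variables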
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')

    args.proc_id = int(os.environ['SLURM_PROCID'])
    ntasks = int(os.environ['SLURM_NTASKS'])
    node_list = os.environ['SLURM_NODELIST']
    num_gpus = torch.cuda.device_count()
    local_rank = args.proc_id % num_gpus
    args.local_rank = local_rank

    print("proc_id: " + str(args.proc_id))
    print("world size: " + str(ntasks))
    print("local_rank: " + str(local_rank))

    addr = subprocess.getoutput(
        f'scontrol show hostname {node_list} | head -n1')
    os.environ['MASTER_PORT'] = str(args.port)
    os.environ['MASTER_ADDR'] = addr

    os.environ['WORLD_SIZE'] = str(ntasks)
    os.environ['RANK'] = str(args.proc_id)
    os.environ['LOCAL_RANK'] = str(local_rank)

def setup_distributed(args):
    # Initialize the process group only once; the earlier version called
    # dist.init_process_group() a second time unconditionally, which fails.
    if not dist.is_initialized():
        dist.init_process_group(backend='nccl')
        print(f"Rank: {dist.get_rank()}, World size: {dist.get_world_size()}")
    args.gpu = args.local_rank
    torch.cuda.set_device(args.gpu)
    args.world_size = dist.get_world_size()
    torch.set_printoptions(precision=10)

def ddp_init(args):
    args.proc_id, args.gpu, args.world_size = 0, 0, 1
    
    if args.use_slurm:
        setup_slurm(args)
    else:
        setup_dist_launch(args)

    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) >= 1

    if args.distributed:
        setup_distributed(args)

    # deterministic
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.manual_seed(args.proc_id)
    np.random.seed(args.proc_id)
    random.seed(args.proc_id)

def to_python_float(t):
    # Return a plain Python number from a scalar tensor (or a 1-element sequence)
    if hasattr(t, 'item'):
        return t.item()
    else:
        return t[0]

def reduce_tensor(tensor, world_size):
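    # Sum the tensor across all ranks, then divide by world_size to get the mean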
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= world_size
    return rt

def reduce_tensors(*tensors, world_size):
    return [reduce_tensor(tensor, world_size) for tensor in tensors]
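
To exercise the two helpers without touching the training code, here is a self-contained single-process example (a sketch: it uses the gloo backend on an arbitrary local port so it runs on CPU, and with world_size 1 the "average" is just the input value; reduce_tensor and to_python_float are the functions defined above):

import torch
import torch.distributed as dist

# Single-process group on CPU, just to exercise all_reduce
dist.init_process_group(backend='gloo',
                        init_method='tcp://127.0.0.1:29501',
                        rank=0, world_size=1)
loss = torch.tensor([4.0])
avg = reduce_tensor(loss, dist.get_world_size())  # sums, then divides by 1
print(to_python_float(avg))  # 4.0
dist.destroy_process_group()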

4. Edit .bashrc

vim ~/.bashrc

Add the following lines, making sure the CUDA version matches the one installed on your machine:

export LIBRARY_PATH=/usr/local/cuda-11.3/lib64/:$LIBRARY_PATH
export LD_LIBRARY_PATH=/usr/local/cuda-11.3/lib64/:$LD_LIBRARY_PATH
export PATH=/usr/local/cuda-11.3/bin/:$PATH
export CUDA_HOME=/usr/local/cuda-11.3/

Press Esc, then type :wq to save and exit.
Finally, reload the file:

source ~/.bashrc 
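
You can confirm the variables took effect with a quick check (nvcc should report release 11.3):

import os, subprocess
print(os.environ.get('CUDA_HOME'))             # /usr/local/cuda-11.3/
print(subprocess.getoutput('nvcc --version'))  # should mention release 11.3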

5. Run training

python -m torch.distributed.launch --nproc_per_node 4 main_persformer.py --mod=PersFormer --batch_size=2

Here --nproc_per_node 4 launches one process per GPU; --batch_size is the per-process (per-GPU) batch size.

Summary


First epoch: total training time 3:13:38 over 19,726 training images.
First epoch: total test time 21:59 over 4,998 test images.

In a second run, the first epoch took 3:16:38 to train and 23:38 to test.
