from torch.nn.parallel import DistributedDataParallel as DDP
import yaml
import math
import os
import argparse
from pathlib import Path
import numpy as np
import torch.distributed as dist
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.utils.data
from utils.torch_utils import init_seeds, intersect_dicts, ModelEMA
from models.yolo import Model
from utils.general import check_img_size, torch_distributed_zero_first
from utils.google_utils import attempt_download
from utils.datasets import create_dataloader
def train(hyp, opt, device, tb_writer=None):
    print(f"Hyperparameters {hyp}")
    log_dir = Path(tb_writer.log_dir) if tb_writer else Path(opt.logdir) / "evolve"
    wdir = str(log_dir / "weights") + os.sep  # weights directory
    os.makedirs(wdir, exist_ok=True)
    last = wdir + "last.pt"
    best = wdir + "best.pt"
    results_file = str(log_dir / "results.txt")
    epochs, batch_size, total_batch_size, weights, rank = \
        opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank
    # TODO: use DDP logging; only the first process should log.

    # Save run settings (sort_keys=False keeps the original key order; the default is True)
    with open(log_dir / "hyp.yaml", 'w') as f:
        yaml.dump(hyp, f, sort_keys=False)
    with open(log_dir / "opt.yaml", 'w') as f:
        yaml.dump(vars(opt), f, sort_keys=False)
    # Configure
    cuda = device.type != "cpu"
    init_seeds(2 + rank)
    # load the dataset config; yaml returns it as a dict
    with open(opt.data, 'r') as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)
    train_path = data_dict["train"]
    test_path = data_dict["test"]
    nc, names = (1, ["item"]) if opt.single_cls else (int(data_dict["nc"]), data_dict["names"])
    assert len(names) == nc, f"{len(names)} names found for nc={nc} dataset in {opt.data}"
    # Model: load pretrained weights if given
    pretrained = weights.endswith(".pt")
    if pretrained:
        with torch_distributed_zero_first(rank):
            attempt_download(weights)  # download if not found locally
        checkpoint = torch.load(weights, map_location=device)
        model = Model(opt.cfg or checkpoint["model"].yaml, nc=nc, ch=3).to(device)
        exclude = ["anchor"] if opt.cfg else []  # exclude anchors when a new cfg overrides them
        state_dict = checkpoint["model"].float().state_dict()
        # keep only the layers whose names and shapes match the current model
        state_dict = intersect_dicts(state_dict, model.state_dict(), exclude)
        model.load_state_dict(state_dict, strict=False)
        print(f"Transferred {len(state_dict)}/{len(model.state_dict())} items from {weights}")
    else:
        model = Model(opt.cfg, nc=nc, ch=3).to(device)
    # Optimizer
    nbs = 64  # nominal batch size
    # round(x) with one argument rounds to the nearest integer;
    # round(x, n) keeps n decimal places
    accumulate = max(round(nbs / total_batch_size), 1)  # accumulate gradients over this many batches
    hyp["weight_decay"] *= total_batch_size * accumulate / nbs  # scale weight_decay to the effective batch size
    # optimizer parameter groups: pg0 = other, pg1 = conv/linear weights (decayed), pg2 = biases
    pg0, pg1, pg2 = [], [], []
    for k, v in model.named_parameters():
        v.requires_grad = True
        if ".bias" in k:
            pg2.append(v)  # biases: no weight decay
        elif ".weight" in k and ".bn" not in k:
            pg1.append(v)  # conv/linear weights: apply weight decay
        else:
            pg0.append(v)  # everything else (e.g. BatchNorm weights)
    if opt.adam:
        optimizer = optim.Adam(pg0, lr=hyp["lr0"], betas=(hyp["momentum"], 0.999))
    else:
        optimizer = optim.SGD(pg0, lr=hyp["lr0"], momentum=hyp["momentum"], nesterov=True)
    optimizer.add_param_group({"params": pg1, "weight_decay": hyp["weight_decay"]})
    optimizer.add_param_group({"params": pg2})
    print(f"Optimizer groups: {len(pg2)} .bias, {len(pg1)} conv.weight, {len(pg0)} other")
    del pg0, pg1, pg2
    # lr decay: cosine annealing from 1.0 down to 0.2 over `epochs`
    lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.8 + 0.2
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
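    # The multiplier starts at lf(0) = 1.0, passes lf(epochs / 2) = 0.5 * 0.8 + 0.2 = 0.6
    # (since cos(pi / 2) = 0), and ends at lf(epochs) = 0.2, so the final learning
    # rate is 0.2 * hyp["lr0"].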
    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if checkpoint["optimizer"] is not None:
            optimizer.load_state_dict(checkpoint["optimizer"])
            best_fitness = checkpoint["best_fitness"]
        # Results
        if checkpoint.get("training_results") is not None:
            with open(results_file, 'w') as f:
                f.write(checkpoint["training_results"])
        # Epochs
        start_epoch = checkpoint["epoch"] + 1
        if epochs < start_epoch:
            print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
                  (weights, checkpoint['epoch'], epochs))
            epochs += checkpoint["epoch"]
        del checkpoint, state_dict
    # Image sizes
    gs = int(max(model.stride))  # grid size (max stride)
    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify each is a gs-multiple

    # DP mode (single process, multiple GPUs)
    if cuda and rank == -1 and torch.cuda.device_count() > 1:
        model = torch.nn.parallel.DataParallel(model)

    # SyncBatchNorm for multi-device training
    if opt.sync_bn and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        print("Using SyncBatchNorm")

    # Exponential moving average (only on the main process)
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # DDP mode
    if cuda and rank != -1:
        model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)

    # TrainLoader
    dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
                                            cache=opt.cache_images, rect=opt.rect,
                                            local_rank=rank, world_size=opt.world_size)
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    # TestLoader and the rest of the training pipeline are not shown in this excerpt


if __name__ == "__main__":
    parse = argparse.ArgumentParser()
    parse.add_argument("--weights", type=str, default="yolov4-p5.pt")
    parse.add_argument("--cfg", type=str, default="")
    parse.add_argument("--data", type=str, default="data/coco128.yaml")
    parse.add_argument("--hyp", type=str, default="")
    parse.add_argument("--epochs", type=int, default=300)
    parse.add_argument("--batch-size", type=int, default=16)
    parse.add_argument("--img-size", nargs='+', type=int, default=[640, 640])
    parse.add_argument("--resume", nargs='?', const='get_last', default=False)
    parse.add_argument("--nosave", action="store_true")
    parse.add_argument("--notest", action="store_true")
    parse.add_argument("--noautoanchor", action="store_true")
    parse.add_argument("--evolve", action="store_true")
    parse.add_argument("--bucket", type=str, default="")
    parse.add_argument("--cache-images", action="store_true")
    parse.add_argument("--name", default="")
    parse.add_argument("--device", default="")
    parse.add_argument("--multi-scale", action="store_true")
    parse.add_argument("--single-cls", action="store_true")
    parse.add_argument("--adam", action="store_true")
    parse.add_argument("--sync-bn", action="store_true")
    parse.add_argument("--local_rank", type=int, default=-1)  # underscore: torch.distributed.launch passes --local_rank
    parse.add_argument("--logdir", type=str, default="runs/")
    opt = parse.parse_args()
    opt.total_batch_size = opt.batch_size  # remember the global batch size before the DDP split
    opt.world_size = 1
    opt.global_rank = -1

    # DDP mode
    if opt.local_rank != -1:
        assert torch.cuda.device_count() > opt.local_rank
        torch.cuda.set_device(opt.local_rank)
        device = torch.device("cuda", opt.local_rank)
        dist.init_process_group(backend="nccl", init_method="env://")
        opt.world_size = dist.get_world_size()  # number of processes in the current group
        opt.global_rank = dist.get_rank()  # rank of this process within the group
        assert opt.batch_size % opt.world_size == 0, "--batch-size must be multiple of CUDA device count"
        opt.batch_size = opt.total_batch_size // opt.world_size
    print(opt)

    # load hyperparameters
    with open(opt.hyp) as f:
        hyp = yaml.load(f, Loader=yaml.FullLoader)
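# Typical launches (the call into train() that consumes `hyp` is not shown in this
# excerpt; the paths below are placeholders). Single-GPU:
#
#   python train.py --weights yolov4-p5.pt --data data/coco128.yaml --hyp data/hyp.yaml
#
# Multi-GPU DDP via the torch.distributed.launch helper, which passes --local_rank
# to each process and sets the env:// variables (MASTER_ADDR, MASTER_PORT, RANK,
# WORLD_SIZE) that dist.init_process_group reads:
#
#   python -m torch.distributed.launch --nproc_per_node 2 train.py --batch-size 32 --sync-bn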
torch_utils.py
import math
import os
import time
from copy import deepcopy
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
def init_seeds(seed=0):
    torch.manual_seed(seed)
    if seed == 0:
        # deterministic mode: reproducible, but disables the cudnn autotuner
        cudnn.deterministic = True
        cudnn.benchmark = False
    else:
        # benchmark mode lets cudnn pick the fastest kernels, which speeds up
        # training but makes results vary slightly from run to run
        cudnn.deterministic = False
        cudnn.benchmark = True


def intersect_dicts(da, db, exclude=()):
    # keep only the entries of da whose key also exists in db with a matching shape,
    # and whose key contains none of the `exclude` substrings; mismatches are dropped
    return {k: v for k, v in da.items()
            if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}


def is_parallel(model):
    return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)


def copy_attr(a, b, include=(), exclude=()):
    # Copy attributes from b to a, with options to only include [...] and to exclude [...]
    for k, v in b.__dict__.items():
        if (len(include) and k not in include) or k.startswith('_') or k in exclude:
            continue
        else:
            setattr(a, k, v)


class ModelEMA:
    """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
    Keep a moving average of everything in the model state_dict (parameters and buffers).
    This is intended to allow functionality like
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    A smoothed version of the weights is necessary for some training schemes to perform well.
    This class is sensitive where it is initialized in the sequence of model init,
    GPU assignment and distributed training wrappers.
    """
"""def__init__(self, model, decay=0.9999, updates=0):# Create EMA
self.ema = deepcopy(model.module if is_parallel(model)else model).eval()# FP32 EMA# if next(model.parameters()).device.type != 'cpu':# self.ema.half() # FP16 EMA
self.updates = updates # number of EMA updates
self.decay =lambda x: decay *(1- math.exp(-x /2000))# decay exponential ramp (to help early epochs)for p in self.ema.parameters():
p.requires_grad_(False)defupdate(self, model):# Update EMA parameterswith torch.no_grad():
self.updates +=1
d = self.decay(self.updates)
msd = model.module.state_dict()if is_parallel(model)else model.state_dict()# model state_dictfor k, v in self.ema.state_dict().items():if v.dtype.is_floating_point:
v *= d
v +=(1.- d)* msd[k].detach()defupdate_attr(self, model, include=(), exclude=('process_group','reducer')):# Update EMA attributes
copy_attr(self.ema, model, include, exclude)
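# The ramp keeps the EMA tracking the raw weights closely early in training and only
# later approaches the nominal decay. Approximate values of
# d = 0.9999 * (1 - exp(-updates / 2000)):
#
#   updates:   1        2000     10000    20000
#   d:         ~0.0005  ~0.632   ~0.993   ~0.9999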
general.py
import math
from contextlib import contextmanager
import torch
# A context manager implements __enter__/__exit__ to support the `with` statement.
# local_rank 0 or -1 marks the main process: the main process reads and caches the
# data while the other processes wait at a barrier, then read from the cache.
# dist.barrier() blocks every process in the group until all of them reach it.
# torch.distributed is Linux-only here; it is not supported on Windows.
@contextmanager
def torch_distributed_zero_first(local_rank: int):
    if local_rank not in [-1, 0]:
        torch.distributed.barrier()  # non-main processes wait for the main process
    yield
    if local_rank == 0:
        torch.distributed.barrier()  # main process releases the waiting processes


def check_img_size(imgsz, s=32):
    # Verify imgsz is a multiple of stride s
    new_size = make_divisible(imgsz, int(s))
    if new_size != imgsz:
        print("WARNING: --img-size %d must be a multiple of max stride %d, updating to %d" % (imgsz, s, new_size))
    return new_size


def make_divisible(x, s):
    # smallest multiple of s that is >= x
    return math.ceil(x / s) * s
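# Usage sketch for torch_distributed_zero_first (this is how create_dataloader in
# datasets.py uses it): rank 0 does the expensive caching once, and the other ranks
# enter the block only afterwards and reuse the cache.
#
#   with torch_distributed_zero_first(local_rank):
#       dataset = LoadImagesAndLabels(path, ...)
#
# And a quick worked example for check_img_size with a max stride of 32:
#
#   check_img_size(640, 32)   # -> 640, unchanged
#   check_img_size(641, 32)   # -> 672 (= ceil(641 / 32) * 32), with a warning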
datasets.py
import glob
import os
from pathlib import Path
import numpy as np
import torch
from PIL import Image, ExifTags
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from utils.general import torch_distributed_zero_first
help_url =""
vid_formats =['.mov','.avi','.mp4','.mpg','.mpeg','.m4v','.wmv','.mkv']
img_formats =['.bmp','.jpg','.jpeg','.png','.tif','.tiff','.dng']for orientation in ExifTags.TAGS.keys():if ExifTags.TAGS[orientation]=='Orientation':breakdefget_hash(files):# hash value of the files listreturnsum(os.path.getsize(f)for f in files if os.path.isfile(f))defexit_size(img):
s = img.size
try:
rotation =dict(img._getexif().items())[orientation]if rotation ==6:# rotation 270
s =(s[1], s[0])elif rotation ==8:# rotation 90
s =(s[1], s[0])except:passreturn s
def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None,
                      augment=False,
                      cache=False, pad=0.0, rect=False, local_rank=-1, world_size=1):
    # let rank 0 build the dataset (and its label cache) first; other ranks reuse it
    with torch_distributed_zero_first(local_rank):
        dataset = LoadImagesAndLabels(path, imgsz, batch_size,
                                      augment=augment,  # augment images
                                      hyp=hyp,  # augmentation hyperparameters
                                      rect=rect,  # rectangular training
                                      cache_images=cache,
                                      single_cls=opt.single_cls,
                                      stride=int(stride),
                                      pad=pad)
    batch_size = min(batch_size, len(dataset))
    # number of dataloader workers: bounded by CPUs per process, batch size, and 8
    nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, 8])
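    # e.g. 16 CPUs, world_size = 2, batch_size = 16  ->  nw = min(8, 16, 8) = 8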
    train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) if local_rank != -1 else None
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=nw,
        sampler=train_sampler,
        pin_memory=True,  # with enough host memory, pinned tensors transfer to the GPU faster
        collate_fn=LoadImagesAndLabels.collate_fn
    )
    return dataloader, dataset
class LoadImagesAndLabels(Dataset):
    def __init__(self, path, img_size=640, batch_size=16, augment=False,
                 hyp=False, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0):
        try:
            f = []  # image files
            for p in path if isinstance(path, list) else [path]:
                p = str(Path(p))
                parent = str(Path(p).parent) + os.sep
                if os.path.isfile(p):  # a .txt file listing image paths
                    with open(p, 'r') as t:
                        t = t.read().splitlines()
                        f += [x.replace("./", parent) if x.startswith("./") else x for x in t]
                elif os.path.isdir(p):  # a directory of images
                    f += glob.iglob(p + os.sep + "*.*")
                else:
                    raise Exception(f"{p} does not exist")
            self.img_files = sorted([x.replace('/', os.sep) for x in f
                                     if os.path.splitext(x)[-1].lower() in img_formats])
        except Exception as e:
            raise Exception("Error loading data from %s: %s\nSee %s" % (path, e, help_url))
        n = len(self.img_files)
        assert n > 0, "No images found in %s. See %s" % (path, help_url)
        # bi maps every image to the index of the batch it belongs to.
        # e.g. with n = 10 and batch_size = 4, bi = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2]:
        # the first four images share batch 0, and nb = 3 batches in total.
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches
        self.n = n  # number of images in the dataset
        self.batch = bi
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        self.mosaic = self.augment and not self.rect
        self.mosaic_border = [-img_size // 2, -img_size // 2]
        self.stride = stride
        # Define label paths: images/foo.jpg -> labels/foo.txt
        self.label_files = [x.replace("images", "labels").replace(os.path.splitext(x)[-1], ".txt")
                            for x in self.img_files]

        # Check cache
        cache_path = str(Path(self.label_files[0]).parent) + ".cache"
        if os.path.isfile(cache_path):
            cache = torch.load(cache_path)
            if cache["hash"] != get_hash(self.label_files + self.img_files):  # files changed
                cache = self.cache_labels(cache_path)
        else:
            cache = self.cache_labels(cache_path)

        # Get labels
        labels, shapes = zip(*[cache[x] for x in self.img_files])
        self.shapes = np.array(shapes, dtype=np.float64)
        self.labels = list(labels)

    def cache_labels(self, path="labels.cache"):
        x = {}
        # tqdm wraps the zipped iterator; desc names the progress bar, total is its length
        pbar = tqdm(zip(self.img_files, self.label_files), desc="Scanning images", total=len(self.img_files))
        for (img, label) in pbar:
            try:
                l = []
                image = Image.open(img)
                image.verify()  # PIL verify: detect corrupt images
                shape = exif_size(image)  # (width, height)
                assert (shape[0] > 9) & (shape[1] > 9), "image size <10 pixels"
                if os.path.isfile(label):
                    with open(label, 'r') as f:
                        l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
                if len(l) == 0:
                    l = np.zeros((0, 5), dtype=np.float32)
                x[img] = [l, shape]
            except Exception as e:
                x[img] = None
                print(f"WARNING: {img}: {e}")
        x["hash"] = get_hash(self.label_files + self.img_files)
        torch.save(x, path)
        return x
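    # The saved cache maps each image path to [labels, shape], where labels is an
    # (n, 5) float32 array (one row per object; by the usual YOLO convention the
    # columns are class, x, y, w, h) and shape is the EXIF-corrected (width, height),
    # plus a "hash" entry used above to detect stale caches.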
    def __getitem__(self, item):
        pass

    def __len__(self):
        return len(self.img_files)