1.yolo.py
import argparse
import logging
import sys
from copy import deepcopy
from pathlib import Path

import torch.nn as nn
import yaml

# Put the directory two levels above this file on sys.path BEFORE the project
# imports below. Appending it after them (as was done previously) could not
# influence how `layers` / `utils.general` are resolved.
sys.path.append(str(Path(__file__).parent.parent.absolute()))

from layers import *  # noqa: E402,F401,F403
from utils.general import *  # noqa: E402,F401,F403

# Defined after the star imports so this module's own logger wins over any
# `logger` name re-exported by them.
logger = logging.getLogger(__name__)
class Detect(nn.Module):
    """Detection head: one 1x1 convolution per feature level plus box decoding.

    In training mode ``forward`` returns the reshaped raw feature maps. In
    eval mode it additionally applies a sigmoid, decodes xy/wh using the cell
    grid, ``stride`` and ``anchor_grid``, and returns
    ``(concatenated_predictions, raw_feature_maps)``.

    Args:
        nc: Number of classes; every anchor predicts ``nc + 5`` values.
        anchors: Per-level flat anchor lists ``[w0, h0, w1, h1, ...]``.
        ch: Input channel count of each feature level.
        inplace: Decode boxes by slice assignment (True) or by building a new
            tensor with ``torch.cat`` (False).
    """

    # Per-level strides. Must be assigned by the owner of this module before
    # it is run in eval mode (it is indexed as ``self.stride[i]``).
    stride = None

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):
        super(Detect, self).__init__()
        self.nc = nc
        self.no = nc + 5  # outputs per anchor
        self.nl = len(anchors)  # number of detection levels
        self.na = len(anchors[0]) // 2  # anchors per level
        self.grid = [torch.zeros(1)] * self.nl  # lazily rebuilt in forward()
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', a)
        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)
        self.inplace = inplace

    def forward(self, x):
        """Run the head over a list of feature maps (the list is updated in place)."""
        z = []
        for i in range(self.nl):
            x[i] = self.m[i](x[i])
            bs, _, ny, nx = x[i].shape
            # (bs, na*no, ny, nx) -> (bs, na, ny, nx, no)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
            if not self.training:
                # Rebuild the cached grid whenever the spatial size changes.
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
                # Fix: Tensor.sigmoid() takes no argument.
                y = x[i].sigmoid()
                if self.inplace:
                    y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]
                    y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]
                else:
                    # Fix: without this branch inplace=False returned undecoded boxes.
                    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]
                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]
                    y = torch.cat((xy, wh, y[..., 4:]), -1)
                z.append(y.view(bs, -1, self.no))
        return x if self.training else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return a (1, 1, ny, nx, 2) float tensor holding (x, y) cell indices."""
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
def parse_model(d, ch):
    """Build the network described by a model dictionary.

    Args:
        d: Model dict with keys 'anchors', 'nc', 'depth_multiple',
            'width_multiple', 'backbone' and 'head'. Each backbone/head entry
            is ``[from, number, module, args]``.
        ch: List whose last element is the input channel count.

    Returns:
        ``(nn.Sequential of layers, sorted list of layer indices whose outputs
        must be kept for later layers)``.
    """
    logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors
    no = na * (nc + 5)  # outputs of the detection layer

    layers, save, c2 = [], [], ch[-1]
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):
        # NOTE(security): eval() executes text taken from the model config.
        # Only load configuration files from trusted sources.
        m = eval(m) if isinstance(m, str) else m
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a
            except Exception:
                # Best effort: strings that are not expressions (e.g. a mode
                # name) are kept verbatim. Unlike a bare `except:` this no
                # longer swallows KeyboardInterrupt/SystemExit.
                pass

        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
        # SELayer is deliberately NOT in this list: it takes (channel, reduction),
        # not (c1, c2, ...), and listing it here made its branch below unreachable.
        if m in [CBA, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv,
                 BottleneckCSP, C3, C3TR]:
            c1, c2 = ch[f], args[0]
            if c2 != no:
                c2 = make_divisible(c2 * gw, 8)  # width gain
            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3, C3TR]:
                args.insert(2, n)  # these modules repeat internally
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum([ch[x] for x in f])
        elif m is Detect:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # anchor count given instead of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        elif m is SELayer:
            channel = args[0]
            channel = make_divisible(channel * gw, 8) if channel != no else channel
            args = [channel, *args[1:]]  # keep an explicit reduction, if any
            c2 = channel  # SELayer rescales its input, so out channels == channel
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)
        t = str(m)[8:-2].replace('__main__.', '')  # module type name
        n_params = sum([x.numel() for x in m_.parameters()])
        m_.i, m_.f, m_.type, m_.np = i, f, t, n_params
        logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, n_params, t, args))
        # Remember every non-previous source layer so forward can keep its output.
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)
        layers.append(m_)
        if i == 0:
            ch = []  # from here on ch[k] is the output width of layer k
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
def initialize_weights(model):
    """Apply default hyper-parameters to selected sub-modules of ``model``.

    BatchNorm2d layers get ``eps=1e-3`` and ``momentum=0.03``; Hardswish,
    LeakyReLU, ReLU and ReLU6 activations are switched to in-place mode.
    All other modules (including Conv2d) are left untouched. Only exact type
    matches are considered, not subclasses.
    """
    inplace_activations = (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6)
    for module in model.modules():
        kind = type(module)
        if kind is nn.BatchNorm2d:
            module.eps = 1e-3
            module.momentum = 0.03
        elif kind in inplace_activations:
            module.inplace = True
class Model(nn.Module):
    """Network assembled from a YAML/dict description via ``parse_model``.

    Args:
        cfg: Path to a model YAML file, or an already loaded dict.
        ch: Input channel count, used when the config has no 'ch' entry.
        nc: Optional class-count override for the config's 'nc'.
        anchors: Optional override for the config's 'anchors'; it is passed
            through ``round()``, so it must be a number.
    """

    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):
        super(Model, self).__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg
        else:
            with open(cfg) as f:
                self.yaml = yaml.safe_load(f)

        ch = self.yaml['ch'] = self.yaml.get('ch', ch)
        if nc and nc != self.yaml['nc']:
            logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc
        if anchors:
            logger.info(f'Overriding model.yaml anchors with anchors={anchors}')
            self.yaml['anchors'] = round(anchors)
        # parse_model mutates the dict it is given, hence the deepcopy.
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])
        self.names = [str(i) for i in range(self.yaml['nc'])]
        self.inplace = self.yaml.get('inplace', True)

        m = self.model[-1]
        if isinstance(m, Detect):
            s = 128  # side length of the dummy input used to measure strides
            m.inplace = self.inplace
            # The module is still in training mode here, so forward() returns
            # the raw per-level feature maps.
            m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])
            m.anchors /= m.stride.view(-1, 1, 1)
            check_anchor_order(m)
            self.stride = m.stride
            self._initialize_biases()

        initialize_weights(self)
        self.info()

    def forward(self, x, augment=False):
        """Run a plain forward pass, or the augmented one when ``augment`` is set."""
        if augment:
            return self.forward_augment(x)
        else:
            return self.forward_once(x)

    def forward_augment(self, x):
        """Run inference at three scales (one horizontally flipped) and merge.

        Returns ``(predictions concatenated along dim 1, None)``.
        """
        img_size = x.shape[-2:]
        s = [1, 0.83, 0.67]  # scales
        f = [None, 3, None]  # flip dimension per pass (3 = width axis)
        y = []
        for si, fi in zip(s, f):
            xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
            yi = self.forward_once(xi)[0]
            yi = self._descale_pred(yi, fi, si, img_size)
            y.append(yi)
        return torch.cat(y, 1), None

    def forward_once(self, x):
        """Run the layers in order, routing saved outputs according to ``m.f``."""
        y = []  # outputs of layers listed in self.save (None for the rest)
        for m in self.model:
            if m.f != -1:  # input does not come from the previous layer
                if isinstance(m.f, int):
                    # Fix: a single integer source index used to be iterated
                    # over, which raised TypeError.
                    x = y[m.f]
                else:
                    x = [x if j == -1 else y[j] for j in m.f]
            x = m(x)
            y.append(x if m.i in self.save else None)
        return x

    def info(self, verbose=False, img_size=640):
        """Log a model summary via ``model_info``."""
        model_info(self, verbose, img_size)

    def _descale_pred(self, p, flips, scale, img_size):
        """Undo scaling/flipping of augmented predictions (modifies ``p`` in place)."""
        p[..., :4] /= scale
        if flips == 2:
            p[..., 1] = img_size[0] - p[..., 1]
        elif flips == 3:
            p[..., 0] = img_size[1] - p[..., 0]
        return p

    def _initialize_biases(self, cf=None):
        """Offset the bias of each conv in the final layer's ``m`` list.

        Column 4 of every anchor's bias is shifted by ``log(8 / (640 / s)**2)``
        and columns 5+ by ``log(0.6 / (nc - 0.99))`` or, when ``cf`` is given,
        by ``log(cf / cf.sum())``.
        """
        m = self.model[-1]
        for mi, s in zip(m.m, m.stride):
            b = mi.bias.view(m.na, -1)
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)
            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
if __name__ == '__main__':
    # Script entry point: build the model named by --cfg on the requested
    # device and switch it to training mode.
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    opt = parser.parse_args()
    opt.cfg = check_file(opt.cfg)
    set_logging()
    device = select_device(opt.device)
    # Fix: the selected device was computed but never used; move the model to it.
    model = Model(opt.cfg).to(device)
    model.train()
2.layers.py
import torch.nn as nn
import torch
import math
import numpy as np
def autopad(k, p=None):
    """Return ``p`` if given, otherwise half the kernel size (per dimension).

    ``k`` may be an int or an iterable of ints; for an iterable a list of
    per-dimension paddings is returned.
    """
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [side // 2 for side in k]
def DWConv(input_channels, output_channels, kernels=1, stride=1, active=True):
    """Build a CBA block whose group count is gcd(input_channels, output_channels).

    Fix: the group count was passed as ``g=``, a keyword CBA does not accept
    (its parameter is named ``group``), so every call raised TypeError.
    """
    groups = math.gcd(input_channels, output_channels)
    return CBA(input_channels, output_channels, kernels, stride, group=groups, active=active)
class CBA(nn.Module):
    """Conv2d (no bias) -> BatchNorm2d -> activation.

    Args:
        input_channels: Conv input channels.
        output_channels: Conv output channels.
        keranel_size: Kernel size (int or tuple); name kept for compatibility.
        stride: Conv stride.
        padding: Explicit padding; when None it is derived by ``autopad``.
        group: Conv ``groups`` value.
        active: An ``nn.Module`` to use as the activation, or a truthy value
            for SiLU, or a falsy value for no activation (Identity).
    """

    def __init__(self, input_channels, output_channels, keranel_size=1, stride=1, padding=None, group=1, active=True):
        super(CBA, self).__init__()
        self.conv = nn.Conv2d(input_channels, output_channels, keranel_size, stride,
                              autopad(keranel_size, padding), groups=group, bias=False)
        self.bn = nn.BatchNorm2d(output_channels)
        # Fix: check for a module first. Module instances are normally truthy,
        # so the previous `if active` test replaced a custom activation by SiLU.
        if isinstance(active, nn.Module):
            self.act = active
        elif active:
            self.act = nn.SiLU()
        else:
            self.act = nn.Identity()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.act(x)
        return x
class GhostConv(nn.Module):
    """Ghost convolution: half the output from a regular conv, half from a cheap 5x5 grouped conv.

    ``cv1`` produces ``output_channels // 2`` channels; ``cv2`` maps those to
    the same number of channels with ``groups == channels``; the two halves
    are concatenated along dim 1.
    """

    def __init__(self, input_channels, output_channels, keranel_size=1, stride=1, padding=None, group=1, active=True):
        super(GhostConv, self).__init__()
        c_ = output_channels // 2  # width of each half
        self.cv1 = CBA(input_channels, c_, keranel_size, stride, None, group, active)
        # Fix: cv2 must emit c_ channels (not output_channels); otherwise the
        # concatenation below has c_ + output_channels channels.
        self.cv2 = CBA(c_, c_, 5, 1, None, c_, active)

    def forward(self, x):
        y = self.cv1(x)
        # Fix: call the module; `self.cv2[y]` tried to index it.
        return torch.cat([y, self.cv2(y)], 1)
class Bottleneck(nn.Module):
    """1x1 CBA followed by a 3x3 CBA, with an optional residual connection.

    The residual is added only when ``shortcut`` is truthy and the input and
    output channel counts are equal. The hidden width is
    ``int(output_channels * expansion)``.
    """

    def __init__(self, input_channels, output_channels, shortcut=True, group=1, expansion=0.5):
        super().__init__()
        hidden = int(output_channels * expansion)
        self.cv1 = CBA(input_channels, hidden, 1, 1)
        self.cv2 = CBA(hidden, output_channels, 3, 1, group=group)
        self.add = shortcut and input_channels == output_channels

    def forward(self, x):
        if self.add:
            return x + self.cv2(self.cv1(x))
        return self.cv2(self.cv1(x))
class GhostBottleneck(nn.Module):
    """Two GhostConvs (with a depth-wise conv in between when stride == 2) plus a shortcut.

    With ``stride == 2`` the shortcut is a depth-wise conv followed by a 1x1
    CBA; otherwise it is an Identity, so the sum in ``forward`` then requires
    matching input and output shapes.
    """

    def __init__(self, input_channels, output_channels, keranel_size=1, stride=1):
        super(GhostBottleneck, self).__init__()
        c_ = output_channels // 2
        self.conv = nn.Sequential(
            GhostConv(input_channels, c_, 1, 1),
            # Fix: the condition referenced an undefined name `s` (NameError).
            DWConv(c_, c_, keranel_size, stride, active=False) if stride == 2 else nn.Identity(),
            GhostConv(c_, output_channels, 1, 1, active=False),
        )
        self.shortcut = nn.Sequential(
            DWConv(input_channels, input_channels, keranel_size, stride, active=False),
            CBA(input_channels, output_channels, 1, 1, active=False),
        ) if stride == 2 else nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)
class SPP(nn.Module):
    """Pooling block: 1x1 CBA, parallel stride-1 max-pools, concat, 1x1 CBA.

    ``k`` lists the max-pool kernel sizes; each pool pads by ``kernel // 2``.
    The reduced input and every pooled copy are concatenated along dim 1
    before the final CBA.
    """

    def __init__(self, input_channels, output_channels, k=(5, 9, 13)):
        super().__init__()
        hidden = input_channels // 2
        branches = len(k) + 1  # un-pooled tensor plus one per kernel size
        self.cv1 = CBA(input_channels, hidden, 1, 1)
        self.cv2 = CBA(hidden * branches, output_channels, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k])

    def forward(self, x):
        x = self.cv1(x)
        pooled = [pool(x) for pool in self.m]
        return self.cv2(torch.cat([x, *pooled], 1))
class MixConv2d(nn.Module):
    """Placeholder module: defines no layers and no ``forward`` of its own."""
class Focus(nn.Module):
    """Stack the four 2x2 pixel phases of the input on the channel axis, then apply a CBA.

    The slicing halves the last two dimensions and quadruples dim 1, which is
    why the inner CBA is built with ``input_channels * 4`` inputs.
    """

    def __init__(self, input_channels, output_channels, kernel_size=1, stride=1, padding=None, group=1, active=True):
        super().__init__()
        self.conv = CBA(input_channels * 4, output_channels, kernel_size, stride, padding, group, active)

    def forward(self, x):
        even_even = x[..., ::2, ::2]
        odd_even = x[..., 1::2, ::2]
        even_odd = x[..., ::2, 1::2]
        odd_odd = x[..., 1::2, 1::2]
        stacked = torch.cat([even_even, odd_even, even_odd, odd_odd], 1)
        return self.conv(stacked)
class CrossConv(nn.Module):
    """A (1 x k) CBA followed by a (k x 1) CBA, with an optional residual.

    The residual is added only when ``shortcut`` is truthy and input and
    output channel counts match.
    """

    def __init__(self, input_channels, output_channels, kernel_size=3, stride=1, group=1, expansion=1.0, shortcut=False):
        super().__init__()
        hidden = int(output_channels * expansion)
        self.cv1 = CBA(input_channels, hidden, (1, kernel_size), (1, stride))
        self.cv2 = CBA(hidden, output_channels, (kernel_size, 1), (stride, 1), group=group)
        self.add = shortcut and input_channels == output_channels

    def forward(self, x):
        out = self.cv2(self.cv1(x))
        return x + out if self.add else out
class BottleneckCSP(nn.Module):
    """CSP block: a bottleneck branch and a plain 1x1 branch, concatenated and fused.

    Branch 1: ``cv1`` -> ``n`` Bottlenecks -> ``cv3``. Branch 2: ``cv2``.
    The concatenation passes through BatchNorm, LeakyReLU(0.1) and ``cv4``.
    """

    def __init__(self, input_channels, output_channels, n=1, shortcut=False, group=1, expansion=0.5):
        super(BottleneckCSP, self).__init__()
        c_ = int(output_channels * expansion)  # hidden width of each branch
        self.cv1 = CBA(input_channels, c_, 1, 1)
        self.cv2 = nn.Conv2d(input_channels, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = CBA(2 * c_, output_channels, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to the concatenated branches
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, group, expansion=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv1(x)
        y1 = self.m(y1)
        y1 = self.cv3(y1)
        y2 = self.cv2(x)
        # Fix: torch.cat takes a sequence of tensors, not separate positional tensors.
        y = torch.cat((y1, y2), dim=1)
        y = self.bn(y)
        y = self.act(y)
        y = self.cv4(y)
        return y
class C3(nn.Module):
    """Two parallel 1x1 CBA branches (one followed by ``n`` Bottlenecks), concatenated and fused by a CBA."""

    def __init__(self, input_channels, output_channels, n=1, shortcut=True, group=1, expansion=0.5):
        super().__init__()
        hidden = int(output_channels * expansion)
        self.cv1 = CBA(input_channels, hidden, 1, 1)
        self.cv2 = CBA(input_channels, hidden, 1, 1)
        self.cv3 = CBA(2 * hidden, output_channels, 1)
        blocks = [Bottleneck(hidden, hidden, shortcut, group, expansion=1.0) for _ in range(n)]
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        deep = self.m(self.cv1(x))
        skip = self.cv2(x)
        return self.cv3(torch.cat((deep, skip), dim=1))
class C3TR(nn.Module):
    """Placeholder module with no layers.

    NOTE(review): the constructor takes no arguments, so it cannot be built
    with channel arguments yet; the real implementation is still missing.
    """

    def __init__(self):
        # Fix: nn.Module.__init__ must run, otherwise the instance lacks the
        # internal registries and fails on .parameters(), .to(), etc.
        super(C3TR, self).__init__()
class SELayer(nn.Module):
    """Channel re-weighting: global average pool -> two bias-free Linear layers -> sigmoid -> scale input.

    The hidden Linear width is ``channel // reduction``. ``forward`` expects
    a 4-D input whose dim 1 equals ``channel``.
    """

    def __init__(self, channel, reduction=16):
        super().__init__()
        squeezed = channel // reduction
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, squeezed, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(squeezed, channel, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        batch, channels, _, _ = x.size()
        pooled = self.avgpool(x).view(batch, channels)
        weights = self.fc(pooled).view(batch, channels, 1, 1)
        return x * weights.expand_as(x)
class Concat(nn.Module):
    """Concatenate a sequence of tensors along a fixed dimension (default 1)."""

    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension

    def forward(self, x):
        axis = self.d
        return torch.cat(x, axis)
class Contract(nn.Module):
    """Fold ``gain x gain`` spatial blocks into channels.

    Maps ``(N, C, H, W)`` to ``(N, C * gain**2, H // gain, W // gain)`` using
    view/permute only; H and W must be divisible by ``gain``.
    """

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        batch, chans, height, width = x.size()
        g = self.gain
        rows, cols = height // g, width // g
        blocks = x.view(batch, chans, rows, g, cols, g)
        # Bring the two intra-block offsets in front of the channel axis.
        blocks = blocks.permute(0, 3, 5, 1, 2, 4).contiguous()
        return blocks.view(batch, chans * g * g, rows, cols)
class Expand(nn.Module):
    """Unfold channels into ``gain x gain`` spatial blocks.

    Maps ``(N, C, H, W)`` to ``(N, C // gain**2, H * gain, W * gain)`` using
    view/permute only; C must be divisible by ``gain**2``.
    """

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        batch, chans, height, width = x.size()
        g = self.gain
        out_chans = chans // g ** 2
        blocks = x.view(batch, g, g, out_chans, height, width)
        # Interleave the two block offsets with the row and column axes.
        blocks = blocks.permute(0, 3, 4, 1, 5, 2).contiguous()
        return blocks.view(batch, out_chans, height * g, width * g)
3.general.py
import math
import torch.nn.functional as F
import torch
import logging
from copy import deepcopy
from pathlib import Path
import glob
import os
import subprocess
import datetime
import platform
# Module-level logger; used by model_info() and select_device() below.
logger = logging.getLogger(__name__)
def make_divisible(x, divisor):
    """Round ``x`` up to the nearest multiple of ``divisor``."""
    multiples = math.ceil(x / divisor)
    return multiples * divisor
def scale_img(img, ratio=1.0, same_shape=False, gs=32):
    """Resize a (N, C, H, W) image batch by ``ratio`` and pad it bottom/right.

    Args:
        img: 4-D image tensor.
        ratio: Scale factor; ``1.0`` returns ``img`` unchanged.
        same_shape: If True, pad back to the original H x W; otherwise pad to
            the next multiples of ``gs`` above the scaled size.
        gs: Size multiple used when ``same_shape`` is False.

    Returns:
        The resized tensor, padded with the constant 0.447.
    """
    if ratio == 1.0:
        return img
    h, w = img.shape[2:]
    s = (int(h * ratio), int(w * ratio))  # new size
    # Fix: the mode was misspelled 'billinear', which F.interpolate rejects.
    img = F.interpolate(img, size=s, mode='bilinear', align_corners=False)
    if not same_shape:
        h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
    return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)
def check_anchor_order(m):
    """Reverse ``m.anchors`` / ``m.anchor_grid`` if their order disagrees with ``m.stride``.

    Compares the sign of (last - first) anchor area with the sign of
    (last - first) stride; on a mismatch both anchor tensors are flipped
    along dim 0 in place and a message is printed.
    """
    areas = m.anchor_grid.prod(-1).view(-1)
    area_trend = (areas[-1] - areas[0]).sign()
    stride_trend = (m.stride[-1] - m.stride[0]).sign()
    if area_trend == stride_trend:
        return
    print('Reversing anchor order')
    m.anchors[:] = m.anchors.flip(0)
    m.anchor_grid[:] = m.anchor_grid.flip(0)
def model_info(model, verbose=False, img_size=640):
    """Log a one-line summary of ``model`` (layers, parameters, gradients, optional GFLOPS).

    With ``verbose`` a per-parameter table is printed first. The FLOPS
    estimate relies on the optional ``thop`` package; any failure while
    computing it is swallowed and the figure is simply omitted.
    """
    n_p = sum(p.numel() for p in model.parameters())  # total parameters
    n_g = sum(p.numel() for p in model.parameters() if p.requires_grad)  # trainable parameters

    if verbose:
        header = ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')
        print('%5s %40s %9s %12s %20s %10s %10s' % header)
        for i, (name, p) in enumerate(model.named_parameters()):
            name = name.replace('module_list.', '')
            row = (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())
            print('%5g %40s %9s %12g %20s %10.3g %10.3g' % row)

    try:
        from thop import profile
        if hasattr(model, 'stride'):
            stride = max(int(model.stride.max()), 32)
        else:
            stride = 32
        img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device)
        flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2
        if not isinstance(img_size, list):
            img_size = [img_size, img_size]
        fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride)
    except Exception:  # includes ImportError when thop is not installed
        fs = ''

    logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
def check_file(file):
    """Resolve ``file`` to an existing path, downloading or searching if needed.

    * An empty string or an existing file is returned as is.
    * An http(s) URL is downloaded into the current directory under its base name.
    * Anything else is searched for recursively below ``./``; exactly one
      match must exist.

    Raises:
        AssertionError: if a download fails, or the search finds no match or
            more than one match.
    """
    file = str(file)
    if file == '' or Path(file).is_file():
        return file

    if file.startswith(('http://', 'https://')):
        url = file
        file = Path(url).name
        print(f'Downloading {url} to {file}...')
        torch.hub.download_url_to_file(url, file)
        assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}'
        return file

    files = glob.glob('./**/' + file, recursive=True)
    assert len(files), f'File not found: {file}'
    assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}"
    return files[0]
def set_logging(rank=-1, verbose=True):
    """Configure root logging: INFO when ``verbose`` and ``rank`` is -1 or 0, otherwise WARN."""
    if verbose and rank in [-1, 0]:
        level = logging.INFO
    else:
        level = logging.WARN
    logging.basicConfig(format="%(message)s", level=level)
def git_describe(path=Path(__file__).parent):
    """Return ``git describe --tags --long --always`` for ``path``, or '' on failure.

    The command is run from an argument list rather than a shell string, so
    paths containing spaces or shell metacharacters are handled correctly.
    A missing git executable (OSError) is treated like any other failure.
    """
    cmd = ['git', '-C', str(path), 'describe', '--tags', '--long', '--always']
    try:
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except (subprocess.CalledProcessError, OSError):
        return ''
    return output.decode().rstrip()  # drop the trailing newline
def date_modified(path=__file__):
    """Return the modification date of ``path`` as 'year-month-day' (no zero padding, local time)."""
    mtime = Path(path).stat().st_mtime
    t = datetime.datetime.fromtimestamp(mtime)
    return f'{t.year}-{t.month}-{t.day}'
def select_device(device='', batch_size=None):
    """Pick the torch device described by ``device`` and log a summary line.

    Args:
        device: 'cpu', a comma-separated list of CUDA indices such as '0' or
            '0,1', or '' to use CUDA when available.
        batch_size: If given and several GPUs are used, it must be divisible
            by the GPU count.

    Returns:
        ``torch.device('cuda:0')`` when CUDA is used, else ``torch.device('cpu')``.

    Side effects:
        Sets the CUDA_VISIBLE_DEVICES environment variable when ``device`` is
        non-empty.
    """
    s = f'YOLOv5 🚀 {git_describe() or date_modified()} torch {torch.__version__} '
    cpu = device.lower() == 'cpu'
    if cpu:
        # Hide every GPU so torch.cuda.is_available() reports False.
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    elif device:
        os.environ['CUDA_VISIBLE_DEVICES'] = device
        assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested'

    cuda = not cpu and torch.cuda.is_available()
    if not cuda:
        s += 'CPU\n'
    else:
        devices = device.split(',') if device else range(torch.cuda.device_count())
        n = len(devices)
        if n > 1 and batch_size:
            assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}'
        space = ' ' * len(s)  # aligns the continuation lines under the first one
        for i, d in enumerate(devices):
            p = torch.cuda.get_device_properties(i)
            s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n"

    # On Windows, non-ASCII characters are stripped from the message before logging.
    if platform.system() == 'Windows':
        logger.info(s.encode().decode('ascii', 'ignore'))
    else:
        logger.info(s)
    return torch.device('cuda:0' if cuda else 'cpu')