YOLOv5代码阅读笔记

最新推荐文章于 2024-07-29 10:39:14 发布

ctrlCV女壮士

最新推荐文章于 2024-07-29 10:39:14 发布

阅读量1.9k

点赞数

分类专栏：代码阅读笔记文章标签： python 深度学习

原文链接：https://blog.csdn.net/weixin_41153216/article/details/106924348

版权

代码阅读笔记专栏收录该内容

1 篇文章 0 订阅

订阅专栏

YOLOv5代码阅读笔记

1.train.py
2. dataset.py 中的数据增强
- 2.1 mosaic数据增强 def load_mosaic(self, index)
- 2.2 色彩空间HSV增强
3. 模型文件 YOLO.py
- - 主要讲解class Model
4. utils.py中的损失函数
- - 文章引用

1.train.py

该部分主要讲解train.py中的train(hyp)函数。

1.1 设置日志文件输出位置、名称等

# Resolve the run directory: reuse the TensorBoard writer's log_dir when a writer exists,
# otherwise fall back to a fixed default path.
log_dir = tb_writer.log_dir if tb_writer else 'runs-yolov5x/evolution'  # run directory
wdir = str(Path(log_dir) / 'weights') + os.sep  # weights directory (string with trailing separator)

os.makedirs(wdir, exist_ok=True)  # create the weights directory; no error if it already exists
last = wdir + 'last.pt'  # path of 'last.pt' inside the weights directory
best = wdir + 'best.pt'  # path of 'best.pt' inside the weights directory
results_file = log_dir + os.sep + 'results.txt'  # path of 'results.txt' inside the run directory

# Save run settings
# Both dumps pass sort_keys=False, so keys are written in their existing order.
with open(Path(log_dir) / 'hyp.yaml', 'w') as f:
    yaml.dump(hyp, f, sort_keys=False)  # hyperparameter dict
with open(Path(log_dir) / 'opt.yaml', 'w') as f:
    yaml.dump(vars(opt), f, sort_keys=False)  # attributes of opt as a dict

1.2 载入图片、标签

# Configure
init_seeds(1)  # NOTE(review): presumably seeds the RNGs for reproducibility — confirm in init_seeds
with open(opt.data) as f:
    # NOTE(review): FullLoader is not meant for untrusted YAML; opt.data is assumed to be a local, trusted file
    data_dict = yaml.load(f, Loader=yaml.FullLoader)  # dataset config dict read from opt.data
train_path = data_dict['train']  # value of the 'train' key
test_path = data_dict['val']  # the 'val' entry is what gets used as the test path
# With opt.single_cls, force a single class named 'item'; otherwise take nc/names from the dataset config.
nc, names = (1, ['item']) if opt.single_cls else (int(data_dict['nc']), data_dict['names'])  # number classes, names
# The number of class names must match nc.
assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data)  # check

1.3 确定图片训练的尺寸

# Image sizes
gs = int(max(model.stride))  # grid size (max stride)
# opt.img_size must contain exactly two values (unpacked into train and test sizes);
# each one is passed through check_img_size together with gs.
imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify imgsz are gs-multiples

1.4 判断参数的形式为weight或bias，定义模型不同部分，并设置相应的学习率和参数。

# Optimizer
nbs = 64  # nominal batch size
# Number of batches to accumulate so that batch_size * accumulate is close to nbs (at least 1).
accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
# Sort trainable parameters into three groups by parameter name:
#   pg2: names containing '.bias'
#   pg1: names containing '.weight' but not '.bn' (the only group given weight decay below)
#   pg0: everything else (this includes '.bn' weights)
for k, v in model.named_parameters():
    if v.requires_grad:
        if '.bias' in k:
            pg2.append(v)  # biases
        elif '.weight' in k and '.bn' not in k:
            pg1.append(v)  # apply weight decay
        else:
            pg0.append(v)  # all else

# The optimizer is created with pg0 only and no explicit weight_decay argument.
if hyp['optimizer'] == 'adam':  # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
    optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
else:
    optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

# Resulting param_groups order: [pg0, pg1, pg2].
optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
del pg0, pg1, pg2  # drop the temporary lists; the optimizer keeps its own references

1.5 设置学习率的变化方式，这里采用cosine 学习率趋势

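余弦形式的学习率变化有助于缓解训练陷入鞍点的问题。需要注意的是，这里的 lf 在整个训练过程中只走半个余弦周期：学习率倍率从第 0 个 epoch 的 1.0 单调衰减到第 epochs 个 epoch 的 0.1（即 lr0 → 0.1 × lr0），并不是带重启的周期性变化。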

# Scheduler https://arxiv.org/pdf/1812.01187.pdf
# lf maps epoch index x to a learning-rate multiplier: 1.0 at x = 0, falling along a half cosine
# to 0.1 at x = epochs (the ** 1.0 exponent leaves the cosine term unchanged).
lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1  # cosine
# LambdaLR uses lf as the multiplicative factor on each group's initial learning rate.
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

1.6 定义训练集和验证集的数据读取方式

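有博主说区别在于：testloader使用的是测试图片，保留了原来的宽长比（矩形输入），没有数据增强和mosaic过程。
乍看之下此代码中似乎并未体现，其实区别就在传入的参数上：testloader 使用 augment=False、rect=True，而训练集使用 augment=True、rect=opt.rect；mosaic 只在 augment 为真且 rect 为假时才启用（见 2.1 中的 self.mosaic = self.augment and not self.rect）。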

# Trainloader
# Training loader: augmentation enabled, rectangular mode taken from opt.rect.
dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
                                        hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect)
# Column 0 of the concatenated label arrays is read as the class id.
mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
nb = len(dataloader)  # number of batches
# The largest class id in the labels must be below nc.
assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Correct your labels or your model.' % (mlc, nc, opt.cfg)

# Testloader
# Test loader: augmentation disabled and rect=True; only the first element of the returned pair is kept.
testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, opt,
                               hyp=hyp, augment=False, cache=opt.cache_images, rect=True)[0]

1.7 统计样本分布

# Class frequency
labels = np.concatenate(dataset.labels, 0)  # all label rows of the training dataset stacked along axis 0
c = torch.tensor(labels[:, 0])  # classes
# cf = torch.bincount(c.long(), minlength=nc) + 1.
# model._initialize_biases(cf.to(device))
plot_labels(labels, save_dir=log_dir)  # NOTE(review): presumably writes label plots into log_dir — confirm in plot_labels
if tb_writer:
    # tb_writer.add_hparams(hyp, {})  # causes duplicate https://github.com/ultralytics/yolov5/pull/384
    tb_writer.add_histogram('classes', c, 0)  # log the class-id histogram under tag 'classes' at step 0

1.8 获取anchor

    # Check anchors
    # Unless disabled via opt.noautoanchor, run check_anchors on the training dataset with the
    # hyp['anchor_t'] threshold and the training image size.
    if not opt.noautoanchor:
        check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)

2. dataset.py 中的数据增强

2.1 mosaic数据增强 def load_mosaic(self, index)

step1.
随机挑选4张图构成一个大图

# Mosaic is enabled only when augmentation is on and rectangular mode is off.
self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)

step2.
获取该大图的中心位置 yc, xc。

    labels4 = []  # collects one label array per tile
    s = self.img_size
    # Each centre coordinate is drawn uniformly between -x and 2 * s + x for x in self.mosaic_border,
    # then truncated to int.
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center (assigned in y, x order)
    # The first entry is the requested index itself; random.randint includes both endpoints.
    indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices

此处的mosaic_border 定义如下：（其中，该代码中的img_size设置为640）

 # Two-element border, both entries equal to -img_size // 2.
 self.mosaic_border = [-img_size // 2, -img_size // 2]

step3.
再额外随机挑选3张图片，索引号为indices

    # Four indices in total: the requested index first, then three drawn with random.randint (endpoints inclusive).
    indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices

step4.
将4张图片按照左上、左下、右上、右下放置
问题： step3.中挑选了3张图，此处的另一张小图来自于哪里？
答：另一张就是当前 index 对应的图片本身。indices = [index] + [...] 的第一个元素是 index，其后才是随机挑选的 3 张，所以一共 4 张。

           # place img in img4
        # For tile i, compute the destination rectangle in the (2s x 2s) mosaic canvas
        # (x1a, y1a, x2a, y2a) and the source rectangle in the tile image (x1b, y1b, x2b, y2b).
        # The four tiles meet at the mosaic center (xc, yc); w and h are assumed to be the
        # current tile's width and height (defined outside this excerpt).
        if i == 0:  # top left
            # The canvas is created once, on the first tile, filled with the constant 114.
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            # x2b is the tile's right edge w (not max(xc, w), which can exceed the tile width
            # and only works because NumPy clamps out-of-range slice ends).
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

x1a, y1a, x2a, y2a 代表该小图在大图 img4 中所占区域的坐标（大图坐标系）；
x1b, y1b, x2b, y2b 代表从小图自身截取的区域的坐标（小图坐标系）。两个区域大小一致，因此后面可以直接把小图的这块区域赋值到大图中。

step4（续）.
把小图粘贴到大图中，并将小图标签的坐标变换到大图坐标系：

        # Paste the selected region of the tile into its slot on the mosaic canvas.
        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        # Offset between tile coordinates and canvas coordinates.
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels
        x = self.labels[index]  # NOTE(review): index is presumably the enclosing loop's current tile index — confirm
        labels = x.copy()  # work on a copy so self.labels is not modified
        if x.size > 0:  # Normalized xywh to pixel xyxy format
            # Columns 1-4 are rewritten; column 0 is left untouched.
            labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw  # xmin on the canvas
            labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh  # ymin on the canvas
            labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw  # xmax on the canvas
            labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh  # ymax on the canvas
        labels4.append(labels)

step5.
对生成的坐标进行clip

    # Concat/clip labels
    if len(labels4):
        labels4 = np.concatenate(labels4, 0)  # stack the per-tile label arrays along axis 0
        # np.clip(labels4[:, 1:] - s / 2, 0, s, out=labels4[:, 1:])  # use with center crop
        # Clip columns 1 onward in place to [0, 2 * s], the extent of the mosaic canvas.
        np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])  # use with random_affine

step6.
对mosaic之后的大图进行旋转、缩放等操作。
假设图片尺寸为 640×640×3，经过mosaic操作后的尺寸为 1280×1280×3，经过旋转、缩放后尺寸又变为 640×640×3，也就是输入网络的图片尺寸。

    # Augment
    # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)]  # center crop (WARNING, requires box pruning)
    # Apply random_affine to the mosaic and its labels using the degrees/translate/scale/shear
    # hyperparameters. NOTE(review): border=self.mosaic_border presumably crops the 2s canvas
    # back to s x s — confirm in random_affine.
    img4, labels4 = random_affine(img4, labels4,
                                  degrees=self.hyp['degrees'],
                                  translate=self.hyp['translate'],
                                  scale=self.hyp['scale'],
                                  shear=self.hyp['shear'],
                                  border=self.mosaic_border)  # border to remove

    return img4, labels4  # augmented mosaic image and its labels

2.2 色彩空间HSV增强

def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
    """Randomly scale the hue, saturation and value of a BGR image, in place.

    One gain per channel is drawn uniformly from [1 - gain, 1 + gain]. Each
    channel is remapped through a 256-entry lookup table: hue wraps modulo 180,
    saturation and value are clipped to [0, 255]. The converted result is
    written back into ``img``; nothing is returned.

    Args:
        img: BGR image array, modified in place (assumed uint8 — TODO confirm).
        hgain: Half-width of the random hue gain range.
        sgain: Half-width of the random saturation gain range.
        vgain: Half-width of the random value gain range.
    """
    gains = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # one random gain per channel
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hue, sat, val = cv2.split(hsv)
    out_dtype = img.dtype  # lookup tables and output keep the input dtype

    # Lookup tables over every possible 8-bit level.
    levels = np.arange(0, 256, dtype=np.int16)
    hue_table = ((levels * gains[0]) % 180).astype(out_dtype)
    sat_table = np.clip(levels * gains[1], 0, 255).astype(out_dtype)
    val_table = np.clip(levels * gains[2], 0, 255).astype(out_dtype)

    remapped = (cv2.LUT(hue, hue_table), cv2.LUT(sat, sat_table), cv2.LUT(val, val_table))
    img_hsv = cv2.merge(remapped).astype(out_dtype)
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # result lands in img via dst; no return needed

3. 模型文件 YOLO.py

主要讲解class Model

其中比较重要的是计算输入和输出特征图之间的尺度比值 stride

        # Build strides, anchors
        m = self.model[-1]  # Detect()
        if isinstance(m, Detect):
            s = 128  # 2x min stride
            # Run one forward pass on an all-zero (1, ch, s, s) input; each output's stride is
            # the input size s divided by that output's shape[-2].
            m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
            # Divide the anchors in place by the per-output stride (broadcast through view(-1, 1, 1)).
            m.anchors /= m.stride.view(-1, 1, 1)
            check_anchor_order(m)  # NOTE(review): presumably checks anchor order against stride order — confirm
            self.stride = m.stride  # expose the strides on the model itself
            self._initialize_biases()  # only run once
            # print('Strides: %s' % m.stride.tolist())