pytorch----retinaface(data文件)

data

文件目录
在这里插入图片描述
FDDB文件存放fddb数据集:
这里不做说明。
config.py:

# config.py

cfg_mnet = {
    'name': 'mobilenet0.25',
    'min_sizes': [[16, 32], [64, 128], [256, 512]],
    'steps': [8, 16, 32],##步幅与论文中有所不同
    'variance': [0.1, 0.2],##方差
    'clip': False,##梯度消失和梯度爆炸
    'loc_weight': 2.0,
    'gpu_train': True,
    'batch_size': 32,#批大小
    'ngpu': 1,
    'epoch': 250,#单次epoch的迭代次数减少,提高运行速度。(单次epoch=(全部训练样本/batchsize)/iteration=1)
    'decay1': 190,
    'decay2': 220,#衰变
    'image_size': 640,
    'pretrain': True,
    'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
    'in_channel': 32,#输入通道
    'out_channel': 64#输出通道
}

cfg_re50 = {
    'name': 'Resnet50',
    'min_sizes': [[16, 32], [64, 128], [256, 512]],
    'steps': [8, 16, 32],
    'variance': [0.1, 0.2],
    'clip': False,
    'loc_weight': 2.0,
    'gpu_train': True,
    'batch_size': 24,
    'ngpu': 4,
    'epoch': 100,
    'decay1': 70,
    'decay2': 90,
    'image_size': 840,
    'pretrain': True,
    'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3},
    'in_channel': 256,
    'out_channel': 256
}

这个文件是一些网络配置与超参数。
data_augment.py

import cv2
import numpy as np
import random
from utils.box_utils import matrix_iof
"""
训练过程中如果要做到多张图片一起训练需要保持每张图片的大小一致,且与网络的输入层尺寸一致,即训练过程中所有图片的大小均为640×640×3。
"""

def _crop(image, boxes, labels, landm, img_dim):
    height, width, _ = image.shape
    pad_image_flag = True

    for _ in range(250):#最大重复裁剪250次,直到某次裁剪合格
        """
        if random.uniform(0, 1) <= 0.2:
            scale = 1.0
        else:
            scale = random.uniform(0.3, 1.0)
        训练数据集的准备引入了数据增强的策略,对于图片做不同尺度的缩放,图片的基准尺寸用的是网络的输入大小640,首先将输入图片较短的维度缩放成基础尺寸640, 在此基础上根据PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0]再进行缩放,每张图片都会随机匹配一个PRE_SCALE,将图像短边缩放成640 / PRE_SCALE, 即图像的短边尺寸的取值包括[640, 800, 1067, 1422, 2133]
        """
        PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0]
        scale = random.choice(PRE_SCALES)
        short_side = min(width, height)
        w = int(scale * short_side)
        h = w

        if width == w:
            l = 0
        else:
            l = random.randrange(width - w)
        if height == h:
            t = 0
        else:
            t = random.randrange(height - h)
        roi = np.array((l, t, l + w, t + h))#剪裁后的roi

        value = matrix_iof(boxes, roi[np.newaxis])
        flag = (value >= 1)
        if not flag.any():
            continue

        ## 仅保留GT中心在img_n的img_n,若没有,则重新裁剪
        centers = (boxes[:, :2] + boxes[:, 2:]) / 2#求中心
        mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1)
        boxes_t = boxes[mask_a].copy()
        labels_t = labels[mask_a].copy()
        landms_t = landm[mask_a].copy()
        landms_t = landms_t.reshape([-1, 5, 2])

        if boxes_t.shape[0] == 0:
            continue
        # 获取img_t的像素信息,注意height是第一维
        image_t = image[roi[1]:roi[3], roi[0]:roi[2]]
        # 对GT的坐标重新限定,主要是因为边界问题
        boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2])
        boxes_t[:, :2] -= roi[:2]
        boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
        boxes_t[:, 2:] -= roi[:2]

        # landm
        landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2]
        landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0]))
        landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2])
        landms_t = landms_t.reshape([-1, 10])


	# make sure that the cropped image contains at least one face > 16 pixel at training image scale
        #确保裁剪后的图像在训练图像比例上至少包含一个大于16像素的面
        b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim
        b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim
        mask_b = np.minimum(b_w_t, b_h_t) > 0.0
        boxes_t = boxes_t[mask_b]
        labels_t = labels_t[mask_b]
        landms_t = landms_t[mask_b]

        if boxes_t.shape[0] == 0:
            continue

        pad_image_flag = False

        return image_t, boxes_t, labels_t, landms_t, pad_image_flag
    return image, boxes, labels, landm, pad_image_flag

# 亮度对比度在RGB空间调整,色相饱和度在HSV空间调整,都是以0.5的概率
def _distort(image):

    def _convert(image, alpha=1, beta=0):
        tmp = image.astype(float) * alpha + beta
        tmp[tmp < 0] = 0
        tmp[tmp > 255] = 255
        image[:] = tmp

    image = image.copy()

    if random.randrange(2):

        #brightness distortion
        if random.randrange(2):
            _convert(image, beta=random.uniform(-32, 32))

        #contrast distortion
        if random.randrange(2):
            _convert(image, alpha=random.uniform(0.5, 1.5))

        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        #saturation distortion
        if random.randrange(2):
            _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

        #hue distortion
        if random.randrange(2):
            tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
            tmp %= 180
            image[:, :, 0] = tmp

        image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)

    else:

        #brightness distortion
        if random.randrange(2):
            _convert(image, beta=random.uniform(-32, 32))

        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        #saturation distortion
        if random.randrange(2):
            _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

        #hue distortion
        if random.randrange(2):
            tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
            tmp %= 180
            image[:, :, 0] = tmp

        image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)

        #contrast distortion
        if random.randrange(2):
            _convert(image, alpha=random.uniform(0.5, 1.5))

    return image

# 扩展图片,以p的概率,caffe中p=0.5,pytorch中p=0.6
def _expand(image, boxes, fill, p):
    if random.randrange(2):
        return image, boxes

    height, width, depth = image.shape#获得图片信息

    scale = random.uniform(1, p)#将随机生成下一个实数,它在 [x, y] 范围内
    w = int(scale * width)
    h = int(scale * height)
    ## 随机生成左上角的点的坐标
    left = random.randint(0, w - width)
    top = random.randint(0, h - height)
    # 对GT的坐标的调整
    boxes_t = boxes.copy()
    boxes_t[:, :2] += (left, top)
    boxes_t[:, 2:] += (left, top)
    ## 扩展后的图像,和原图重叠部分原像素填充;其他部分填充均值,因为后续需要减去均值,所以等价于0填充,即为黑边
    expand_image = np.empty(
        (h, w, depth),
        dtype=image.dtype)
    expand_image[:, :] = fill
    expand_image[top:top + height, left:left + width] = image
    image = expand_image

    return image, boxes_t

# 以0.5的概率水平翻转,返回处理后的图片和GT信息,landms
def _mirror(image, boxes, landms):
    _, width, _ = image.shape
    if random.randrange(2):
        image = image[:, ::-1]
        boxes = boxes.copy()
        boxes[:, 0::2] = width - boxes[:, 2::-2]

        # landm
        landms = landms.copy()
        landms = landms.reshape([-1, 5, 2])
        landms[:, :, 0] = width - landms[:, :, 0]
        tmp = landms[:, 1, :].copy()
        landms[:, 1, :] = landms[:, 0, :]
        landms[:, 0, :] = tmp
        tmp1 = landms[:, 4, :].copy()
        landms[:, 4, :] = landms[:, 3, :]
        landms[:, 3, :] = tmp1
        landms = landms.reshape([-1, 10])

    return image, boxes, landms


def _pad_to_square(image, rgb_mean, pad_image_flag):
    if not pad_image_flag:
        return image
    height, width, _ = image.shape
    long_side = max(width, height)
    image_t = np.empty((long_side, long_side, 3), dtype=image.dtype)
    image_t[:, :] = rgb_mean
    image_t[0:0 + height, 0:0 + width] = image
    return image_t

# 随机选择一种resize方式,进行resize,并将channel维度调到第一维
def _resize_subtract_mean(image, insize, rgb_mean):
    interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
    interp_method = interp_methods[random.randrange(5)]
    image = cv2.resize(image, (insize, insize), interpolation=interp_method)
    image = image.astype(np.float32)
    image -= rgb_mean
    return image.transpose(2, 0, 1)

# 数据增强类
class preproc(object):

    def __init__(self, img_dim, rgb_means):
        self.img_dim = img_dim
        self.rgb_means = rgb_means

    def __call__(self, image, targets):
        assert targets.shape[0] > 0, "this image does not have gt"
        # 下面的代码段实现拷贝作用,备份。
        boxes = targets[:, :4].copy()
        labels = targets[:, -1].copy()
        landm = targets[:, 4:-1].copy()

        # 数据增强部分
        image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(image, boxes, labels, landm, self.img_dim)#剪裁
        image_t = _distort(image_t)# 亮度对比度色相饱和度等属性调整
        image_t = _pad_to_square(image_t,self.rgb_means, pad_image_flag)#裁剪后再扩展
        image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t)# 水平翻转
        height, width, _ = image_t.shape
        image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means)# aug后的img进行resize并减去均值
        boxes_t[:, 0::2] /= width
        boxes_t[:, 1::2] /= height

        landm_t[:, 0::2] /= width
        landm_t[:, 1::2] /= height

        #np.expand_dims:用于扩展数组的形状
        labels_t = np.expand_dims(labels_t, 1)
        #np.hstack():在水平方向上平铺
        targets_t = np.hstack((boxes_t, landm_t, labels_t))#整合targets信息

        return image_t, targets_t

wider_face.py

import os
import os.path
import sys
import torch
import torch.utils.data as data
import cv2
import numpy as np
#封装数据集
class WiderFaceDetection(data.Dataset):
    def __init__(self, txt_path, preproc=None):
        self.preproc = preproc
        self.imgs_path = []
        self.words = []
        f = open(txt_path,'r')
        lines = f.readlines()
        isFirst = True
        labels = []
        for line in lines:
            line = line.rstrip()
            if line.startswith('#'):
                if isFirst is True:
                    isFirst = False
                else:
                    labels_copy = labels.copy()
                    self.words.append(labels_copy)
                    labels.clear()
                path = line[2:]
                path = txt_path.replace('label.txt','images/') + path
                self.imgs_path.append(path)
            else:
                line = line.split(' ')
                label = [float(x) for x in line]
                labels.append(label)

        self.words.append(labels)

    def __len__(self):
        return len(self.imgs_path)

    def __getitem__(self, index):
        img = cv2.imread(self.imgs_path[index])
        height, width, _ = img.shape

        labels = self.words[index]
        annotations = np.zeros((0, 15))
        if len(labels) == 0:
            return annotations
        for idx, label in enumerate(labels):
            annotation = np.zeros((1, 15))
            # bbox
            annotation[0, 0] = label[0]  # x1
            annotation[0, 1] = label[1]  # y1
            annotation[0, 2] = label[0] + label[2]  # x2
            annotation[0, 3] = label[1] + label[3]  # y2

            # landmarks
            annotation[0, 4] = label[4]    # l0_x
            annotation[0, 5] = label[5]    # l0_y
            annotation[0, 6] = label[7]    # l1_x
            annotation[0, 7] = label[8]    # l1_y
            annotation[0, 8] = label[10]   # l2_x
            annotation[0, 9] = label[11]   # l2_y
            annotation[0, 10] = label[13]  # l3_x
            annotation[0, 11] = label[14]  # l3_y
            annotation[0, 12] = label[16]  # l4_x
            annotation[0, 13] = label[17]  # l4_y
            if (annotation[0, 4]<0):
                annotation[0, 14] = -1
            else:
                annotation[0, 14] = 1

            annotations = np.append(annotations, annotation, axis=0)#为原始array添加一些values
        target = np.array(annotations)
        if self.preproc is not None:
            img, target = self.preproc(img, target)

        return torch.from_numpy(img), target

def detection_collate(batch):
    """Custom collate fn for dealing with batches of images that have a different
    number of associated object annotations (bounding boxes).
    自定义处理在同一个batch,含有不同数量的目标框的情况

    Arguments:
        batch: (tuple) A tuple of tensor images and lists of annotations

    Return:
        A tuple containing:
            1) (tensor) batch of images stacked on their 0 dim
            2) (list of tensors) annotations for a given image are stacked on 0 dim
    """
    targets = []
    imgs = []
    for _, sample in enumerate(batch):
        for _, tup in enumerate(sample):
            if torch.is_tensor(tup):
                imgs.append(tup)
            elif isinstance(tup, type(np.empty(0))):#isinstance() 函数来判断一个对象是否是一个已知的类型
                annos = torch.from_numpy(tup).float()
                targets.append(annos)

    return (torch.stack(imgs, 0), targets)

  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
好的,关于 Pytorch-RetinaFace 的代码讲解,我可以给您提供以下信息: 1. RetinaFace 模型介绍 RetinaFace 是一个用于人脸检测和人脸识别的深度学习模型,其核心思想是在多个尺度下进行特征提取,并利用类似于 Anchor 的方式进行边框预测。RetinaFace 模型的核心网络结构为 ResNet-50,通过 ResNet-50 提取特征,再分别在不同的特征图上进行预测,从而提高检测的准确率和速度。 2. Pytorch-RetinaFace 代码结构 Pytorch-RetinaFace 的代码结构主要包含以下几个文件: - data/ 目录:包含了数据集相关的代码和数据集文件; - models/ 目录:包含了 RetinaFace 模型的代码和模型文件; - layers/ 目录:包含了 RetinaFace 模型中用到的自定义层代码; - utils/ 目录:包含了一些工具类和函数; - train.py:训练脚本; - test.py:测试脚本; - demo.py:演示脚本。 3. RetinaFace 模型训练 RetinaFace 模型的训练主要包含以下几个步骤: - 数据集准备:将数据集按照指定格式进行划分和预处理; - 模型构建:使用 Pytorch 搭建 RetinaFace 模型,并定义损失函数和优化器; - 模型训练:使用训练集对模型进行训练,并在验证集上进行验证和调参; - 模型保存:将训练好的模型保存到指定的路径。 4. RetinaFace 模型测试 RetinaFace 模型的测试主要包含以下几个步骤: - 加载模型:使用 Pytorch 加载训练好的模型; - 图像预处理:将待检测的图像进行预处理,包括大小调整和归一化等; - 特征提取:使用 ResNet-50 提取图像的特征; - 预测边框:在不同的特征图上进行边框预测,并进行 NMS 处理; - 绘制结果:将预测出的边框和置信度绘制在原图上。 以上就是关于 Pytorch-RetinaFace 代码的讲解,希望能够对您有所帮助。
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值