Rewriting YOLO's Data Loading Module

Motivation

YOLO's original dataloader is hard to read and hard to extend; the other reason for rewriting it is simply to get familiar with the source code.

import glob
import os
import random
from pathlib import Path
from typing import Any, Callable, Optional, Tuple

import cv2
import numpy as np
from torchvision.datasets import VisionDataset

# The helpers below (Albumentations, augment_hsv, letterbox, random_perspective,
# xywhn2xyxy, xyxy2xywhn, IMG_FORMATS, img2label_paths) come from the YOLOv5 repo;
# the exact module paths may differ between versions.
from utils.augmentations import Albumentations, augment_hsv, letterbox, random_perspective
from utils.dataloaders import IMG_FORMATS, img2label_paths
from utils.general import xywhn2xyxy, xyxy2xywhn


class DataFolder(VisionDataset):
    def __init__(
            self,
            root: str,
            img_size=640,
            batch_size=32,
            augment=True,
            hyp=None,
            transform: Optional[Callable] = None,
            target_transform: Optional[Callable] = None
    ) -> None:
        super().__init__(root, transform=transform, target_transform=target_transform)
        self.sample = {}
        self.findLabelAndImg(root, self.sample)
        # 创建索引
        self.indices = range(len(self.sample['filePath']))
        self.img_size = img_size
        self.batch_size = batch_size
        # 超参数
        self.hyp = hyp
        self.augment = augment
        # 增强
        self.mosaic = True
        self.mixup = True
        self.albumentations = Albumentations(size=img_size) if augment else None

    def findLabelAndImg(self, root, sample):
        try:
            f = []  # image files
            p = Path(root)
            if p.is_dir():  # dir
                f += glob.glob(str(p / '**' / '*.*'), recursive=True)
            im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
            assert im_files, f'No images found in {root}'
        except Exception as e:
            raise Exception(f'Error loading data from {root}: {e}') from e
        sample['filePath'] = im_files  # keep the filtered, sorted image paths so they align with labelPath
        sample['labelPath'] = img2label_paths(im_files)


    def _load_image(self, id: int):
        f = self.sample['filePath'][id]
        im = cv2.imread(f)
        return im, im.shape[:2]  # BGR

    def _load_target(self, id: int):
        lb_file = self.sample['labelPath'][id]
        if os.path.isfile(lb_file):
            nf = 1  # label found
            with open(lb_file) as f:
                lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
                lb = np.array(lb, dtype=np.float32)
            nl = len(lb)
            if nl:
                assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected'
                assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}'
                assert (lb[:, 1:] <= 1).all(), f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}'
            else:
                ne = 1  # label empty
                lb = np.zeros((0, 5), dtype=np.float32)
        else:
            print(f'label not found: {lb_file}')
            lb = np.zeros((0, 5), dtype=np.float32)
        return lb

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        id = self.indices[index]
        hyp = self.hyp
        img, (h, w) = self._load_image(id)
        img, ratio, pad = letterbox(img, self.img_size, auto=False, scaleup=self.augment)
        labels = self._load_target(id)
        if labels.size:  # normalized xywh to pixel xyxy format
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])
        if self.augment:
            img, labels = random_perspective(img,
                                             labels,
                                             degrees=hyp['degrees'],
                                             translate=hyp['translate'],
                                             scale=hyp['scale'],
                                             shear=hyp['shear'],
                                             perspective=hyp['perspective'])

        nl = len(labels)  # number of labels
        # xyxy 坐标转换为 xywh坐标
        if nl:
            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)
        if self.augment:
            # Albumentations
            img, labels = self.albumentations(img, labels)
            nl = len(labels)  # update after albumentations

            # HSV color-space
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nl:
                    labels[:, 2] = 1 - labels[:, 2]

            # Flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nl:
                    labels[:, 1] = 1 - labels[:, 1]

            # Cutouts
            # labels = cutout(img, labels, p=0.5)
            # nl = len(labels)  # update after cutout
        return img, labels

    def __len__(self) -> int:
        return len(self.indices)
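As a usage sketch (not in the original post): the hyp dict only needs the keys this class actually reads. The values below follow YOLOv5's hyp.scratch-low.yaml defaults, and the dataset path is just a placeholder:

# minimal hyp dict with only the keys this class reads; values are illustrative defaults
hyp = {
    'degrees': 0.0, 'translate': 0.1, 'scale': 0.5, 'shear': 0.0, 'perspective': 0.0,  # random_perspective
    'hsv_h': 0.015, 'hsv_s': 0.7, 'hsv_v': 0.4,  # augment_hsv
    'flipud': 0.0, 'fliplr': 0.5,  # flip probabilities
}
dataset = DataFolder(root='datasets/coco128/images/train2017', img_size=640, hyp=hyp, augment=True)  # placeholder path
img, labels = dataset[0]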

Define a class:

class DataFolder(VisionDataset):

The class's docstring:

    """
    Base Class For making datasets which are compatible with torchvision.
    It is necessary to override the ``__getitem__`` and ``__len__`` method.

    Args:
        root (string): Root directory of dataset.
        transforms (callable, optional): A function/transforms that takes in
            an image and a label and returns the transformed versions of both.
        transform (callable, optional): A function/transform that  takes in an PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.

    .. note::

        :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive.
    """

For the details, see my other post reading through this class's source code. In my view, this class essentially defines a coding convention.

Implementation


class DataFolder(VisionDataset):
    def __init__(
            self,
            root: str,
            transform: Optional[Callable] = None,
            target_transform: Optional[Callable] = None,
            cache=False,
    ) -> None:
        super().__init__(root, transform=transform, target_transform=target_transform)
        # find_img_labels: file-discovery helper (same idea as findLabelAndImg above),
        # returning matched lists of image paths and label paths
        self.im_files, self.label_files = find_img_labels(self.root)
        self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in zip(self.im_files, self.label_files)]
        self.index = range(len(self.label_files))
        self.cache_ram = cache is True or cache == 'ram'
        self.cache_disk = cache == 'disk'

    def _load_image(self, id: int):
        f, t, fn, im = self.samples[id]
        if self.cache_ram:
            if im is None:  # first access: read and keep in RAM
                im = cv2.imread(f)  # BGR
                self.samples[id][3] = im
        elif self.cache_disk:
            if not fn.exists():  # first access: cache to .npy on disk
                np.save(fn.as_posix(), cv2.imread(f))
            im = np.load(fn)
        else:  # no caching, read from disk every time
            im = cv2.imread(f)  # BGR
        return im

    def _load_target(self, id: int):
        f, t, fn, im = self.samples[id]
        if os.path.isfile(t):
            with open(t) as fh:
                lb = [x.split() for x in fh.read().strip().splitlines() if len(x)]
                lb = np.array(lb, dtype=np.float32)
        else:
            raise FileNotFoundError(f'{t} does not exist')
        return lb

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        id = self.index[index]
        image = self._load_image(id)
        target = self._load_target(id)

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

Overall idea: read the image paths and label paths and set up an index over them, then define two loaders, one for images and one for labels, and that is basically it.
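As an additional usage sketch (not from the original post): plugging the class into a torch DataLoader needs a small collate_fn, since every sample carries a different number of label rows. This assumes the transforms resize all images to the same shape; the dataset path is a placeholder:

import numpy as np
import torch
from torch.utils.data import DataLoader

def collate_fn(batch):
    # stack images into BCHW and tag every label row with its image index,
    # so the variable-length label sets fit into one (N, 6) tensor per batch
    ims, lbs = zip(*batch)
    ims = torch.from_numpy(np.stack(ims)).permute(0, 3, 1, 2).contiguous()  # images stay uint8 BGR here
    targets = []
    for i, lb in enumerate(lbs):
        t = torch.zeros((len(lb), 6))
        t[:, 0] = i                      # image index within the batch
        t[:, 1:] = torch.from_numpy(lb)  # cls, x, y, w, h
        targets.append(t)
    return ims, torch.cat(targets, 0)

dataset = DataFolder(root='datasets/coco128/images/train2017', cache='ram')  # placeholder path
loader = DataLoader(dataset, batch_size=16, shuffle=True, collate_fn=collate_fn)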

Results:

mosaic:

Add one flag in the init method:

self.mosaic = True

Add a couple of lines in __getitem__:

        if self.mosaic:
            image, target = self.load_mosaic(id, self.img_size)

load_mosaic: copying it from the YOLO source is enough; only the image and label loading needs to be adapted.
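For illustration only, a minimal sketch of what such a load_mosaic could look like on this class. It is deliberately simplified to a fixed 2x2 grid; YOLOv5's real version picks a random mosaic centre and runs random_perspective afterwards, so treat this purely as an example of the index handling and label bookkeeping:

    def load_mosaic(self, id, img_size):
        # simplified 4-image mosaic: resize each image to img_size x img_size,
        # tile them into a 2x2 grid, and shift the normalized labels accordingly
        s = img_size
        ids = [id] + random.choices(self.index, k=3)        # this image plus 3 random ones
        canvas = np.full((2 * s, 2 * s, 3), 114, dtype=np.uint8)
        labels4 = []
        for i, idx in enumerate(ids):
            im = cv2.resize(self._load_image(idx), (s, s))
            lb = self._load_target(idx).reshape(-1, 5)      # [cls, x, y, w, h], normalized; reshape guards empty files
            row, col = divmod(i, 2)
            canvas[row * s:(row + 1) * s, col * s:(col + 1) * s] = im
            if len(lb):
                lb[:, 1] = (lb[:, 1] + col) / 2             # shift and renormalize x
                lb[:, 2] = (lb[:, 2] + row) / 2             # shift and renormalize y
                lb[:, 3:5] /= 2                             # w, h shrink by the same factor
            labels4.append(lb)
        return canvas, np.concatenate(labels4, 0)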

mixup:

Likewise, copying the source code over is enough.
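A small sketch of the mixup step for reference (YOLOv5 blends two already-loaded samples with a Beta(32, 32) ratio and simply concatenates the label sets; the method name here is made up):

    def load_mixup(self, im, labels, im2, labels2):
        # blend two images (assumed to have the same shape) and keep both label sets
        r = np.random.beta(32.0, 32.0)                      # ratio close to 0.5
        im = (im * r + im2 * (1 - r)).astype(np.uint8)
        labels = np.concatenate((labels, labels2), 0)
        return im, labels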

Summary

The other augmentation methods are all much the same; it mainly comes down to index handling and loading the labels and images. Performance is certainly much worse than YOLO's native loader: the native version processes all labels and images up front at initialization and then caches them, which is also why its code is so large and hard to read. On the other hand, the torchvision.datasets package provides a coding convention with very clear logic; if you are just starting out, following it is a good choice in my opinion.
