YOLOX Mosaic 增强（2）

最新推荐文章于 2024-01-15 16:13:40 发布

陈平安|

最新推荐文章于 2024-01-15 16:13:40 发布

阅读量368

点赞数

分类专栏： yolox 文章标签： pytorch

本文链接：https://blog.csdn.net/weixin_43361491/article/details/120100615

版权

yolox 专栏收录该内容

3 篇文章 2 订阅

订阅专栏

import torch
import xml.etree.ElementTree as ET
import os
import cv2
import numpy as np
from torchvision import transforms
import random


def get_mosaic_coordinate(mosaic_image, mosaic_index, xc, yc, w, h, input_h, input_w):
    """Compute where one tile goes on the mosaic canvas and which part of it is kept.

    The canvas is ``2 * input_h`` high and ``2 * input_w`` wide, and the four
    tiles meet at the point ``(xc, yc)``.  A tile that would extend past the
    canvas border is cropped on the side that sticks out.

    Args:
        mosaic_image: Unused; kept so existing call sites keep working.
        mosaic_index: Quadrant of the tile: 0 = top-left, 1 = top-right,
            2 = bottom-left, 3 = bottom-right.
        xc, yc: Mosaic centre (the corner shared by all four tiles).
        w, h: Width and height of the (already resized) tile.
        input_h, input_w: Height and width of a single network input; the
            canvas is twice as large in each direction.

    Returns:
        ``((x1, y1, x2, y2), (sx1, sy1, sx2, sy2))`` - the destination
        rectangle on the canvas and the source rectangle inside the tile.
        Both rectangles have the same width and height.

    Raises:
        ValueError: If ``mosaic_index`` is not 0, 1, 2 or 3.
    """
    if mosaic_index == 0:
        # Top-left: the tile's bottom-right corner touches the centre, so any
        # cropping removes the tile's left / top part.
        x1, y1, x2, y2 = max(xc - w, 0), max(yc - h, 0), xc, yc
        small_coord = w - (x2 - x1), h - (y2 - y1), w, h
    elif mosaic_index == 1:
        # Top-right: the tile's bottom-left corner touches the centre.
        x1, y1, x2, y2 = xc, max(yc - h, 0), min(xc + w, input_w * 2), yc
        small_coord = 0, h - (y2 - y1), min(w, x2 - x1), h
    elif mosaic_index == 2:
        # Bottom-left: the tile's top-right corner touches the centre.
        x1, y1, x2, y2 = max(xc - w, 0), yc, xc, min(input_h * 2, yc + h)
        small_coord = w - (x2 - x1), 0, w, min(y2 - y1, h)
    elif mosaic_index == 3:
        # Bottom-right: the tile's top-left corner touches the centre.
        x1, y1, x2, y2 = xc, yc, min(xc + w, input_w * 2), min(input_h * 2, yc + h)  # noqa
        small_coord = 0, 0, min(w, x2 - x1), min(y2 - y1, h)
    else:
        # Previously this fell through and crashed with UnboundLocalError.
        raise ValueError(
            "mosaic_index must be 0, 1, 2 or 3, got {!r}".format(mosaic_index)
        )
    return (x1, y1, x2, y2), small_coord




class VOCDataset(torch.utils.data.Dataset):
    """PASCAL VOC detection dataset that yields one 4-image mosaic per sample.

    ``dataset[i]`` stitches image ``i`` together with three other images of
    the split onto a grey canvas twice the size of ``input_size`` and returns
    the canvas together with the boxes/classes of all four images, expressed
    in canvas coordinates.
    """

    CLASSES_NAME = (
        "__background__ ",                 # index 0 is reserved for background
        "aeroplane",
        "bicycle",
        "bird",
        "boat",
        "bottle",
        "bus",
        "car",
        "cat",
        "chair",
        "cow",
        "diningtable",
        "dog",
        "horse",
        "motorbike",
        "person",
        "pottedplant",
        "sheep",
        "sofa",
        "train",
        "tvmonitor",
    )

    def __init__(self, root_dir, resize_size=(800, 1024), split='test', use_difficult=False,
                 *, input_size=(640, 640), mosaic_center=None, mosaic_indices=None,
                 debug_dir=None):
        """Index the image ids of one VOC split.

        Args:
            root_dir: VOC root containing ``Annotations``, ``JPEGImages`` and
                ``ImageSets/Main``.
            resize_size: Currently unused; accepted for backward compatibility.
            split: Name of the ``ImageSets/Main/<split>.txt`` file to read.
            use_difficult: Keep objects flagged ``difficult`` when True.
            input_size: ``(height, width)`` of one network input; the mosaic
                canvas is twice that size.
            mosaic_center: Optional fixed ``(xc, yc)`` mosaic centre.  When
                None a random centre is drawn for every sample.
            mosaic_indices: Optional fixed sequence of exactly four dataset
                indices to stitch (ignores the requested item; useful for
                reproducing a figure).  When None, the requested item plus
                three random indices are used.
            debug_dir: Optional directory; when given, the original tiles,
                resized tiles and final mosaic are written there as JPEGs.

        Raises:
            ValueError: If ``mosaic_indices`` does not contain four entries.
        """
        if mosaic_indices is not None and len(mosaic_indices) != 4:
            raise ValueError("mosaic_indices must contain exactly 4 dataset indices")

        self.root = root_dir
        self.use_difficult = use_difficult
        self.imgset = split
        self.input_size = tuple(input_size)
        self.mosaic_center = mosaic_center
        self.mosaic_indices = mosaic_indices
        self.debug_dir = debug_dir
        if self.debug_dir is not None:
            os.makedirs(self.debug_dir, exist_ok=True)

        # "%s" placeholders are filled with an image id / split name later.
        self._annopath = os.path.join(self.root, "Annotations", "%s.xml")
        self._imgpath = os.path.join(self.root, "JPEGImages", "%s.jpg")
        self._imgsetpath = os.path.join(self.root, "ImageSets", "Main", "%s.txt")

        # One image id per line, e.g. ['000009', '000052']; blank lines are skipped.
        with open(self._imgsetpath % self.imgset) as f:
            self.img_ids = [line.strip() for line in f if line.strip()]

        self.name2id = dict(zip(VOCDataset.CLASSES_NAME, range(len(VOCDataset.CLASSES_NAME))))
        # Per-channel statistics (the widely used ImageNet RGB values), stored
        # for a later normalisation step; they are not applied in this class.
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        print("INFO=====>voc dataset init finished  ! !")

    def __len__(self):
        """Return the number of image ids in the split."""
        return len(self.img_ids)

    def _read_img_rgb(self, path):
        """Read an image from ``path`` and return it as an RGB uint8 array.

        Raises:
            FileNotFoundError: If OpenCV cannot read the file.
        """
        img = cv2.imread(path)
        if img is None:
            raise FileNotFoundError("cannot read image: {}".format(path))
        # A single BGR->RGB conversion; the previous code swapped the channels
        # twice and therefore handed back BGR.
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    def _dump_debug(self, filename, rgb_img):
        """Write ``rgb_img`` into ``debug_dir`` (no-op when debugging is off)."""
        if self.debug_dir is None:
            return
        # cv2.imwrite expects BGR channel order.
        cv2.imwrite(os.path.join(self.debug_dir, filename),
                    cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR))

    def _parse_annotation(self, root):
        """Extract boxes and class ids from a parsed VOC annotation root.

        Returns:
            ``(boxes, classes)`` - a float32 array of shape (N, 4) holding
            ``xmin, ymin, xmax, ymax`` and an int64 array of shape (N,).
            N is 0 when no object is kept.
        """
        TO_REMOVE = 1  # VOC pixel coordinates are 1-based; shift to 0-based
        boxes = []
        classes = []
        for obj in root.iter("object"):
            difficult_node = obj.find("difficult")
            difficult = difficult_node is not None and int(difficult_node.text) == 1
            if difficult and not self.use_difficult:
                continue
            bndbox = obj.find("bndbox")
            boxes.append([
                float(bndbox.find(tag).text) - TO_REMOVE
                for tag in ("xmin", "ymin", "xmax", "ymax")
            ])
            name = obj.find("name").text.lower().strip()
            classes.append(self.name2id[name])
        # reshape keeps the (N, 4) layout even when N == 0.
        boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
        classes = np.array(classes, dtype=np.int64)
        return boxes, classes

    def _load_annotation(self, img_id):
        """Parse the XML annotation file that belongs to ``img_id``."""
        root = ET.parse(self._annopath % img_id).getroot()
        return self._parse_annotation(root)

    @staticmethod
    def _map_boxes(boxes, scale, padw, padh):
        """Move boxes from original-image coordinates onto the mosaic canvas.

        ``scale`` is the resize factor applied to the tile; ``padw`` / ``padh``
        is the offset between tile coordinates and canvas coordinates.
        """
        offset = np.array([padw, padh, padw, padh], dtype=np.float32)
        return boxes * scale + offset

    @staticmethod
    def _clip_boxes(boxes, classes, width, height):
        """Clip boxes to a ``width`` x ``height`` canvas and drop empty ones.

        Boxes that lie entirely in a cropped-away part of a tile collapse to
        zero width or height after clipping and are removed together with
        their class id.
        """
        boxes = boxes.copy()
        boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, width)
        boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, height)
        keep = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
        return boxes[keep], classes[keep]

    def __getitem__(self, item):
        """Build the mosaic sample for dataset index ``item``.

        Returns:
            ``(img, boxes, classes)`` - the mosaic as a float tensor produced
            by ``transforms.ToTensor`` (channels first), an (N, 4) float32
            tensor of ``xmin, ymin, xmax, ymax`` boxes in mosaic pixel
            coordinates, and an (N,) int64 tensor of class ids.
        """
        input_h, input_w = self.input_size

        if self.mosaic_center is None:
            # Random centre somewhere in the middle half of the canvas.
            yc = int(random.uniform(0.5 * input_h, 1.5 * input_h))
            xc = int(random.uniform(0.5 * input_w, 1.5 * input_w))
        else:
            xc, yc = self.mosaic_center

        if self.mosaic_indices is None:
            # The requested image plus three randomly chosen partners.
            indices = [item] + [random.randint(0, len(self.img_ids) - 1) for _ in range(3)]
        else:
            indices = list(self.mosaic_indices)

        # Grey (value 114) canvas, twice the input size in each direction.
        mosaic_img = np.full((input_h * 2, input_w * 2, 3), 114, dtype=np.uint8)
        mosaic_boxes = []
        mosaic_classes = []

        for i_mosaic, index in enumerate(indices):
            img_id = self.img_ids[index]
            img = self._read_img_rgb(self._imgpath % img_id)
            boxes, classes = self._load_annotation(img_id)
            self._dump_debug("orial_" + str(i_mosaic) + ".jpg", img)

            # Resize so the tile fits inside one input while keeping aspect ratio.
            h0, w0 = img.shape[:2]
            scale = min(1. * input_h / h0, 1. * input_w / w0)
            img = cv2.resize(
                img, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR
            )
            self._dump_debug("resize_" + str(i_mosaic) + ".jpg", img)

            h, w = img.shape[:2]
            # "l_" = rectangle on the large canvas, "s_" = rectangle in the small tile.
            (l_x1, l_y1, l_x2, l_y2), (s_x1, s_y1, s_x2, s_y2) = get_mosaic_coordinate(
                mosaic_img, i_mosaic, xc, yc, w, h, input_h, input_w
            )
            mosaic_img[l_y1:l_y2, l_x1:l_x2] = img[s_y1:s_y2, s_x1:s_x2]

            # Offset that converts resized-tile coordinates to canvas coordinates.
            padw, padh = l_x1 - s_x1, l_y1 - s_y1
            mosaic_boxes.append(self._map_boxes(boxes, scale, padw, padh))
            mosaic_classes.append(classes)

        self._dump_debug("mosaic.jpg", mosaic_img)

        boxes, classes = self._clip_boxes(
            np.concatenate(mosaic_boxes, axis=0),
            np.concatenate(mosaic_classes, axis=0),
            2 * input_w,
            2 * input_h,
        )

        # ToTensor converts the uint8 HxWxC canvas to a float CxHxW tensor in [0, 1].
        img = transforms.ToTensor()(mosaic_img)
        boxes = torch.from_numpy(boxes)
        classes = torch.from_numpy(classes)

        return img, boxes, classes
if __name__ == '__main__':
    # Raw string: the Windows path contains backslashes that would otherwise
    # be read as (invalid) escape sequences such as "\P" and "\d".
    dataset = VOCDataset(r'D:\Py_code\Mosaic\data\VOC07_test\VOCdevkit\VOC2007')
    img, box, cls = dataset[1]  # sample at index 1, i.e. the second entry of the split

    print(img.shape)
    print(box)
    print(cls)

    # Optional quick visualisation.  OpenCV works on numpy arrays while img is
    # a tensor, so convert it back to an HxWxC uint8 array first (pixel values
    # are in 0-255, hence uint8):
    # img_ = (img.numpy() * 255).astype(np.uint8).transpose(1, 2, 0)
    # cv2.imshow('test', img_)
    # cv2.waitKey(0)

原始4张图片