centernet 数据处理

全息数据
已于 2024-04-02 18:41:58 修改
阅读量5
点赞数 1
分类专栏：图像算法深度学习 pytorch 文章标签：深度学习目标检测 YOLO
于 2023-01-06 19:00:20 首次发布
本文链接：https://blog.csdn.net/qq_23022733/article/details/128583618
版权
深度学习同时被 3 个专栏收录
71 篇文章 11 订阅
订阅专栏
图像算法
60 篇文章 4 订阅
订阅专栏
pytorch
6 篇文章 0 订阅
订阅专栏
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import json
import os, cv2
import random
from PIL import Image
import uuid
import copy

import torch.utils.data as data
import sys

from PIL import Image, ImageDraw

sys.path.append("../../")


class LuggageMulscaleAll(data.Dataset):
    """Pedestrian / luggage detection dataset with a randomly drawn input size.

    Every sample is an image cropped around its first pedestrian annotation,
    optionally padded (training only), resized to ``default_resolution`` and
    returned together with COCO-style ``(x, y, width, height)`` boxes.

    The class only implements loading (``load_split_data`` / ``load_im_anno``)
    and ``__len__``; no ``__getitem__`` is defined here.
    """

    # setting
    num_classes = 2  # 1->2
    # The spelling 'pedestrain' must match the "class" strings of the label files,
    # because annotations are filtered by comparing against these names.
    class_name = ['__background__', 'pedestrain', 'luggage']
    data_dir = "/home/wangdongbing/myproject/dataset/LuggageDataset"
    # data_dir = r"C:\Users\9ling\Desktop\luggage_centernet\lu"

    min_gt_size = [0, 0]  # [h,w]; not used inside this class
    # Upper bounds of the random [w, h] growth factors used by add_padding;
    # set to None to disable padding.
    rand_wh_padding = [2.6, 1.5]

    # Per-channel statistics shaped (1, 1, 3); not used inside this class
    # (presumably consumed by the sampling code - TODO confirm).
    mean = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32).reshape(1, 1, 3)
    std = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32).reshape(1, 1, 3)

    def __init__(self, opt, split):
        """Load the annotation index for *split* and draw the input resolution.

        Args:
            opt: Option object; it is only stored on the instance here.
            split: ``"train"``, ``"val"`` or ``"benchmark"``. Any other value
                yields an empty dataset (see ``load_split_data``).

        Raises:
            ValueError: If ``self.interpolation`` names an unsupported mode.
        """
        super(LuggageMulscaleAll, self).__init__()

        # Must be initialised before load_split_data(), which updates it.
        self.max_objs = 0
        self.split = split
        self.opt = opt
        self.interpolation = "linear"

        # load data
        self.data = self.load_split_data()
        self.num_samples = len(self.data)
        print("max_objs", self.max_objs)
        print("data_dir", self.data_dir)
        print("rand_padding", self.rand_wh_padding)
        print(self.class_name, self.num_classes)

        if self.interpolation == "nearest":
            self.resize_inter = cv2.INTER_NEAREST
        elif self.interpolation == "linear":
            self.resize_inter = cv2.INTER_LINEAR
        else:
            # Fail early instead of leaving self.resize_inter undefined.
            raise ValueError("unsupported interpolation: " + str(self.interpolation))
        print("resize interpolation: " + self.interpolation)

        # Data augmentation settings; none of them is read inside this class
        # (the eigen values/vectors look like lighting-noise parameters - TODO confirm).
        self._data_rng = np.random.RandomState(123)
        self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], dtype=np.float32)
        self._eig_vec = np.array([[-0.58752847, -0.69563484, 0.41340352],
                                  [-0.5832747, 0.00994535, -0.81221408],
                                  [-0.56089297, 0.71832671, 0.41158938]
                                  ], dtype=np.float32)
        self.color = [(0, 0, 0), (255, 0, 0), (0, 255, 0), (0, 0, 255), ]

        # Multi-scale training: one square size is drawn per dataset instance,
        # in steps of 32 from (base - 32) to (base + 96).
        self.default_resolution = [384, 384]  # [h, w] base size
        self.min_size = self.default_resolution[0] - 32 * 1
        self.max_size = self.default_resolution[0] + 32 * 3
        self.single_resolution = random.choice(range(self.min_size, self.max_size + 1, 32))
        self.default_resolution = [self.single_resolution, self.single_resolution]
        print("input size: ", self.default_resolution)

    def rand(self, a=0, b=1):
        """Return a float drawn uniformly from ``[a, b)`` using ``np.random``."""
        return np.random.rand() * (b - a) + a

    @staticmethod
    def _list_subsets(root):
        """Return the sub-directory names of *root*, sorted for a stable order."""
        return sorted(s for s in os.listdir(root) if os.path.isdir(os.path.join(root, s)))

    @staticmethod
    def _read_labels(json_path):
        """Return the ``"labels"`` list of the JSON file, closing the file afterwards."""
        with open(json_path) as fp:
            return json.load(fp)["labels"]

    def _split_annotations(self, item, image_dir):
        """Rewrite one label entry in place and return it.

        ``"filename"`` becomes a path below *image_dir* and ``"annotations"`` is
        replaced by three lists that are always present (possibly empty):
        ``"pedestrain_annos"``, ``"luggage_annos"`` and ``"annos"`` (every
        annotation whose class is listed in ``class_name``).
        """
        item["filename"] = os.path.join(image_dir, item["filename"])
        raw = item.pop("annotations")
        item["pedestrain_annos"] = [a for a in raw if a["class"] == "pedestrain"]
        item["annos"] = [a for a in raw if a["class"] in self.class_name]
        item["luggage_annos"] = [a for a in raw if a["class"] == "luggage"]
        return item

    def load_split_data(self):
        """Build the list of per-image label dicts for ``self.split``.

        * ``"train"`` reads ``<data_dir>/train/<subset>/<subset>.json`` for
          every subset directory, counts annotations per class and updates
          ``self.max_objs`` with the largest number of kept annotations found
          in a single image.
        * ``"val"`` / ``"benchmark"`` read
          ``<data_dir>/test/has_luggage/<subset>/<subset>.json``.
        * Any other split returns an empty list.

        Images without any annotation are kept on purpose so that
        pure-background samples remain part of the data.

        Returns:
            list[dict]: Entries as produced by ``_split_annotations``.
        """
        alldata = []
        if self.split == "train":
            split_dir = os.path.join(self.data_dir, self.split)
            count = [0] * len(self.class_name)
            for s in self._list_subsets(split_dir):
                s_dir = os.path.join(split_dir, s)
                labels = self._read_labels(os.path.join(s_dir, s + ".json"))
                print("Loading subset " + s + ", #images: " + str(len(labels)))

                for item in labels:
                    self._split_annotations(item, s_dir)
                    for a in item["annos"]:
                        count[self.class_name.index(a["class"])] += 1
                    self.max_objs = max(len(item["annos"]), self.max_objs)
                alldata += labels

            sstr = "#images: {}".format(len(alldata))
            for i in range(1, len(self.class_name)):
                sstr += " ({} {})".format(self.class_name[i], count[i])
            print(sstr)
            print("max_objs", self.max_objs)
        elif self.split == "val" or self.split == "benchmark":
            # Only the "has_luggage" part of the test data is indexed here.
            test_dir = os.path.join(self.data_dir, "test", "has_luggage")
            for s in self._list_subsets(test_dir):
                s_dir = os.path.join(test_dir, s)
                for item in self._read_labels(os.path.join(s_dir, s + ".json")):
                    alldata.append(self._split_annotations(item, s_dir))
            print("#val images: " + str(len(alldata)))
        return alldata

    def add_padding(self, im, anns):
        """Pad *im* with black borders by random factors and shift *anns*.

        The width grows by a factor drawn from ``[1, rand_wh_padding[0]]``
        (split evenly between left and right); the height grows by half of the
        extra amount implied by a factor from ``[1, rand_wh_padding[1]]``, added
        at the bottom only. Because nothing is added on top, only ``"x"`` of
        each annotation has to move.

        Args:
            im: Image array indexed as ``[row, column, ...]``.
            anns: List of annotation dicts with an ``"x"`` key; modified in place.

        Returns:
            tuple: ``(padded_image, anns)``.
        """
        im_h, im_w = im.shape[0], im.shape[1]
        w_r = random.uniform(1.0, self.rand_wh_padding[0])
        h_r = random.uniform(1.0, self.rand_wh_padding[1])
        extra_w_half = int((im_w * w_r - im_w) * 0.5)
        extra_h_half = int((im_h * h_r - im_h) * 0.5)
        # Argument order: top, bottom, left, right; constant border value 0.
        im = cv2.copyMakeBorder(im, 0, extra_h_half, extra_w_half, extra_w_half, cv2.BORDER_CONSTANT, None, value=0)
        for a in anns:
            a["x"] += extra_w_half
        return im, anns

    @staticmethod
    def _read_image(path):
        """Decode the image at *path* as a 3-channel colour image.

        The file is first read as raw bytes and decoded in memory (this also
        works for paths ``cv2.imread`` cannot open); ``cv2.imread`` is the
        fallback.

        Raises:
            IOError: If neither method yields an image.
        """
        # imdecode needs a uint8 byte buffer; np.fromfile defaults to float64.
        raw = np.fromfile(path, dtype=np.uint8)
        img = cv2.imdecode(raw, cv2.IMREAD_COLOR) if raw.size else None
        if img is None:
            img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            raise IOError("cannot read image: " + str(path))
        return img

    @staticmethod
    def _kept_area_ratio(clipped_w, clipped_h, orig_w, orig_h):
        """Return the fraction of a box's area that survives clipping.

        Negative clipped sides count as zero, so a box lying completely outside
        the crop yields 0 instead of a positive product of two negatives. A box
        with no area cannot lose any, so it yields 1.
        """
        orig_area = orig_w * orig_h
        if orig_area <= 0:
            return 1.0
        return (max(0, clipped_w) * max(0, clipped_h)) / float(orig_area)

    @staticmethod
    def _crop_to_union(pim, annos, ped_anno, luggage_anno, left_xcoord, right_xcoord):
        """Crop *pim* so that the pedestrian and every annotation stay visible.

        The vertical range is the union of the pedestrian box and all boxes in
        *annos* (limited to the image). Horizontally, the side facing away from
        the first luggage box keeps the pedestrian margin (*left_xcoord* or
        *right_xcoord*) and is widened further if an annotation reaches beyond
        it; the other side ends at the union.

        Returns:
            tuple: ``(cropped_image, shifted_copy_of_annos)``; ``"x"``/``"y"``
            of the copies are relative to the integer crop origin.
        """
        shifted = copy.deepcopy(annos)
        box = [ped_anno["x"], ped_anno["y"],
               ped_anno["x"] + ped_anno["width"], ped_anno["y"] + ped_anno["height"]]
        for a in shifted:
            box[0] = max(0, min(box[0], a["x"]))
            box[1] = max(0, min(box[1], a["y"]))
            box[2] = min(pim.shape[1] - 1, max(box[2], a["x"] + a["width"]))
            box[3] = min(pim.shape[0] - 1, max(box[3], a["y"] + a["height"]))
        box = [int(b) for b in box]

        if luggage_anno["x"] >= ped_anno["x"]:
            # Luggage on the right: keep the left margin of the pedestrian.
            x0, x1 = min(int(left_xcoord), box[0]), box[2]
        else:
            # Luggage on the left: keep the right margin of the pedestrian.
            x0, x1 = box[0], max(int(right_xcoord), box[2])
        y0, y1 = box[1], box[3]

        # Shift by the integer origin actually used for slicing.
        for a in shifted:
            a["x"] -= x0
            a["y"] -= y0
        return pim[y0:y1, x0:x1], shifted

    def crop_im_annos(self, data):
        """Read an image and crop it around its first pedestrian annotation.

        The default crop spans the pedestrian box widened by 65% of its width
        on each side and by 5% of its height at the bottom, limited to the
        image. Annotations are translated into crop coordinates and clipped to
        the crop; the intermediate corners are stored under ``"x1"``, ``"y1"``,
        ``"x2"``, ``"y2"``.

        If the image has luggage annotations and clipping would keep 90% or
        less of the area of any annotation, the crop is recomputed by
        ``_crop_to_union`` and the annotations are only translated, not clipped.

        Images without a pedestrian annotation (for example pure-background
        images) and crops that turn out empty are returned uncropped with
        their annotations unchanged.

        Args:
            data: One entry of ``self.data``.

        Returns:
            tuple: ``(image, annotations)``; the annotations are copies, the
            dicts inside *data* are never modified.
        """
        pim = self._read_image(data["filename"])
        anno = copy.deepcopy(data["annos"])

        ped_annos = data.get("pedestrain_annos") or []
        if not ped_annos:
            # Nothing to centre the crop on.
            return pim, anno

        height, width = pim.shape[0:2]
        ped_anno = copy.deepcopy(ped_annos[0])
        luggage_annos = data.get("luggage_annos") or []

        top = ped_anno["y"]
        left_xcoord = max(0, ped_anno["x"] - 0.65 * ped_anno["width"])
        right_xcoord = min(width, ped_anno["x"] + ped_anno["width"] + 0.65 * ped_anno["width"])
        right_ycoord = min(height, ped_anno["y"] + ped_anno["height"] + 0.05 * ped_anno["height"])
        res_pim = pim[int(top):int(right_ycoord), int(left_xcoord):int(right_xcoord)]
        crop_w = right_xcoord - left_xcoord
        crop_h = int(right_ycoord) - int(top)

        for s_anno in anno:
            s_anno["x1"] = max(0, s_anno["x"] - left_xcoord)
            s_anno["y1"] = max(0, s_anno["y"] - top)
            s_anno["x2"] = min(crop_w, s_anno["x"] + s_anno["width"] - left_xcoord)
            # Clamp to the crop height as well, otherwise a box could extend
            # into the padding that add_padding appends below the crop.
            s_anno["y2"] = min(crop_h, max(0, s_anno["y"] + s_anno["height"] - top))
            clipped_w = s_anno["x2"] - s_anno["x1"]
            clipped_h = s_anno["y2"] - s_anno["y1"]

            if luggage_annos:
                ratio = self._kept_area_ratio(clipped_w, clipped_h, s_anno["width"], s_anno["height"])
                if not ratio > 0.9:
                    # Too much of this box is lost: enlarge the crop instead.
                    res_pim, anno = self._crop_to_union(
                        pim, data["annos"], ped_anno, copy.deepcopy(luggage_annos[0]),
                        left_xcoord, right_xcoord)
                    break

            s_anno["x"] = s_anno["x1"]
            s_anno["y"] = s_anno["y1"]
            s_anno["width"] = clipped_w
            s_anno["height"] = clipped_h

        if res_pim.size == 0:
            # A degenerate pedestrian box would give an image that cannot be resized.
            return pim, copy.deepcopy(data["annos"])
        return res_pim, anno

    def load_im_anno(self, index):
        """Return the processed image and boxes of sample *index*.

        Steps: crop around the pedestrian, add random padding (``"train"`` with
        ``rand_wh_padding`` set), then resize image and boxes to
        ``default_resolution`` (``"train"`` and ``"val"`` only; other splits
        keep the crop size).

        Annotations lacking any of ``x``/``y``/``width``/``height`` are
        skipped, as are boxes whose size is not positive after clamping to the
        image.

        Returns:
            tuple: ``(image, anns)`` where every entry of ``anns`` is
            ``{"category_id": int, "bbox": [x, y, width, height]}`` and
            ``category_id`` is the index of the class in ``class_name``.
        """
        jdata = self.data[index]
        im, anno = self.crop_im_annos(jdata)
        if self.split == "train" and self.rand_wh_padding is not None:
            im, anno = self.add_padding(im, anno)

        # only resize im and boxes when training / validating
        yscale, xscale = 1.0, 1.0
        in_h, in_w = im.shape[0], im.shape[1]
        if self.split in ["train", "val"]:
            in_h, in_w = self.default_resolution
            yscale, xscale = in_h / float(im.shape[0]), in_w / float(im.shape[1])
            im = cv2.resize(im, (in_w, in_h), interpolation=self.resize_inter)

        anns = []
        for a in anno:
            if any(key not in a for key in ("x", "y", "width", "height")):
                continue

            # Inclusive corner coordinates, scaled and clamped to the image.
            x1 = max(0, a["x"] * xscale)
            y1 = max(0, a["y"] * yscale)
            x2 = min(in_w - 1, (a["x"] + a["width"] - 1) * xscale)
            y2 = min(in_h - 1, (a["y"] + a["height"] - 1) * yscale)

            # convert to coco format (x,y,width,height)
            box_w = x2 - x1 + 1
            box_h = y2 - y1 + 1
            if box_w <= 0 or box_h <= 0:
                # The box lies outside the visible area.
                continue
            anns.append({"category_id": self.class_name.index(a["class"]), "bbox": [x1, y1, box_w, box_h]})
        return im, anns

    def __len__(self):
        """Return the number of indexed images."""
        return self.num_samples


if __name__ == '__main__':
    # Visual check: load every training sample, draw its boxes and display it.
    dataset = LuggageMulscaleAll(None, "train")
    thickness = 3  # outline thickness in pixels

    for index in range(len(dataset.data)):
        print(index)
        im, anns = dataset.load_im_anno(index)
        # OpenCV decodes to BGR while PIL expects RGB, so reverse the channel axis.
        img = Image.fromarray(im[:, :, ::-1].astype(np.uint8))
        draw = ImageDraw.Draw(img)
        for ann in anns:
            # "bbox" is (x, y, width, height), not corner coordinates.
            left, top, box_w, box_h = ann["bbox"][0:4]
            for i in range(thickness):
                # Each pass draws a rectangle one pixel further inside, which
                # adds up to an outline of the requested thickness.
                x0, y0 = left + i, top + i
                x1, y1 = left + box_w - i, top + box_h - i
                if x1 < x0 or y1 < y0:
                    break  # box too small for a thicker outline
                draw.rectangle([x0, y0, x1, y1], outline=(135, 38, 87))
        img.show()