from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import json
import os, cv2
import random
from PIL import Image
import uuid
import copy
import torch.utils.data as data
import sys
from PIL import Image, ImageDraw
sys.path.append("../../")
class LuggageMulscaleAll(data.Dataset):
    """Pedestrian / luggage detection dataset with a randomly chosen square input size.

    Annotations are read from ``<data_dir>/<split>/<subset>/<subset>.json`` (train)
    or ``<data_dir>/test/has_luggage/<subset>/<subset>.json`` (val / benchmark).
    Each JSON file holds a ``"labels"`` list whose entries carry a ``"filename"``
    and an ``"annotations"`` list of boxes with ``class``, ``x``, ``y``, ``width``
    and ``height`` keys.

    ``load_im_anno`` crops every image around its first pedestrian, optionally
    pads it (train only), resizes it to ``self.default_resolution`` (train / val)
    and returns boxes in ``[x, y, width, height]`` form.
    """

    # setting
    num_classes = 2  # foreground classes only ('pedestrain' was added, 1 -> 2)
    # NOTE: 'pedestrain' (sic) is compared against the "class" strings read from
    # the annotation JSON, so the spelling must stay as it is.
    class_name = ['__background__', 'pedestrain', 'luggage']
    data_dir = "/home/wangdongbing/myproject/dataset/LuggageDataset"
    # data_dir = r"C:\Users\9ling\Desktop\luggage_centernet\lu"
    min_gt_size = [0, 0]  # [h, w]
    rand_wh_padding = [2.6, 1.5]  # [w, h] upper bounds of the padding ratio; None to disable
    mean = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32).reshape(1, 1, 3)
    std = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32).reshape(1, 1, 3)

    def __init__(self, opt, split):
        """Load the annotations of ``split`` and pick the input resolution.

        Args:
            opt: option object; only stored on the instance here.
            split: ``"train"``, ``"val"`` or ``"benchmark"``.

        Raises:
            ValueError: if ``self.interpolation`` is not a supported mode.
        """
        super(LuggageMulscaleAll, self).__init__()
        self.max_objs = 0
        self.split = split
        self.opt = opt
        self.interpolation = "linear"

        # load data
        self.data = self.load_split_data()
        self.num_samples = len(self.data)
        print("max_objs", self.max_objs)
        print("data_dir", self.data_dir)
        print("rand_padding", self.rand_wh_padding)
        print(self.class_name, self.num_classes)

        if self.interpolation == "nearest":
            self.resize_inter = cv2.INTER_NEAREST
        elif self.interpolation == "linear":
            self.resize_inter = cv2.INTER_LINEAR
        else:
            # Fail here rather than with an AttributeError on self.resize_inter later.
            raise ValueError("unsupported interpolation: " + str(self.interpolation))
        print("resize interpolation: " + self.interpolation)

        # data augmentation settings
        self._data_rng = np.random.RandomState(123)
        self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], dtype=np.float32)
        self._eig_vec = np.array([[-0.58752847, -0.69563484, 0.41340352],
                                  [-0.5832747, 0.00994535, -0.81221408],
                                  [-0.56089297, 0.71832671, 0.41158938]
                                  ], dtype=np.float32)
        self.color = [(0, 0, 0), (255, 0, 0), (0, 255, 0), (0, 0, 255), ]

        # Multi-scale training: one square size is drawn per dataset instance from
        # {base - 32, base, ..., base + 96} in steps of 32.
        self.default_resolution = [384, 384]  # [h, w]
        self.min_size = self.default_resolution[0] - 32 * 1
        self.max_size = self.default_resolution[0] + 32 * 3
        self.single_resolution = random.choice(range(self.min_size, self.max_size + 1, 32))
        self.default_resolution = [self.single_resolution, self.single_resolution]
        print("input size: ", self.default_resolution)

    def rand(self, a=0, b=1):
        """Return a uniform random float in ``[a, b)`` drawn from NumPy's global RNG."""
        return np.random.rand() * (b - a) + a

    @staticmethod
    def _read_labels(json_path):
        """Return the ``"labels"`` list of one annotation file, closing the file afterwards."""
        with open(json_path) as f:
            return json.load(f)["labels"]

    def _split_annotations(self, annotations):
        """Split raw annotations into (pedestrian, all known classes, luggage) lists."""
        ped_anno = [a for a in annotations if a["class"] == "pedestrain"]
        anno = [a for a in annotations if a["class"] in self.class_name]
        lugga_anno = [a for a in annotations if a["class"] == "luggage"]
        return ped_anno, anno, lugga_anno

    def load_split_data(self):
        """Read all annotation files of ``self.split``.

        Every returned entry keeps its original fields except ``"annotations"``,
        which is replaced by ``"pedestrain_annos"``, ``"annos"`` (all known
        classes) and ``"luggage_annos"``; ``"filename"`` becomes a full path.
        For the train split ``self.max_objs`` is raised to the largest number of
        boxes found in one image and per-class counts are printed.

        Returns:
            list of dict: one entry per image; empty for an unknown split.
        """
        alldata = []
        if self.split == "train":
            split_dir = os.path.join(self.data_dir, self.split)
            count = [0] * len(self.class_name)
            for s in os.listdir(split_dir):
                s_dir = os.path.join(split_dir, s)
                labels = self._read_labels(os.path.join(s_dir, s + ".json"))
                print("Loading subset " + s + ", #images: " + str(len(labels)))
                filtered_data = []
                for item in labels:
                    item["filename"] = os.path.join(s_dir, item["filename"])
                    ped_anno, anno, lugga_anno = self._split_annotations(item.pop("annotations"))
                    # NOTE(review): the reviewed source had lost its indentation, so the
                    # extent of the original `if len(ped_anno) > 0:` is ambiguous. It is
                    # read here as a filter: training images without any pedestrian box
                    # are skipped, because cropping is anchored on a pedestrian. Confirm.
                    if not ped_anno:
                        continue
                    item["pedestrain_annos"] = ped_anno
                    item["annos"] = anno
                    item["luggage_annos"] = lugga_anno
                    filtered_data.append(item)
                    for a in anno:
                        count[self.class_name.index(a["class"])] += 1
                    self.max_objs = max(len(anno), self.max_objs)
                alldata += filtered_data
            sstr = "#images: {}".format(len(alldata))
            for i in range(1, len(self.class_name)):
                sstr += " ({} {})".format(self.class_name[i], count[i])
            print(sstr)
            print("max_objs", self.max_objs)
        elif self.split == "val" or self.split == "benchmark":
            # Only the "has_luggage" test data is used; per the original author this
            # set is only a placeholder evaluated during training.
            test_dir = os.path.join(self.data_dir, "test", "has_luggage")
            for s in os.listdir(test_dir):
                labels = self._read_labels(os.path.join(test_dir, s, s + ".json"))
                for d in labels:
                    d["filename"] = os.path.join(test_dir, s, d["filename"])
                    ped_anno, anno, lugga_anno = self._split_annotations(d.pop("annotations"))
                    d["pedestrain_annos"] = ped_anno
                    d["annos"] = anno
                    d["luggage_annos"] = lugga_anno
                    alldata.append(d)
            print("#val images: " + str(len(alldata)))
        return alldata

    def add_padding(self, im, anns):
        """Pad ``im`` with black borders of random size and shift the boxes to match.

        The image is padded on the left and right by the same amount and at the
        bottom; nothing is added at the top, so only ``"x"`` of each box changes.

        Args:
            im: image array indexed ``[row, col, ...]``.
            anns: list of box dicts with an ``"x"`` key; modified in place.

        Returns:
            tuple: ``(padded image, anns)``.
        """
        im_w, im_h = im.shape[1], im.shape[0]
        w_r = random.uniform(1.0, self.rand_wh_padding[0])
        h_r = random.uniform(1.0, self.rand_wh_padding[1])
        extra_w_half = int((im_w * w_r - im_w) * 0.5)
        extra_h_half = int((im_h * h_r - im_h) * 0.5)
        # Argument order is top, bottom, left, right.
        im = cv2.copyMakeBorder(im, 0, extra_h_half, extra_w_half, extra_w_half, cv2.BORDER_CONSTANT, None, value=0)
        for a in anns:
            a["x"] += extra_w_half
        return im, anns

    def _crop_to_union(self, pim, data, ped_anno, luggage_anno, left_xcoord, right_xcoord):
        """Crop to the union of the pedestrian box and all boxes, widened on one side.

        Used when the default pedestrian window would cut away too much of a box.
        The side away from the first luggage box keeps the default window edge.
        """
        anno = copy.deepcopy(data["annos"])
        ped_box = [ped_anno["x"], ped_anno["y"], ped_anno["x"] + ped_anno["width"],
                   ped_anno["y"] + ped_anno["height"]]
        for s_pb in anno:
            ped_box[0] = max(0, min(ped_box[0], s_pb["x"]))
            ped_box[1] = max(0, min(ped_box[1], s_pb["y"]))
            ped_box[2] = min(pim.shape[1] - 1, max(ped_box[2], s_pb["x"] + s_pb["width"]))
            ped_box[3] = min(pim.shape[0] - 1, max(ped_box[3], s_pb["y"] + s_pb["height"]))
        ped_box = [int(b) for b in ped_box]
        if luggage_anno["x"] >= ped_anno["x"]:
            # Luggage on the right: keep the default left edge, extend to the union's right.
            x_origin = left_xcoord
            res_pim = pim[ped_box[1]:ped_box[3], int(left_xcoord):ped_box[2]]
        else:
            # Luggage on the left: start at the union's left, keep the default right edge.
            x_origin = ped_box[0]
            res_pim = pim[ped_box[1]:ped_box[3], ped_box[0]:int(right_xcoord)]
        for sa in anno:
            sa["x"] -= x_origin
            sa["y"] -= ped_box[1]
        return res_pim, anno

    def crop_im_annos(self, data):
        """Read one image and crop it, with its boxes, around the first pedestrian.

        The default window extends 0.65 pedestrian widths to each side and 0.05
        pedestrian heights below the pedestrian box, clipped to the image. If the
        sample has luggage and this window keeps 90% or less of some box's area,
        the crop is recomputed from the union of all boxes instead. Samples
        without any pedestrian box are returned uncropped.

        Args:
            data: one entry produced by ``load_split_data``.

        Returns:
            tuple: ``(cropped image, deep-copied box dicts in crop coordinates)``.

        Raises:
            IOError: if the image cannot be decoded.
        """
        # Decode from a byte buffer first and fall back to cv2.imread (presumably to
        # support paths cv2.imread cannot open -- TODO confirm). The buffer must be
        # uint8: np.fromfile defaults to float64, which is not an encoded-image buffer.
        pim = None
        raw = np.fromfile(data["filename"], dtype=np.uint8)
        if raw.size > 0:
            pim = cv2.imdecode(raw, 1)
        if pim is None:
            pim = cv2.imread(data["filename"], 1)
        if pim is None:
            raise IOError("failed to read image: " + str(data["filename"]))

        anno = copy.deepcopy(data["annos"])
        height, width = pim.shape[0:2]

        pedestrians = data.get("pedestrain_annos") or []
        if not pedestrians:
            # No pedestrian to anchor the crop on: keep the whole image instead of
            # failing on a missing key / empty list / unset crop.
            return pim, anno

        ped_anno = copy.deepcopy(pedestrians[0])
        top = ped_anno["y"]
        left_xcoord = max(0, ped_anno["x"] - 0.65 * ped_anno["width"])
        right_xcoord = min(width, ped_anno["x"] + ped_anno["width"] + 0.65 * ped_anno["width"])
        right_ycoord = min(height, ped_anno["y"] + ped_anno["height"] + 0.05 * ped_anno["height"])
        res_pim = pim[int(top):int(right_ycoord), int(left_xcoord):int(right_xcoord)]
        crop_w = right_xcoord - left_xcoord
        crop_h = int(right_ycoord) - int(top)

        if len(data["luggage_annos"]) == 0:
            for s_anno in anno:
                s_anno["x1"] = max(0, s_anno["x"] - left_xcoord)
                s_anno["y1"] = max(0, s_anno["y"] - top)
                s_anno["x2"] = min(crop_w, s_anno["x"] + s_anno["width"] - left_xcoord)
                # Lower edge is only clamped at 0 here; load_im_anno clamps boxes
                # to the image again after scaling.
                s_anno["y2"] = max(0, s_anno["y"] + s_anno["height"] - top)
                s_anno["x"] = s_anno["x1"]
                s_anno["y"] = s_anno["y1"]
                s_anno["width"] = s_anno["x2"] - s_anno["x1"]
                s_anno["height"] = s_anno["y2"] - s_anno["y1"]
            return res_pim, anno

        luggage_anno = copy.deepcopy(data["luggage_annos"])[0]
        for i, s_anno in enumerate(anno):
            s_anno["x1"] = max(0, s_anno["x"] - left_xcoord)
            s_anno["y1"] = max(0, s_anno["y"] - top)
            s_anno["x2"] = min(crop_w, s_anno["x"] + s_anno["width"] - left_xcoord)
            s_anno["y2"] = min(crop_h, s_anno["y"] + s_anno["height"] - top)
            kept_area = (s_anno["x2"] - s_anno["x1"]) * (s_anno["y2"] - s_anno["y1"])
            full_area = data["annos"][i]["width"] * data["annos"][i]["height"]
            # Same test as kept/full > 0.9, written without a division so a
            # zero-area box takes the fallback path instead of raising.
            if kept_area > 0.9 * full_area:
                s_anno["x"] = s_anno["x1"]
                s_anno["y"] = s_anno["y1"]
                s_anno["width"] = s_anno["x2"] - s_anno["x1"]
                s_anno["height"] = s_anno["y2"] - s_anno["y1"]
            else:
                # The window cuts this box too much: restart from the untouched
                # annotations and crop to the union of all boxes.
                return self._crop_to_union(pim, data, ped_anno, luggage_anno,
                                           left_xcoord, right_xcoord)
        return res_pim, anno

    def load_im_anno(self, index):
        """Return the processed image and boxes of sample ``index``.

        Steps: crop around the pedestrian, add random padding (train only),
        resize to ``self.default_resolution`` (train / val only), then scale
        and clip the boxes.

        Returns:
            tuple: ``(image, anns)`` where ``anns`` is a list of
            ``{"category_id": int, "bbox": [x, y, width, height]}``.
        """
        jdata = self.data[index]
        im, anno = self.crop_im_annos(jdata)
        if self.split == "train" and self.rand_wh_padding is not None:
            im, anno = self.add_padding(im, anno)

        # Image and boxes are only resized for train / val.
        yscale, xscale = 1.0, 1.0
        in_h, in_w = im.shape[0], im.shape[1]
        if self.split in ["train", "val"]:
            in_h, in_w = self.default_resolution
            yscale, xscale = in_h / float(im.shape[0]), in_w / float(im.shape[1])
            im = cv2.resize(im, (in_w, in_h), interpolation=self.resize_inter)

        anns = []
        for a in anno:
            if any(key not in a for key in ("x", "y", "width", "height")):
                continue
            # Inclusive corner coordinates, scaled and clipped to the image.
            x1 = max(0, a["x"] * xscale)
            y1 = max(0, a["y"] * yscale)
            x2 = min(in_w - 1, (a["x"] + a["width"] - 1) * xscale)
            y2 = min(in_h - 1, (a["y"] + a["height"] - 1) * yscale)
            # convert to coco format (x, y, width, height)
            bbox = [x1, y1, (x2 - x1 + 1), (y2 - y1 + 1)]
            anns.append({"category_id": self.class_name.index(a["class"]), "bbox": bbox})
        return im, anns

    def __len__(self):
        """Return the number of loaded samples."""
        return self.num_samples
if __name__ == '__main__':
    # Visual sanity check: load every training sample, draw its boxes and show it.
    dataset = LuggageMulscaleAll(None, "train")
    thickness = 3  # outline thickness in pixels
    for index in range(len(dataset)):
        print(index)
        im, anns = dataset.load_im_anno(index)
        # OpenCV decodes colour images as BGR while PIL expects RGB.
        img = Image.fromarray(cv2.cvtColor(im.astype(np.uint8), cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(img)
        for ann in anns:
            # bbox is [x, y, width, height]
            left, top, box_w, box_h = ann["bbox"][0:4]
            # Draw nested rectangles growing outwards by one pixel each, so the
            # outline is actually `thickness` pixels wide and never inverts.
            for i in range(thickness):
                draw.rectangle([left - i, top - i, left + box_w + i, top + box_h + i],
                               outline=(135, 38, 87))
        img.show()
# CenterNet data processing
# First published 2023-01-06 19:00:20