本代码是pytorch版本的ssd实现,来源amdegroot/ssd.pytorch
一、PhotometricDistort
class PhotometricDistort(object):
    """Apply a randomized chain of photometric distortions to an image.

    The chain is: random brightness, then a colour pipeline (contrast,
    BGR->HSV, saturation, hue, HSV->BGR) in which the contrast step runs
    either first or last, and finally a random channel permutation.
    """

    def __init__(self):
        # Six entries; RandomContrast sits at both ends so that __call__ can
        # select "contrast first" or "contrast last" by slicing off one end.
        self.pd = [
            RandomContrast(),
            ConvertColor(transform='HSV'),
            RandomSaturation(),
            RandomHue(),
            ConvertColor(current='HSV', transform='BGR'),
            RandomContrast(),
        ]
        self.rand_brightness = RandomBrightness()
        self.rand_light_noise = RandomLightingNoise()

    def __call__(self, image, boxes, labels):
        """Return ``(image, boxes, labels)`` with a distorted copy of *image*.

        The input image is copied first, so the in-place transforms below do
        not modify the caller's array.
        """
        distorted = image.copy()
        distorted, boxes, labels = self.rand_brightness(distorted, boxes, labels)

        # Coin flip: nonzero -> contrast applied first, zero -> applied last.
        contrast_first = random.randint(2)
        steps = self.pd[:-1] if contrast_first else self.pd[1:]

        # Compose is defined elsewhere; presumably it runs the steps in order.
        distorted, boxes, labels = Compose(steps)(distorted, boxes, labels)
        return self.rand_light_noise(distorted, boxes, labels)
RandomBrightness(随机改变亮度):
在原有图片像素上加一个实数(实数的范围在[-32,32])
其中:此处的 random 为 numpy.random;random.randint(2) 随机返回整数 0 或 1(用于以 50% 的概率决定是否执行该操作),random.uniform(x, y) 在区间 [x, y) 内均匀采样一个实数
class RandomBrightness(object):
    """Randomly add one constant offset to every pixel of an image."""

    def __init__(self, delta=32):
        # delta bounds the offset magnitude and must lie in [0, 255].
        assert delta >= 0.0
        assert delta <= 255.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        """Shift *image* in place with probability 1/2.

        The offset is drawn uniformly from ``[-delta, delta)``. ``boxes`` and
        ``labels`` are passed through untouched.
        """
        apply_shift = random.randint(2)
        if not apply_shift:
            return image, boxes, labels

        shift = random.uniform(-self.delta, self.delta)
        # In-place add: expects a float image (an integer array would fail
        # to accept a float offset in place).
        image += shift
        return image, boxes, labels
RandomContrast(随机改变对比度):
在原图像素上乘一个系数(系数的范围在[0.5,1.5])
class RandomContrast(object):
    """Randomly scale every pixel of an image by one multiplicative factor."""

    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."

    def __call__(self, image, boxes=None, labels=None):
        """Scale *image* in place with probability 1/2.

        The factor is drawn uniformly from ``[lower, upper)``. Expects a
        float image. ``boxes`` and ``labels`` are passed through untouched.
        """
        apply_scale = random.randint(2)
        if not apply_scale:
            return image, boxes, labels

        gain = random.uniform(self.lower, self.upper)
        image *= gain
        return image, boxes, labels
ConvertColor(变换颜色空间):
变换颜色空间,若当前为BGR则变换到HSV,若当前为HSV变换到BGR
其中,cv2.cvtColor函数功能是变换空间
class ConvertColor(object):
    """Convert an image between the BGR and HSV colour spaces via OpenCV."""

    def __init__(self, current='BGR', transform='HSV'):
        self.transform = transform  # target colour space
        self.current = current      # colour space the input is assumed to be in

    def __call__(self, image, boxes=None, labels=None):
        """Return ``(converted_image, boxes, labels)``.

        Only BGR->HSV and HSV->BGR are supported; any other pair of
        ``current``/``transform`` raises ``NotImplementedError``.
        """
        direction = (self.current, self.transform)
        # Resolve the OpenCV conversion code first so that unsupported pairs
        # fail before any image work is attempted.
        if direction == ('BGR', 'HSV'):
            code = cv2.COLOR_BGR2HSV
        elif direction == ('HSV', 'BGR'):
            code = cv2.COLOR_HSV2BGR
        else:
            raise NotImplementedError
        return cv2.cvtColor(image, code), boxes, labels
RandomSaturation(随机改变饱和度):
在HSV空间的S维度上乘一个系数(系数在范围[0.5,1.5]中随机得到一个实数)
class RandomSaturation(object):
    """Randomly scale channel 1 of an image by one multiplicative factor.

    Channel 1 is the saturation channel when the image is in HSV, which is
    how PhotometricDistort uses this transform (after a BGR->HSV conversion).
    """

    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        # Messages name this transform; they previously said "contrast",
        # copied from RandomContrast, which misdirected debugging.
        assert self.upper >= self.lower, "saturation upper must be >= lower."
        assert self.lower >= 0, "saturation lower must be non-negative."

    def __call__(self, image, boxes=None, labels=None):
        """Scale ``image[:, :, 1]`` in place with probability 1/2.

        The factor is drawn uniformly from ``[lower, upper)``. No clipping is
        applied afterwards. ``boxes`` and ``labels`` are passed through.
        """
        if random.randint(2):
            image[:, :, 1] *= random.uniform(self.lower, self.upper)
        return image, boxes, labels
RandomHue(随机改变色调):
在HSV空间的H维度随机加一个实数(实数的范围[-18.0,18.0])
class RandomHue(object):
    """Randomly rotate channel 0 of an image, wrapping into [0, 360].

    Channel 0 is the hue channel when the image is in HSV; the 360 wrap
    assumes hue is expressed in degrees - confirm against the colour
    conversion used upstream.
    """

    def __init__(self, delta=18.0):
        assert delta >= 0.0 and delta <= 360.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        """Shift ``image[:, :, 0]`` in place with probability 1/2.

        The shift is drawn uniformly from ``[-delta, delta)``; values that
        leave the [0, 360] range are wrapped back by one full turn.
        ``boxes`` and ``labels`` are passed through untouched.
        """
        if not random.randint(2):
            return image, boxes, labels

        hue = image[:, :, 0]  # view: edits below write through to image
        hue += random.uniform(-self.delta, self.delta)
        hue[hue > 360.0] -= 360.0  # wrap values above a full turn
        hue[hue < 0.0] += 360.0    # wrap negative values
        return image, boxes, labels
RandomLightingNoise(随机变换通道):
设置了6种变换方式,随机选择一种,将BGR三个通道顺序改变
class RandomLightingNoise(object):
    """Randomly reorder the three colour channels of an image."""

    def __init__(self):
        # All six orderings of three channels (the identity is included).
        self.perms = ((0, 1, 2), (0, 2, 1),
                      (1, 0, 2), (1, 2, 0),
                      (2, 0, 1), (2, 1, 0))

    def __call__(self, image, boxes=None, labels=None):
        """Return ``(image, boxes, labels)``, permuting channels half the time.

        With probability 1/2 one of ``self.perms`` is picked uniformly and
        handed to SwapChannels (defined elsewhere), whose result replaces
        the image. ``boxes`` and ``labels`` are passed through untouched.
        """
        if not random.randint(2):
            return image, boxes, labels

        order = self.perms[random.randint(len(self.perms))]
        return SwapChannels(order)(image), boxes, labels
二、Expand(随机扩张图片)
将原有图片的高和宽乘以一个ratio系数(在[1,4]内随机取值),将原有图片放在扩张后图片中的一个随机位置(并非固定在右下角),其他位置像素值使用均值填充,相应的bbox也按同样的偏移量平移;该操作以50%的概率执行
class Expand(object):
    """Randomly paste the image onto a larger, mean-filled canvas."""

    def __init__(self, mean):
        self.mean = mean  # fill value for the canvas area around the image

    def __call__(self, image, boxes, labels):
        """Return ``(image, boxes, labels)``, expanded half of the time.

        When applied, the canvas is ``ratio`` times the image size with
        ``ratio`` drawn from ``[1, 4)``, the image is pasted at a random
        offset, and a shifted copy of ``boxes`` is returned. Otherwise the
        inputs are returned unchanged.
        """
        if random.randint(2):  # coin flip: nonzero means skip
            return image, boxes, labels

        height, width, depth = image.shape
        ratio = random.uniform(1, 4)
        # Top-left corner of the pasted image inside the canvas.
        left = random.uniform(0, width * ratio - width)
        top = random.uniform(0, height * ratio - height)

        canvas_shape = (int(height * ratio), int(width * ratio), depth)
        canvas = np.zeros(canvas_shape, dtype=image.dtype)
        canvas[:, :, :] = self.mean  # broadcast the mean over every pixel
        canvas[int(top):int(top + height), int(left):int(left + width)] = image

        # Boxes are stored as (x_min, y_min, x_max, y_max); both corners
        # move by the same integer offset. Work on a copy so the caller's
        # array is untouched.
        offset = (int(left), int(top))
        shifted = boxes.copy()
        shifted[:, :2] += offset
        shifted[:, 2:] += offset
        return canvas, shifted, labels
三、RandomSampleCrop(随机剪裁)
在图像上随机剪裁矩形区域,只保留中心点落在裁剪区域内的bbox(若没有任何bbox的中心点落在裁剪区域内,则重新采样),并将保留下来的bbox裁剪到该区域范围内、转换为裁剪区域坐标系下的坐标
class RandomSampleCrop(object):
    """Randomly crop an image and keep the boxes centred inside the crop.

    Each call picks a sampling mode at random: either return the inputs
    unchanged, or search for a crop whose overlap with the ground-truth
    boxes satisfies a (min, max) constraint.
    """

    def __init__(self):
        self.sample_options = (
            # use the entire original input image
            None,
            # (min_iou, max_iou) constraints; None means unbounded
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # unconstrained random patch
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        """Return ``(image, boxes, labels)`` for a randomly chosen crop.

        Boxes are rows of ``(x_min, y_min, x_max, y_max)``. Only boxes whose
        centre lies strictly inside the crop are kept; they are clipped to
        the crop and translated into its coordinate frame. If 50 attempts
        under one mode fail, a new mode is drawn.
        """
        height, width, _ = image.shape
        while True:
            # Index with randint rather than random.choice: the options mix
            # None with tuples, and numpy's choice must first convert its
            # argument to an array, which fails for such ragged input on
            # recent NumPy releases.
            mode = self.sample_options[random.randint(len(self.sample_options))]
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # At most 50 trials per mode.
            for _ in range(50):
                # Crop size is between 30% and 100% of each image dimension.
                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # Keep the aspect ratio within [0.5, 2].
                if h / w < 0.5 or h / w > 2:
                    continue

                # Top-left corner of the crop. The lower bound 0 is given
                # explicitly: with a single argument numpy treats the value
                # as ``low`` and uses ``high=1.0``.
                left = random.uniform(0, width - w)
                top = random.uniform(0, height - h)

                # Crop rectangle as [x_min, y_min, x_max, y_max].
                rect = np.array([int(left), int(top),
                                 int(left + w), int(top + h)])

                # jaccard_numpy is defined elsewhere; presumably it returns
                # the IoU of every box with the crop rectangle.
                overlap = jaccard_numpy(boxes, rect)

                # Reject when EITHER bound is violated. (Requiring both to
                # be violated, with max_iou defaulting to +inf, meant the
                # constraint could never reject a crop.)
                if overlap.min() < min_iou or overlap.max() > max_iou:
                    continue

                # Box centres, and which of them fall strictly inside rect.
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
                after_min = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
                before_max = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
                mask = after_min * before_max

                # No box centre inside the crop: try another rectangle.
                if not mask.any():
                    continue

                current_image = image[rect[1]:rect[3], rect[0]:rect[2], :]
                current_boxes = boxes[mask, :].copy()
                current_labels = labels[mask]

                # Clip the kept boxes to the crop, then shift both corners
                # so that the crop's top-left corner becomes the origin.
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2], rect[:2])
                current_boxes[:, :2] -= rect[:2]
                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], rect[2:])
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels