Hi everyone~
I mainly work on object detection, generative adversarial networks, and clustering. Here I write up small problems I run into at work. Feel free to follow!
Schematic of splitting an image into overlapping tiles:
This post is about improving small-object detection by splitting the original image at the detection (inference) stage and running the detector on each tile. The motivation: when the original frame is large, it is downscaled before being fed into the detector, so a small object is left with very few pixels and detection performance drops.
Splitting the image and detecting each tile separately reduces that loss of small-object pixels. Splitting with overlap handles the corner case where a hard (non-overlapping) split would cut an object in two at a tile boundary and make it undetectable. With a 1920x1080 frame, tiles 3/5 of the frame size, and 10% overlap, the code below produces a 2x2 grid of four 1152x648 tiles.
The split and merge code:
import numpy as np

split_height = 1080 * 3 // 5   # 648-pixel tile height for a 1080-pixel-high frame
split_width = 1920 * 3 // 5    # 1152-pixel tile width for a 1920-pixel-wide frame

def split_img(img):
    img_h, img_w, _ = img.shape

    def start_points(size, split_size, overlap=0):
        # start offsets of the tiles along one axis
        points = [0]
        stride = int(split_size * (1 - overlap))
        counter = 1
        while True:
            pt = stride * counter
            if pt + split_size >= size:
                points.append(size - split_size)  # clamp the last tile to the image border
                break
            else:
                points.append(pt)
            counter += 1
        return points

    X_points = start_points(img_w, split_width, 0.1)   # 10% overlap along x
    Y_points = start_points(img_h, split_height, 0.1)  # 10% overlap along y
    splitted_images = []
    for i in Y_points:
        for j in X_points:
            split = img[i:i + split_height, j:j + split_width]
            splitted_images.append(split)
    return splitted_images, X_points, Y_points

def merge_subimg(img, X_points, Y_points, splitted_images):
    # img is the original full frame (1080x1920); it only provides the output shape
    final_image = np.zeros_like(img)
    index = 0
    for i in Y_points:
        for j in X_points:
            # later tiles overwrite the overlap regions of earlier ones
            final_image[i:i + split_height, j:j + split_width] = splitted_images[index]
            index += 1
    return final_image
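A quick sanity check of the two helpers (a minimal sketch, not part of the detector; it assumes only numpy and the two functions above, and uses a random array as a stand-in for a real 1080x1920 frame): splitting and immediately re-merging should reproduce the original frame, and the tile origins show the 2x2 grid.

import numpy as np

dummy = np.random.randint(0, 255, (1080, 1920, 3), dtype=np.uint8)  # stand-in for a real BGR frame

tiles, X_points, Y_points = split_img(dummy)
print(len(tiles), X_points, Y_points)   # 4 tiles, X_points [0, 768], Y_points [0, 432]

# merging the untouched tiles reproduces the frame exactly, because the
# overlap regions are simply overwritten with identical pixels
restored = merge_subimg(dummy, X_points, Y_points, tiles)
print((restored == dummy).all())        # True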
In the detection script (a YOLOv5-style detect.py):
for path, img, im0s, vid_cap in dataset:
    # im0s is the full original frame; each tile returned by split_img() is then treated as its own "original image"
    splitted_images, X_points, Y_points = split_img(im0s)
    pred_splitted_images = []  # annotated tiles of im0s (4 tiles for a 1080x1920 frame)
    print("length of splitted_images: ", len(splitted_images))

    for img0s in splitted_images:
        print("img0s shape is: ", img0s.shape)

        # the stock preprocessing below is indented one level so that it runs once per tile
        img = letterbox(img0s)[0]
        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW (e.g. 3x416x416)
        img = np.ascontiguousarray(img)
        print("img shape is: ", img.shape)

        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # add a batch dimension on axis 0

        # Inference
        t1 = time_synchronized()
        # the stock script's img/img0 now refer to a single tile (img0s) instead of the whole frame
        pred = model(img, augment=opt.augment)[0]  # predictions for one of the 4 tiles

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', img0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg; reused after merging to save the full frame
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 (tile) size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results: number of each detected class
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f'{n} {names[int(c)]}s, '  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            pred_splitted_images.append(im0)  # keep the annotated tile for merging later
            print("the length of pred_splitted_images : ", len(pred_splitted_images))

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
    # merge the annotated tiles back into a full-size frame (the stock script's "img0")
    final_image = merge_subimg(im0s, X_points, Y_points, pred_splitted_images)

    # Save results (image with detections)
    if save_img:
        if dataset.mode == 'image':
            cv2.imwrite(save_path, final_image)  # save the merged final_image
        else:  # 'video'
            if vid_path != save_path:  # new video
                vid_path = save_path
                if isinstance(vid_writer, cv2.VideoWriter):
                    vid_writer.release()  # release previous video writer

                fourcc = 'mp4v'  # output video codec
                fps = vid_cap.get(cv2.CAP_PROP_FPS)
                w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
            vid_writer.write(final_image)  # write the merged frame, not a single tile
if save_txt or save_img:
    s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
    print(f"Results saved to {save_dir}{s}")

print(f'Done. ({time.time() - t0:.3f}s)')