滑动窗口切割图片并重定位标注框_对滑窗切割后进行检测的结果如何还原-CSDN博客

本文链接：https://blog.csdn.net/liuweiyuxiang/article/details/86379723

在进行目标检测的时候，尤其是小目标检测，将图片放大后再进行检测是一个常用的手段。但是放大后的图像在输入网络的时候，又会被resize到一定的尺寸，这样我们的放大就没有起到作用。所以有时候我们需要将图片放大后，使用滑动窗口切割产生多幅图像，然后再输入到网络中进行学习。在放大图像和切割图像的过程中，我们需要同时调整标注框。
下面分享一下我写的放大和滑动窗口切割图像的python代码：

#coding:utf-8
import cv2
import os
import codecs

def zomImg(impath, boxes, scale):
    """Enlarge an image by ``scale`` and rescale its boxes to match.

    Args:
        impath: Path of the image file to load with ``cv2.imread``.
        boxes: List of mutable ``[x, y, w, h]`` boxes (top-left corner plus
            width/height). The boxes are updated IN PLACE.
        scale: Zoom factor applied to both image axes and to the boxes.

    Returns:
        ``(resized_image, boxes)`` where ``boxes`` is the same list object
        that was passed in, now holding the rescaled coordinates.

    Raises:
        IOError: If the image cannot be read.
    """
    img = cv2.imread(impath)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising; fail early with the offending path.
        raise IOError('cannot read image: %s' % impath)
    height, width = img.shape[:2]

    resizeImg = cv2.resize(
        img,
        (int(width * scale), int(height * scale)),
        interpolation=cv2.INTER_LINEAR,
    )
    for box in boxes:
        nw = box[2] * scale
        nh = box[3] * scale
        # Scale the box about its centre, then convert back to a top-left
        # corner; the corner is clamped so it never becomes negative.
        centerx = box[0] + box[2] / 2
        centery = box[1] + box[3] / 2
        left = centerx * scale - nw / 2
        top = centery * scale - nh / 2
        box[0] = left if left > 0 else 0
        box[1] = top if top > 0 else 0
        box[2] = nw
        box[3] = nh

    return resizeImg, boxes

def slide_crop(img,kernelw,kernelh,overlap_half=True,stridex=0,stridey=0):
    """Cut an image into fixed-size windows with a sliding window.

    Args:
        img: Image array indexed as ``img[row, col, ...]``.
        kernelw: Window width in pixels (truncated to an integer).
        kernelh: Window height in pixels (truncated to an integer).
        overlap_half: When true, neighbouring windows overlap by half: the
            x stride is half the window width and the y stride is half the
            window height; ``stridex``/``stridey`` are then ignored.
        stridex: Horizontal step, used only when ``overlap_half`` is false.
        stridey: Vertical step, used only when ``overlap_half`` is false.

    Returns:
        ``(img_list, corner_list)``: the cropped windows in row-major order
        and the matching ``(startx, starty)`` top-left corners. Only windows
        that fit entirely inside the image are produced.

    Raises:
        ValueError: If the window size or the effective stride is not
            positive.
    """
    height, width = img.shape[:2]
    # Slice bounds must be integers; callers may pass floats such as
    # ``width / scale``.
    kernelw = int(kernelw)
    kernelh = int(kernelh)
    if kernelw <= 0 or kernelh <= 0:
        raise ValueError('kernelw and kernelh must be positive')
    if overlap_half:
        stridex = max(kernelw // 2, 1)
        stridey = max(kernelh // 2, 1)
    else:
        stridex = int(stridex)
        stridey = int(stridey)
    if stridex <= 0 or stridey <= 0:
        raise ValueError('stridex and stridey must be positive when overlap_half is False')

    # Number of full windows along each axis; zero or negative when the
    # image is smaller than the window, which yields no crops at all.
    stepx = (width - kernelw) // stridex + 1
    stepy = (height - kernelh) // stridey + 1

    img_list = []
    corner_list = []
    for r in range(stepy):
        starty = r * stridey
        for c in range(stepx):
            startx = c * stridex
            corner_list.append((startx, starty))
            img_list.append(img[starty:starty + kernelh, startx:startx + kernelw])
    return img_list,corner_list

def show_box(img,boxes):
    """Draw every box on ``img`` and display it until a key is pressed.

    Args:
        img: Image array to draw on; the rectangles are drawn into it
            directly.
        boxes: Iterable of ``[x, y, w, h]`` boxes (top-left corner plus
            width/height). Values may be floats.
    """
    for box in boxes:
        # cv2.rectangle needs integer pixel coordinates, while rescaled
        # boxes can carry float values.
        left = int(round(box[0]))
        top = int(round(box[1]))
        right = int(round(box[0] + box[2]))
        bottom = int(round(box[1] + box[3]))
        cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 1)
    cv2.imshow('image',img)
    cv2.waitKey(0)
def _read_annotation_records(lines):
    """Yield ``(name, boxes)`` records parsed from annotation file lines.

    Each record is an image name line, a line whose first token is the box
    count, then that many lines whose first four tokens are ``x y w h``.
    Blank lines between records are skipped.
    """
    count = 0
    total = len(lines)
    while count < total:
        name = lines[count].strip()
        count += 1
        if not name:
            continue
        num = int(lines[count].split()[0])
        count += 1
        boxes = []
        for _ in range(num):
            # Take the first four tokens rather than "all but the last", so
            # lines with or without a trailing space parse the same way.
            boxes.append([int(value) for value in lines[count].split()[:4]])
            count += 1
        yield name, boxes


def crop_dataset(imgpath, scale, annotation, cropAnno, savePath,dataset,sub=''):
    """Zoom every annotated image, slide-crop it and write crops + labels.

    For each record of the annotation file the image is enlarged by
    ``scale``, cut into half-overlapping windows of ``1/scale`` of the
    enlarged size, and each window is saved as an image. Boxes that lie
    completely inside a window are shifted into that window's coordinates
    and written to a new annotation file; boxes cut by a window border are
    dropped for that window, and windows without boxes get no entry.

    Args:
        imgpath: Root directory that the image names are relative to.
        scale: Zoom factor applied before cropping.
        annotation: Path of the input annotation file (UTF-8).
        cropAnno: Directory receiving the new annotation file.
        savePath: Directory under which the crop folder is created.
        dataset: Dataset name used in folder and file names.
        sub: Optional suffix appended to the crop folder name.
    """
    savePath = os.path.join(savePath, dataset+sub+'_crop_'+str(scale))
    if not os.path.isdir(savePath):
        os.makedirs(savePath)
    if not os.path.isdir(cropAnno):
        os.makedirs(cropAnno)

    with codecs.open(annotation,'r',encoding='utf-8') as f:
        annotationList = f.readlines()

    # NOTE(review): the annotation file name and the image paths recorded in
    # it do not include ``sub``, although the crop folder does - confirm
    # whether that is intended before using a non-empty ``sub``.
    anno_path = os.path.join(cropAnno,'head_train_'+dataset+'_crop_'+str(scale)+'_bbx_gt.txt')
    with codecs.open(anno_path,'w',encoding='utf-8') as f:
        for name, boxes in _read_annotation_records(annotationList):
            print(name)
            resizeImg, boxes = zomImg(imgpath +'/'+ name, boxes, scale)
            #show_box(resizeImg, boxes)
            height, width = resizeImg.shape[:2]
            # Integer window size: it is used as a slice bound when cropping.
            kernelw = int(width / scale)
            kernelh = int(height / scale)
            img_list, corner_list = slide_crop(resizeImg,kernelw,kernelh,overlap_half=True)

            boxes_list = []
            for x, y in corner_list:
                inside = []
                for box in boxes:
                    # Keep only boxes fully contained in the window.
                    if (box[0] >= x and box[1] >= y
                            and box[0] + box[2] <= x + kernelw
                            and box[1] + box[3] <= y + kernelh):
                        # Build a new list so the source box is not aliased.
                        inside.append([box[0] - x, box[1] - y, box[2], box[3]])
                boxes_list.append(inside)

            # Names are expected to look like "<dataset>/<subdir>/<file>.<ext>".
            name_split = name.split('/')
            subdir = name_split[1]
            stem = name_split[2].split('.')[0]
            ext = name_split[2].split('.')[-1]
            # cv2.imwrite does not create missing directories.
            out_dir = os.path.join(savePath, subdir)
            if not os.path.isdir(out_dir):
                os.makedirs(out_dir)

            for i, (img, crop_boxes) in enumerate(zip(img_list, boxes_list)):
                img_name = subdir + '/' + stem + '_' + str(i) + '.' + ext
                print(savePath + '/' + img_name)
                cv2.imwrite(savePath + '/' + img_name, img)
                if not crop_boxes:
                    # Crops without any target get no annotation entry.
                    continue
                f.write(dataset+'_crop_'+str(scale)+'/'+img_name+'\n')
                f.write(str(len(crop_boxes)) + '\n')
                for box in crop_boxes:
                    # Written as integers so the file can be parsed again
                    # with the same integer-based reader.
                    f.write(' '.join(str(int(round(value))) for value in box[:4]) + '\n')


if __name__ == '__main__':
    # Example run: crop the 'Part_A' training split at 2x zoom using the
    # author's local dataset layout.
    dataset = 'Part_A'
    crop_dataset(
        imgpath='G:/head_dataset/head_train/images',
        scale=2,
        annotation='G:/head_dataset/head_split/head_train_' + dataset + '_bbx_gt.txt',
        cropAnno='G:/head_dataset/head_crop/split/',
        savePath='G:/head_dataset/head_crop/images/',
        dataset=dataset,
        sub='',
    )

原图的标注文件 'head_train_' + dataset + '_bbx_gt.txt' 是这种形式的：

Part_B/val_data/IMG_335
23 #标注框的个数
0 540 96 96  # x,y,w,h
96 652 126 126 
85 265 78 78 
215 294 96 96 
289 266 109 109 
392 378 109 109 
859 702 93 93 
502 178 57 57 
301 153 60 60 
220 150 47 47 
200 12 26 26 
254 17 26 26 
278 12 26 26 
290 60 26 26 
315 73 26 26 
348 69 38 38 
431 62 45 45 
672 79 52 52 
294 0 26 26 
324 1 26 26 
652 129 52 52 
630 87 52 52 
402 7 38 38

可视化后是这样的：
（在这里插入图片描述）
将图片放大两倍后进行滑动切割，每次滑动重叠一半，会产生9张图，产生的标注框是这样的：