由于yolov5 模型是正方形尺寸,即使输入的图片是长方形,也会被resize成正方形,实际使用起来不够灵活,这里对源码进行修改,使得输出的模型是长方形尺寸
1、train.py中做如下修改:
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=768, help='train, val image size (pixels)')
#改为:
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[384, 768], help='train, val image size (pixels),(height,width)')
#这里的384为输入图像的高,768为输入图像的宽
#-------------------------------------------
#-------------------------------------------
parser.add_argument('--rect', action='store_true', help='rectangular training')
#改为:
parser.add_argument('--rect', action='store_true',default=True, help='rectangular training')
#--------------------------------------------
#--------------------------------------------
imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple
#改为:
if isinstance(opt.imgsz, int):
imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)
else:
imgsz = [check_img_size(x, gs, floor=gs * 2) for x in opt.imgsz]
#--------------------------------------------
#--------------------------------------------
hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers
#改为:
if isinstance(imgsz,int):
hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers
else:
hyp['obj'] *= (max(imgsz) / 640) ** 2 * 3. / nl # scale to image size and layers
#--------------------------------------------
#--------------------------------------------
sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size
#改为:
if isinstance(imgsz,int):
sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size
else:
sz = random.randrange(int(max(imgsz) * 0.5), int(max(imgsz) * 1.5) + gs) // gs * gs # size
#--------------------------------------------
2、val.py中做如下修改:
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
#改为:
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[384, 768], help='inference size (pixels) (height,width)')
#--------------------------------------------
#--------------------------------------------
imgsz = check_img_size(imgsz, s=gs) # check image size
#改为:
if isinstance(imgsz,int):
imgsz = check_img_size(imgsz, s=gs) # check image size
else:
imgsz = [check_img_size(x, s=gs) for x in imgsz]
#--------------------------------------------
#--------------------------------------------
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
#改为:
if isinstance(imgsz,int):
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
else:
model(torch.zeros(1, 3, imgsz[0], imgsz[1]).to(device).type_as(next(model.parameters()))) # run once
#--------------------------------------------
#--------------------------------------------
if not training:
shape = (batch_size, 3, imgsz, imgsz)
#改为:
if not training:
if isinstance(imgsz,int):
shape = (batch_size, 3, imgsz, imgsz)
else:
shape = (batch_size, 3, imgsz[0], imgsz[1])
#--------------------------------------------
3、detect.py中做如下修改:
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
#改为:
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[384, 768], help='inference size (pixels) (height,width)')
#--------------------------------------------
#--------------------------------------------
imgsz = check_img_size(imgsz, s=stride) # check image size
#改为:
if isinstance(imgsz,int):
imgsz = check_img_size(imgsz, s=stride) # check image size
else:
imgsz = [check_img_size(x, s=stride) for x in imgsz]
#--------------------------------------------
#--------------------------------------------
# Run inference
if pt and device.type != 'cpu':
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
#改为:
# Run inference
if pt and device.type != 'cpu':
if isinstance(imgsz,int):
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
else:
model(torch.zeros(1, 3, imgsz[0], imgsz[1]).to(device).type_as(next(model.parameters()))) # run once
#--------------------------------------------
4、datasets.py中做如下修改:
#----------------LoadImagesAndLabels----------------------------
self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
self.mosaic_border = [-img_size // 2, -img_size // 2]
#改为:
self.mosaic = self.augment
if isinstance(img_size, int):
self.mosaic_border = [-img_size // 2, -img_size // 2]
else:
self.mosaic_border = [-img_size[0] // 2, -img_size[1] // 2] #height,width
#----------------LoadImagesAndLabels----------------------------
#----------------load_image-中---------------------------
r = self.img_size / max(h0, w0) # ratio
if r != 1: # if sizes are not equal
img = cv2.resize(img, (int(w0 * r), int(h0 * r)),interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR)
#改为:
if isinstance(self.img_size, int):
r = self.img_size / max(h0, w0) # ratio
if r != 1: # if sizes are not equal
img = cv2.resize(img, (int(w0 * r), int(h0 * r)),interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR)
else:
img = cv2.resize(img, (self.img_size[1], self.img_size[0]), interpolation=cv2.INTER_AREA)#(width,height)
#----------------load_image-中---------------------------
#----------------load_mosaic-中---------------------------
yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
#改为:
if isinstance(self.img_size,int):
yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
else:
s_h, s_w = s[0], s[1]  # height, width
yc, xc = [int(random.uniform(-b, 2 * d + b)) for b, d in zip(self.mosaic_border, self.img_size)] # mosaic center x, y
#--------------------------------------------
# place img in img4
if i == 0: # top left
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
#改为:
if isinstance(self.img_size, int):
# place img in img4
if i == 0: # top left
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
else:
# place img in img4
if i == 0: # top left
img4 = np.full((s_h * 2, s_w * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s_w * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s_h * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s_w * 2), min(s_h * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
#--------------------------------------------
#--------------------------------------------
for x in (labels4[:, 1:], *segments4):
np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
#改为:
for x in (labels4[:, 1:], *segments4):
    if isinstance(s, int):
        np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
    else:
        np.clip(x[..., 0::2], 0, 2 * s[1], out=x[..., 0::2])  # clip x coords to 2*width
        np.clip(x[..., 1::2], 0, 2 * s[0], out=x[..., 1::2])  # clip y coords to 2*height
#--------------------------------------------
#----------------load_mosaic-中---------------------------
5、export.py中做如下修改:
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image (height, width)')
#改为:
parser.add_argument('--img-size', nargs='+', type=int, default=[384, 768], help='image (height, width)')