由于yolov5 模型是正方形尺寸,即使输入的图片是长方形,也会被resize成正方形,实际使用起来不够灵活,这里对源码进行修改,使得输出的模型是长方形尺寸
1、train.py中做如下修改:
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=768, help='train, val image size (pixels)')
#改为:
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[384, 768], help='train, val image size (pixels),(height,width)')
#这里的384为输入图像的高,768为输入图像的宽
#-------------------------------------------
#-------------------------------------------
parser.add_argument('--rect', action='store_true', help='rectangular training')
#改为:
parser.add_argument('--rect', action='store_true',default=True, help='rectangular training')
#--------------------------------------------
#--------------------------------------------
imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple
#改为:
if isinstance(opt.imgsz, int):
imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)
else:
imgsz = [check_img_size(x, gs, floor=gs * 2) for x in opt.imgsz]
#--------------------------------------------
#--------------------------------------------
hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers
#改为:
if isinstance(imgsz,int):
hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers
else:
hyp['obj'] *= (max(imgsz) / 640) ** 2 * 3. / nl # scale to image size and layers
#--------------------------------------------
#--------------------------------------------
sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size
#改为:
if isinstance(imgsz,int):
sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size
else:
sz = random.randrange(int(max(imgsz) * 0.5), int(max(imgsz) * 1.5) + gs) // gs * gs # size
#--------------------------------------------
2、val.py中做如下修改:
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
#改为:
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[384, 768], help='inference size (pixels) (height,width)')
#--------------------------------------------
#--------------------------------------------
imgsz = check_img_size(imgsz, s=gs) # check image size
#改为:
if isinstance(imgsz,int):
imgsz = check_img_size(imgsz, s=gs) # check image size
else:
imgsz = [check_img_size(x, s=gs) for x in imgsz]
#--------------------------------------------
#--------------------------------------------
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
#改为:
if isinstance(imgsz,int):
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
else:
model(torch.zeros(1, 3, imgsz[0], imgsz[1]).to(device).type_as(next(model.parameters()))) # run once
#--------------------------------------------
#--------------------------------------------
if not training:
shape = (batch_size, 3, imgsz, imgsz)
#改为:
if not training:
if isinstance(imgsz,int):
shape = (batch_size, 3, imgsz, imgsz)
else:
shape = (batch_size, 3, imgsz[0], imgsz[1])
#--------------------------------------------
3、detect.py中做如下修改:
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
#改为:
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[384, 768], help='inference size (pixels) (height,width)')
#--------------------------------------------
#--------------------------------------------
imgsz = check_img_size(imgsz, s=stride) # check image size
#改为:
if isinstance(imgsz,int):
imgsz = check_img_size(imgsz, s=stride) # check image size
else:
imgsz = [check_img_size(x, s=stride) for x in imgsz]
#--------------------------------------------
#--------------------------------------------
# Run inference
if pt and device.type != 'cpu':
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
#改为:
# Run inference
if pt and device.type != 'cpu':
if isinstance(imgsz,int):
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
else:
model(torch.zeros(1, 3, imgsz[0], imgsz[1]).to(device).type_as(next(model.parameters()))) # run once
#--------------------------------------------
4、datasets.py中做如下修改:
#----------------LoadImagesAndLabels----------------------------
self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
self.mosaic_border = [-img_size // 2, -img_size // 2]
#改为:
self.mosaic = self.augment
if isinstance(img_size, int):
self.mosaic_border = [-img_size // 2, -img_size // 2]
else:
self.mosaic_border = [-img_size[0] // 2, -img_size[1] // 2] #height,width
#----------------LoadImagesAndLabels----------------------------
#----------------load_image-中---------------------------
r = self.img_size / max(h0, w0) # ratio
if r != 1: # if sizes are not equal
img = cv2.resize(img, (int(w0 * r), int(h0 * r)),interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR)
#改为:
if isinstance(self.img_size, int):
r = self.img_size / max(h0, w0) # ratio
if r != 1: # if sizes are not equal
img = cv2.resize(img, (int(w0 * r), int(h0 * r)),interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR)
else:
img = cv2.resize(img, (self.img_size[1], self.img_size[0]), interpolation=cv2.INTER_AREA)#(width,height)
#----------------load_image-中---------------------------
#----------------load_mosaic-中---------------------------
yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
#改为:
if isinstance(self.img_size,int):
yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
else:
s_h, s_w = s[0], s[1]  # height, width
yc, xc = [int(random.uniform(-b, 2 * d + b)) for b, d in zip(self.mosaic_border, self.img_size)] # mosaic center x, y
#--------------------------------------------
# place img in img4
if i == 0: # top left
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
#改为:
if isinstance(self.img_size, int):
# place img in img4
if i == 0: # top left
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
else:
# place img in img4
if i == 0: # top left
img4 = np.full((s_h * 2, s_w * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s_w * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s_h * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s_w * 2), min(s_h * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
#--------------------------------------------
#--------------------------------------------
for x in (labels4[:, 1:], *segments4):
np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
#改为:
for x in (labels4[:, 1:], *segments4):
    if isinstance(s, int):
        np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
    else:
        np.clip(x[..., 0::2], 0, 2 * s[1], out=x[..., 0::2])  # clip x coords to 2*width
        np.clip(x[..., 1::2], 0, 2 * s[0], out=x[..., 1::2])  # clip y coords to 2*height
#--------------------------------------------
#----------------load_mosaic-中---------------------------
5、export.py中做如下修改:
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image (height, width)')
#改为:
parser.add_argument('--img-size', nargs='+', type=int, default=[384, 768], help='image (height, width)')