一、letter_box,逆letter_box
深度学习模型输入图片的尺寸通常为正方形,而数据集中的图片往往为长方形,粗暴的resize会使得图片失真,采用letterbox可以较好地解决这个问题。该方法可以保持图片的长宽比例,剩下的部分采用灰色填充。对于语义分割任务,模型推理出来的mask图尺寸和模型输入尺寸一致,要想获取原始图像尺寸下的预测结果,还需要进行逆letter_box操作。
二、代码
本例中,模型输入尺寸为640x640,而我们读取的图片的实际尺寸为128x384,通过letter_box操作,实现将原始图像以不失真的方式调整为640x640。再通过逆letter_box操作,使得尺寸再恢复为128x384。
from PIL import Image
import numpy as np
import cv2
# ------------------------------------------------------------------#
# letter_box操作
# ------------------------------------------------------------------#
def letter_box_pil(orgin_image, target_size):
    """Letterbox a PIL image: resize it without distortion and pad with grey.

    The image is scaled by a single factor so that it fits entirely inside
    ``target_size`` while keeping its aspect ratio, then pasted centred onto
    a grey ``(128, 128, 128)`` RGB canvas of exactly ``target_size``.

    Both the input image and the letterboxed result are displayed with
    ``show()`` as part of this demo.

    Args:
        orgin_image: PIL image to letterbox.
        target_size: ``(width, height)`` of the output canvas.

    Returns:
        A tuple ``(target_image, new_w, new_h, orgin_w, orgin_h)`` where
        ``target_image`` is the letterboxed PIL image, ``new_w``/``new_h``
        is the size of the resized content inside the canvas, and
        ``orgin_w``/``orgin_h`` is the size of the input image.
    """
    orgin_image.show()
    orgin_w, orgin_h = orgin_image.size
    target_w, target_h = target_size

    # Use the smaller of the two axis ratios so the whole image fits inside
    # the target; the other axis is scaled by the same factor and padded.
    scale = min(target_w / orgin_w, target_h / orgin_h)

    # int() truncates, so an extremely elongated image could end up with a
    # zero-sized side, which is not a usable resize target. Keep at least
    # one pixel per side.
    new_w = max(1, int(orgin_w * scale))
    new_h = max(1, int(orgin_h * scale))
    image = orgin_image.resize((new_w, new_h), Image.BICUBIC)

    # Build the grey output canvas and paste the resized content centred.
    target_image = Image.new('RGB', (target_w, target_h), (128, 128, 128))
    target_image.paste(image, ((target_w - new_w) // 2, (target_h - new_h) // 2))
    target_image.show("pil_image_letter_box")
    return target_image, new_w, new_h, orgin_w, orgin_h
# ------------------------------------------------------------------#
# 逆letter_box操作
# ------------------------------------------------------------------#
def letter_box_pil_inv(target_image, target_size, new_h, new_w, orgin_size=None):
    """Undo a letterbox: crop away the padding and resize to the original size.

    Args:
        target_image: Letterboxed image (anything ``np.array`` can convert)
            whose content is centred on the canvas.
        target_size: ``(width, height)`` of the letterboxed canvas, the same
            convention ``letter_box_pil`` uses.
        new_h: Height of the resized content inside the canvas.
        new_w: Width of the resized content inside the canvas.
        orgin_size: Optional ``(width, height)`` to restore the image to.
            When omitted, the module-level ``orgin_w`` / ``orgin_h`` are
            used, which keeps existing four-argument calls working.

    Returns:
        The restored PIL image. It is also displayed with ``show()``.
    """
    target_w, target_h = target_size
    if orgin_size is None:
        # Backward compatibility: earlier callers relied on these globals.
        restore_w, restore_h = orgin_w, orgin_h
    else:
        restore_w, restore_h = orgin_size

    # PIL.Image -> array
    arr_image = np.array(target_image)

    # Cut off the grey bars. Array rows run along the image height and
    # columns along the width, so the row offset must come from the target
    # height and the column offset from the target width.
    top = int((target_h - new_h) // 2)
    bottom = int((target_h - new_h) // 2 + new_h)
    left = int((target_w - new_w) // 2)
    right = int((target_w - new_w) // 2 + new_w)
    arr_image = arr_image[top:bottom, left:right]

    # Resize the cropped content; cv2.resize takes dsize as (width, height).
    arr_image = cv2.resize(arr_image, (restore_w, restore_h), interpolation=cv2.INTER_LINEAR)

    # array -> PIL.Image
    pil_image = Image.fromarray(arr_image)
    pil_image.show("pil_image_letter_box_inv")
    return pil_image
# ------------------------------------------------------------------#
# Demo input: canvas size as [width, height] and the image to process
# ------------------------------------------------------------------#
target_size = [640, 640]
orgin_image = Image.open('demo.jpg')
# ------------------------------------------------------------------#
# Forward letter_box. orgin_w / orgin_h are kept as module-level names
# because letter_box_pil_inv reads them when no explicit size is given.
# ------------------------------------------------------------------#
letter_box_result = letter_box_pil(orgin_image=orgin_image, target_size=target_size)
target_image, new_w, new_h, orgin_w, orgin_h = letter_box_result
# ------------------------------------------------------------------#
# Inverse letter_box: strip the padding and restore the original size
# ------------------------------------------------------------------#
image = letter_box_pil_inv(
    target_image=target_image,
    target_size=target_size,
    new_h=new_h,
    new_w=new_w,
)
运行结果