Python 对图像中物体的 bounding box 区域做裁剪(使用 PIL 库)
bounding box表达形式:
在图像处理和计算机视觉中,bounding box 通常有两种表示形式:
xyxy 形式
即 $[x_{min}, y_{min}, x_{max}, y_{max}]$,表示物体的左上角 $(x_{min}, y_{min})$ 和右下角 $(x_{max}, y_{max})$ 的坐标。
xywh 形式
即 $[x, y, w, h]$,表示左上角的坐标 $(x, y)$ 以及宽度 $w$ 和高度 $h$。
在不同的表示形式下,代码处理会有所不同。
给图像做裁剪的几种方法(xyxy形式)
当你知道图像中物体的 bounding box 之后,给图像做裁剪的方法有很多种,具体方法依赖于你的需求和具体的场景,以下是几种常见的裁剪方法:
- 直接裁剪单个物体:根据单个物体的 bounding box 裁剪图像。
- 裁剪包含两个物体的区域:根据两个物体的 bounding box 的最小外接矩形裁剪图像。
- 扩大边界裁剪:在 bounding box 的基础上扩大一定的边界,然后裁剪图像。
- 随机裁剪增强:在 bounding box 附近随机裁剪,作为数据增强的方法。
- 固定比例裁剪:按照固定的宽高比例裁剪 bounding box 区域。
1. 直接裁剪单个物体
from PIL import Image
def crop_single_object(image_path, bbox):
    """Open an image, crop it to a single bounding box, and display the result.

    Args:
        image_path: Path of the image file handed to ``Image.open``.
        bbox: Box as ``[x_min, y_min, x_max, y_max]``; it is passed to
            ``Image.crop`` unchanged.
    """
    with Image.open(image_path) as source:
        region = source.crop(bbox)
        # Display the cropped region. To keep it on disk instead, call
        # region.save("path_to_save_cropped_image.jpg").
        region.show()
# Example usage: crop the region from (50, 50) to (200, 200).
# "path_to_image.jpg" looks like a placeholder path; point it at a real image.
image_file = "path_to_image.jpg"
bbox = [50, 50, 200, 200] # x_min, y_min, x_max, y_max
crop_single_object(image_file, bbox)
2. 裁剪包含两个物体的区域
from PIL import Image
def crop_two_objects(image_path, bbox1, bbox2):
    """Crop and display the smallest rectangle that encloses two boxes.

    Args:
        image_path: Path of the image file handed to ``Image.open``.
        bbox1: First box as ``[x_min, y_min, x_max, y_max]``.
        bbox2: Second box in the same form.
    """
    # Union of the two boxes: the smaller top-left corner paired with the
    # larger bottom-right corner.
    enclosing = (
        min(bbox1[0], bbox2[0]),
        min(bbox1[1], bbox2[1]),
        max(bbox1[2], bbox2[2]),
        max(bbox1[3], bbox2[3]),
    )
    with Image.open(image_path) as source:
        region = source.crop(enclosing)
        # Display the cropped region. To keep it on disk instead, call
        # region.save("path_to_save_cropped_image.jpg").
        region.show()
# Example usage: two overlapping boxes in [x_min, y_min, x_max, y_max] form.
# Their enclosing rectangle is (50, 50, 200, 200).
image_file = "path_to_image.jpg"
bbox1 = [50, 50, 150, 150]
bbox2 = [100, 100, 200, 200]
crop_two_objects(image_file, bbox1, bbox2)
3. 扩大边界裁剪
from PIL import Image
def crop_with_margin(image_path, bbox, margin=10):
    """Crop a bounding box enlarged by ``margin`` pixels and display it.

    The enlarged box is clamped to the image on all four sides, so the
    result never contains padding from outside the picture.

    Args:
        image_path: Path of the image file handed to ``Image.open``.
        bbox: Box as ``[x_min, y_min, x_max, y_max]``.
        margin: Number of pixels added on every side of ``bbox``.
    """
    with Image.open(image_path) as img:
        img_width, img_height = img.size
        x_min = max(0, bbox[0] - margin)
        y_min = max(0, bbox[1] - margin)
        # Clamp the far edges too: Image.crop accepts a box that extends past
        # the image and fills the overflow with blank pixels rather than
        # failing, which would silently add a border to the crop.
        x_max = min(img_width, bbox[2] + margin)
        y_max = min(img_height, bbox[3] + margin)
        cropped_img = img.crop((x_min, y_min, x_max, y_max))
        # Display the cropped region. To keep it on disk instead, call
        # cropped_img.save("path_to_save_cropped_image.jpg").
        cropped_img.show()
# Example usage: enlarge the box by the default margin (10 pixels per side)
# before cropping.
image_file = "path_to_image.jpg"
bbox = [50, 50, 200, 200]
crop_with_margin(image_file, bbox)
4. 随机裁剪增强
from PIL import Image
import random
def random_crop(image_path, bbox, max_offset=20):
    """Crop a randomly jittered copy of ``bbox`` and display it.

    Each of the four edges is shifted independently by a random integer in
    ``[-max_offset, max_offset]``. The jittered box is then normalised so
    that it is never inverted and never extends past the image.

    Args:
        image_path: Path of the image file handed to ``Image.open``.
        bbox: Box as ``[x_min, y_min, x_max, y_max]``.
        max_offset: Largest shift, in pixels, applied to any single edge.
    """
    def jitter(coord):
        return coord + random.randint(-max_offset, max_offset)

    # Draw the offsets in the order x_min, y_min, x_max, y_max.
    x_min = jitter(bbox[0])
    y_min = jitter(bbox[1])
    x_max = jitter(bbox[2])
    y_max = jitter(bbox[3])

    # When the box is narrower than 2 * max_offset the independent shifts can
    # move an edge past its opposite edge; reorder so the box stays valid
    # (recent Pillow releases reject right < left / lower < upper).
    x_min, x_max = sorted((x_min, x_max))
    y_min, y_max = sorted((y_min, y_max))

    with Image.open(image_path) as img:
        img_width, img_height = img.size
        # Keep the crop inside the picture; otherwise Image.crop pads the
        # overflow with blank pixels.
        x_min, y_min = max(0, x_min), max(0, y_min)
        x_max, y_max = min(img_width, x_max), min(img_height, y_max)
        cropped_img = img.crop((x_min, y_min, x_max, y_max))
        # Display the cropped region. To keep it on disk instead, call
        # cropped_img.save("path_to_save_cropped_image.jpg").
        cropped_img.show()
# Example usage: crop around the box with each edge shifted by a random
# offset of at most the default max_offset (20 pixels).
image_file = "path_to_image.jpg"
bbox = [50, 50, 200, 200]
random_crop(image_file, bbox)
5. 固定比例裁剪
from PIL import Image
def crop_fixed_ratio(image_path, bbox, ratio=1.0):
    """Crop a box of fixed aspect ratio centred on ``bbox`` and display it.

    The crop is the smallest rectangle with ``width / height == ratio`` that
    still contains the whole of ``bbox``: the box is only ever grown, along
    whichever axis is too short for the requested ratio. The result is not
    clamped to the image, so it may extend past the picture's edges.

    Args:
        image_path: Path of the image file handed to ``Image.open``.
        bbox: Box as ``[x_min, y_min, x_max, y_max]``.
        ratio: Target width-to-height ratio of the crop; must be positive.

    Raises:
        ValueError: If ``ratio`` is not positive.
    """
    if ratio <= 0:
        raise ValueError("ratio must be positive")

    width = bbox[2] - bbox[0]
    height = bbox[3] - bbox[1]
    # True division keeps the exact centre; flooring here would shift the
    # crop and lose a pixel for odd-sized boxes.
    center_x = bbox[0] + width / 2
    center_y = bbox[1] + height / 2

    # Compare the box's own aspect ratio with the target (not width with
    # height): only then does the chosen branch grow the box instead of
    # cutting into the object.
    if width > height * ratio:
        new_width, new_height = width, width / ratio
    else:
        new_width, new_height = height * ratio, height

    # Round the origin once and derive the far corner from the rounded size,
    # so the crop has exactly the intended dimensions.
    left = round(center_x - new_width / 2)
    top = round(center_y - new_height / 2)
    new_bbox = [left, top, left + round(new_width), top + round(new_height)]

    with Image.open(image_path) as img:
        cropped_img = img.crop(new_bbox)
        # Display the cropped region. To keep it on disk instead, call
        # cropped_img.save("path_to_save_cropped_image.jpg").
        cropped_img.show()
# Example usage: request a crop with ratio=1.5 around a 150x150 box.
image_file = "path_to_image.jpg"
bbox = [50, 50, 200, 200]
crop_fixed_ratio(image_file, bbox, ratio=1.5)
xywh的案例
假设 bounding box 是 $[x, y, w, h]$ 形式的,需要先将其转换为 $[x_{min}, y_{min}, x_{max}, y_{max}]$ 形式(即 $x_{min}=x$,$y_{min}=y$,$x_{max}=x+w$,$y_{max}=y+h$)后再进行裁剪:
这里只提供几个案例帮助大家理解:
单物体示例:包含单个物体的裁剪:
from PIL import Image
def crop_xywh(image_path, bbox):
    """Crop and display a box given in ``[x, y, w, h]`` form.

    Args:
        image_path: Path of the image file handed to ``Image.open``.
        bbox: Box as ``[x, y, w, h]``: top-left corner plus width and height.
    """
    left, top, box_w, box_h = bbox
    # Image.crop takes corner coordinates, so convert size to far corner.
    corners = (left, top, left + box_w, top + box_h)
    with Image.open(image_path) as source:
        region = source.crop(corners)
        # Display the cropped region. To keep it on disk instead, call
        # region.save("path_to_save_cropped_image.jpg").
        region.show()
# Example usage: a 150x150 box whose top-left corner is (50, 50), i.e. the
# region from (50, 50) to (200, 200).
image_file = "path_to_image.jpg"
bbox = [50, 50, 150, 150] # x, y, w, h
crop_xywh(image_file, bbox)
综合示例:处理包含两个物体的区域裁剪
from PIL import Image
def crop_two_objects_xywh(image_path, bbox1, bbox2):
    """Crop and display the smallest rectangle enclosing two ``[x, y, w, h]`` boxes.

    Args:
        image_path: Path of the image file handed to ``Image.open``.
        bbox1: First box as ``[x, y, w, h]``.
        bbox2: Second box in the same form.
    """
    ax, ay, aw, ah = bbox1
    bx, by, bw, bh = bbox2
    # Convert each box to corner form on the fly and take the union: the
    # smaller top-left corner paired with the larger bottom-right corner.
    enclosing = (
        min(ax, bx),
        min(ay, by),
        max(ax + aw, bx + bw),
        max(ay + ah, by + bh),
    )
    with Image.open(image_path) as source:
        region = source.crop(enclosing)
        # Display the cropped region. To keep it on disk instead, call
        # region.save("path_to_save_cropped_image.jpg").
        region.show()
# Example usage: two 100x100 boxes with top-left corners (50, 50) and
# (100, 100). Their enclosing rectangle runs from (50, 50) to (200, 200).
image_file = "path_to_image.jpg"
bbox1 = [50, 50, 100, 100] # x, y, w, h
bbox2 = [100, 100, 100, 100] # x, y, w, h
crop_two_objects_xywh(image_file, bbox1, bbox2)