SODA-D 数据处理脚本：将原始数据集转换为可以直接喂给 Faster R-CNN 的简单格式

本文链接：https://blog.csdn.net/X131644/article/details/137481345

import os
import json
from PIL.Image import Image
from PIL import Image


def box_in_sub_img(coordinates, cor1, cor2):
    """
    Select the rectangles lying strictly inside a sub-image and shift them into its frame.

    Args:
    - coordinates (list): Rectangles given as [x, y, width, height, category_id].
    - cor1 (tuple): Top-left corner (x1, y1) of the sub-image region.
    - cor2 (tuple): Bottom-right corner (x2, y2) of the sub-image region.

    Returns:
    - list: One [x - x1, y - y1, width, height, category_id] entry per rectangle whose
      four edges are strictly inside the region; rectangles that touch or cross the
      region border are left out.
    """
    left, top = cor1
    right, bottom = cor2

    # All comparisons are strict, so a rectangle flush with the border is rejected.
    return [
        [box_x - left, box_y - top, box_w, box_h, label]
        for box_x, box_y, box_w, box_h, label in coordinates
        if left < box_x and top < box_y and box_x + box_w < right and box_y + box_h < bottom
    ]


def process_one_imag(image_path, coordinate, output_folder="output", crop_size=800):
    """
    Cut one image into square tiles and save the tiles that contain more than two boxes.

    Args:
    - image_path (str): Path of the image to cut.
    - coordinate (list): Boxes of that image as [x, y, width, height, category_id].
    - output_folder (str): Folder that receives the saved tiles; created if missing.
    - crop_size (int): Edge length of a tile in pixels. Defaults to 800.

    Returns:
    - list: One text line per saved tile, formatted as
      "<tile path> <box> <box> ...\n" where each box is its comma-joined values as
      returned by box_in_sub_img.

    Raises:
    - ValueError: If crop_size is not positive.
    """
    if crop_size <= 0:
        raise ValueError(f"crop_size must be positive, got {crop_size}")

    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(output_folder, exist_ok=True)

    # The source file name without its extension prefixes every tile name.
    filename, _ = os.path.splitext(os.path.basename(image_path))

    lines = []
    with Image.open(image_path) as img:
        width, height = img.size

        # Only whole tiles are produced; a right/bottom remainder smaller than
        # crop_size is not covered by any tile.
        rows = height // crop_size
        cols = width // crop_size

        for i in range(rows):
            for j in range(cols):
                left = j * crop_size
                upper = i * crop_size
                right = left + crop_size
                lower = upper + crop_size
                sub_box = box_in_sub_img(coordinate, (left, upper), (right, lower))

                # Tiles holding two or fewer boxes are skipped.
                if len(sub_box) <= 2:
                    continue

                cropped_img = img.crop((left, upper, right, lower))

                # Keep the tile path in its own variable so the image_path
                # argument is never overwritten while iterating.
                save_path = os.path.join(output_folder, f"{filename}_{i}_{j}.jpg")
                cropped_img.save(save_path)
                print(f"Saved cropped image: {save_path}")

                bbox_str = ' '.join(','.join(map(str, bbox)) for bbox in sub_box)
                lines.append(f"{save_path} {bbox_str}\n")

    return lines


def parse_json_to_dict(json_file_path):
    """
    Read an annotation JSON file and group its boxes by image file name.

    Args:
    - json_file_path (str): Path of a JSON file holding an "images" list (entries with
      "id" and "file_name") and an "annotations" list (entries with "image_id",
      "category_id" and "bbox").

    Returns:
    - dict: Maps an image file name to a list of boxes; each box is the annotation's
      "bbox" values followed by its "category_id". Images without annotations do not
      appear as keys.

    Raises:
    - KeyError: If a required key is missing, or an annotation refers to an image id
      that is not listed under "images".
    """
    # Read as UTF-8 explicitly so non-ASCII file names do not depend on the
    # platform's default encoding.
    with open(json_file_path, "r", encoding="utf-8") as json_file:
        data = json.load(json_file)

    id_to_name = {image['id']: image['file_name'] for image in data['images']}

    dic_id2box = {}
    for annotation in data['annotations']:
        image_name = id_to_name[annotation['image_id']]
        # Build a fresh list instead of appending to the parsed "bbox" in place.
        box = [*annotation['bbox'], annotation['category_id']]
        dic_id2box.setdefault(image_name, []).append(box)

    return dic_id2box


def get_images_to_process(images_folder, image_names):
    """
    Build the path of every image by joining its name onto the images folder.

    Args:
    - images_folder (str): Folder that contains the images.
    - image_names (list): Image file names.

    Returns:
    - list: The joined paths, in the same order as image_names.
    """
    return [os.path.join(images_folder, name) for name in image_names]


def process_images_and_write_labels(images_folder, image_names, coordinate_dict, output_folder="output",
                                    output_txt="output_labels.txt"):
    """
    Run process_one_imag on every listed image and collect the label lines in one file.

    Args:
    - images_folder (str): Folder that contains the images.
    - image_names (list): Image file names to process.
    - coordinate_dict (dict): Maps an image name to its boxes; a missing name is
      treated as having no boxes.
    - output_folder (str): Folder handed to process_one_imag for the saved tiles.
    - output_txt (str): Path of the label file; it is opened in write mode, so any
      existing content is replaced.

    Returns:
    - None
    """
    with open(output_txt, "w") as label_file:
        for name in image_names:
            tile_lines = process_one_imag(
                os.path.join(images_folder, name),
                coordinate_dict.get(name, []),
                output_folder,
            )
            # Nothing to record when no tile of this image was kept.
            if not tile_lines:
                continue
            label_file.writelines(tile_lines)


if __name__ == '__main__':
    # Input and output locations, all relative to the current working directory.
    images_folder = "Images"
    json_file_path = "annotations/train.json"
    output_folder = "output"
    output_txt = "output_labels.txt"

    # Boxes grouped by image file name, read from the annotation file.
    coordinate_dict = parse_json_to_dict(json_file_path)

    # Every image name that has an entry in the coordinate dictionary is processed.
    image_names = list(coordinate_dict)

    # Cut the images into tiles and write the label file.
    process_images_and_write_labels(
        images_folder=images_folder,
        image_names=image_names,
        coordinate_dict=coordinate_dict,
        output_folder=output_folder,
        output_txt=output_txt,
    )

import os
import json
from PIL.Image import Image
from PIL import Image


def box_in_sub_img(coordinates, cor1, cor2):
    """
    Select the rectangles lying strictly inside a sub-image and shift them into its frame.

    Args:
    - coordinates (list): Rectangles given as [x, y, width, height, category_id].
    - cor1 (tuple): Top-left corner (x1, y1) of the sub-image region.
    - cor2 (tuple): Bottom-right corner (x2, y2) of the sub-image region.

    Returns:
    - list: One [x - x1, y - y1, width, height, category_id - 1] entry per rectangle
      whose four edges are strictly inside the region; rectangles that touch or cross
      the region border are left out.
    """
    left, top = cor1
    right, bottom = cor2

    # All comparisons are strict, so a rectangle flush with the border is rejected.
    # The category id is written out reduced by one.
    return [
        [box_x - left, box_y - top, box_w, box_h, label - 1]
        for box_x, box_y, box_w, box_h, label in coordinates
        if left < box_x and top < box_y and box_x + box_w < right and box_y + box_h < bottom
    ]


def process_one_imag(image_path, coordinate, output_folder="output", crop_size=800):
    """
    Cut one image into square tiles and save the tiles that contain more than two boxes.

    Progress information is printed while the image is processed.

    Args:
    - image_path (str): Path of the image to cut.
    - coordinate (list): Boxes of that image as [x, y, width, height, category_id].
    - output_folder (str): Folder that receives the saved tiles; created if missing.
    - crop_size (int): Edge length of a tile in pixels. Defaults to 800.

    Returns:
    - list: One text line per saved tile, formatted as
      "<tile path> <box> <box> ...\n" where each box is its comma-joined values as
      returned by box_in_sub_img.

    Raises:
    - ValueError: If crop_size is not positive.
    """
    if crop_size <= 0:
        raise ValueError(f"crop_size must be positive, got {crop_size}")

    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(output_folder, exist_ok=True)

    # The source file name without its extension prefixes every tile name.
    filename, _ = os.path.splitext(os.path.basename(image_path))

    lines = []
    with Image.open(image_path) as img:
        width, height = img.size

        # Only whole tiles are produced; a right/bottom remainder smaller than
        # crop_size is not covered by any tile.
        rows = height // crop_size
        cols = width // crop_size
        print(f"width={width}, height={height}, rows={rows}, cols={cols}")

        for i in range(rows):
            for j in range(cols):
                left = j * crop_size
                upper = i * crop_size
                right = left + crop_size
                lower = upper + crop_size
                sub_box = box_in_sub_img(coordinate, (left, upper), (right, lower))

                # Tiles holding two or fewer boxes are skipped.
                if len(sub_box) <= 2:
                    continue

                print(f"left ：{left}  upper：{upper}  right：{right} lower：{lower} ")
                cropped_img = img.crop((left, upper, right, lower))
                # image_path is never reassigned, so this always reports the
                # source image rather than a previously saved tile.
                print(image_path)

                save_path = os.path.join(output_folder, f"{filename}_{i}_{j}.jpg")
                cropped_img.save(save_path)
                print(f"Saved cropped image: {save_path}")

                bbox_str = ' '.join(','.join(map(str, bbox)) for bbox in sub_box)
                lines.append(f"{save_path} {bbox_str}\n")

    return lines


def parse_json_to_dict(json_file_path):
    """
    Read an annotation JSON file and group its boxes by image file name.

    Args:
    - json_file_path (str): Path of a JSON file holding an "images" list (entries with
      "id" and "file_name") and an "annotations" list (entries with "image_id",
      "category_id" and "bbox").

    Returns:
    - dict: Maps an image file name to a list of boxes; each box is the annotation's
      "bbox" values followed by its "category_id". Images without annotations do not
      appear as keys.

    Raises:
    - KeyError: If a required key is missing, or an annotation refers to an image id
      that is not listed under "images".
    """
    # Read as UTF-8 explicitly so non-ASCII file names do not depend on the
    # platform's default encoding.
    with open(json_file_path, "r", encoding="utf-8") as json_file:
        data = json.load(json_file)

    id_to_name = {image['id']: image['file_name'] for image in data['images']}

    dic_id2box = {}
    for annotation in data['annotations']:
        image_name = id_to_name[annotation['image_id']]
        # Build a fresh list instead of appending to the parsed "bbox" in place.
        box = [*annotation['bbox'], annotation['category_id']]
        dic_id2box.setdefault(image_name, []).append(box)

    return dic_id2box


def get_images_to_process(images_folder, image_names):
    """
    Build the path of every image by joining its name onto the images folder.

    Args:
    - images_folder (str): Folder that contains the images.
    - image_names (list): Image file names.

    Returns:
    - list: The joined paths, in the same order as image_names.
    """
    return [os.path.join(images_folder, name) for name in image_names]


def process_images_and_write_labels(images_folder, image_names, coordinate_dict, output_folder="output",
                                    output_txt="output_labels.txt"):
    """
    Run process_one_imag on every listed image and collect the label lines in one file.

    Args:
    - images_folder (str): Folder that contains the images.
    - image_names (list): Image file names to process.
    - coordinate_dict (dict): Maps an image name to its boxes; a missing name is
      treated as having no boxes.
    - output_folder (str): Folder handed to process_one_imag for the saved tiles.
    - output_txt (str): Path of the label file; it is opened in write mode, so any
      existing content is replaced.

    Returns:
    - None
    """
    with open(output_txt, "w") as label_file:
        for name in image_names:
            tile_lines = process_one_imag(
                os.path.join(images_folder, name),
                coordinate_dict.get(name, []),
                output_folder,
            )
            # Nothing to record when no tile of this image was kept.
            if not tile_lines:
                continue
            label_file.writelines(tile_lines)


if __name__ == '__main__':
    # Input and output locations, all relative to the current working directory.
    images_folder = "Images"
    json_file_path = "annotations/train.json"
    output_folder = "output"
    output_txt = "output_labels_2.txt"

    # Boxes grouped by image file name, read from the annotation file.
    coordinate_dict = parse_json_to_dict(json_file_path)

    # Every image name that has an entry in the coordinate dictionary is processed.
    image_names = list(coordinate_dict)

    # Cut the images into tiles and write the label file.
    process_images_and_write_labels(
        images_folder=images_folder,
        image_names=image_names,
        coordinate_dict=coordinate_dict,
        output_folder=output_folder,
        output_txt=output_txt,
    )