给reflacx数据集的胸片数据加上病变区域框，标记好病变名

榆钱不知秋

已于 2024-06-27 19:28:11 修改

阅读量455

点赞数 8

分类专栏：自监督学习；文章标签：python

于 2024-06-24 14:33:24 首次发布

本文链接：https://blog.csdn.net/weixin_45861496/article/details/139923484

版权

自监督学习专栏收录该内容

5 篇文章 0 订阅

订阅专栏

给胸片添加病变区域框

Step 1 整合anomaly_location_ellipses病变信息文件
Step 2 将病变框绘制到胸片上，并保存为师兄规定的格式。
- 改一下上面的，将病变框加到师兄之前生成好的加了gaze heatmap 的胸片上

Step 1 整合anomaly_location_ellipses病变信息文件

将main_data文件夹下所有Pxxx文件夹中的anomaly_location_ellipses.csv文件都复制到anomaly_location_ellipses文件夹中，并分别重命名为Pxxx.csv。

在这里插入图片描述

# merge_anomalies.py
"""将所有anomaly_location_ellipses整合到一个文件夹中"""
import os
import shutil

def copy_and_rename_files(src_folder, dst_folder):
    """Collect every per-folder ellipse CSV into one flat folder.

    For each immediate subdirectory of ``src_folder`` whose name starts with
    ``P``, copy its ``anomaly_location_ellipses.csv`` to
    ``dst_folder/<subdirectory name>.csv``.

    Args:
        src_folder: Directory that contains the ``P...`` subdirectories.
        dst_folder: Directory that receives the renamed copies; it is created
            (including missing parents) when it does not exist yet.

    Returns:
        List of the destination paths that were written, in sorted
        subdirectory order.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(dst_folder, exist_ok=True)

    copied = []
    # Sorted so the processing order (and the printed log) is reproducible.
    for subdir in sorted(os.listdir(src_folder)):
        subdir_path = os.path.join(src_folder, subdir)
        if not (subdir.startswith('P') and os.path.isdir(subdir_path)):
            continue

        src_file = os.path.join(subdir_path, 'anomaly_location_ellipses.csv')
        # isfile rather than exists: a directory carrying that name would
        # make shutil.copy raise and abort the remaining folders.
        if not os.path.isfile(src_file):
            continue

        dst_file = os.path.join(dst_folder, f'{subdir}.csv')
        shutil.copy(src_file, dst_file)
        print(f'Copied {src_file} to {dst_file}')
        copied.append(dst_file)

    return copied


# Script driver: gather the per-folder CSVs under ./main_data into one folder.
src_folder = './main_data'  # source folder path
dst_folder = './anomaly_location_ellipses'  # destination folder path
copy_and_rename_files(src_folder=src_folder, dst_folder=dst_folder)

Step 2 将病变框绘制到胸片上，并保存为师兄规定的格式。

师兄的要求：

格式布局如attention
不管这个胸片有没有bbox都要放到最终的输出文件
在病变框上方加上病名

思考：
读取reflacx.json文件，遍历其中的每个记录。
读取每个记录的study_id、image_path和reflacx_id属性。
读取anomaly_location_ellipses文件夹下的相应CSV文件，获取每个长方形框的坐标（xmin、ymin、xmax、ymax）和病变标签。
在图像上绘制长方形框，并在框上方绘制标签。
将处理后的图像保存到指定目录，目录结构为anomalies_bbox_data/study_id/reflacx_id.png。

# anomalies_bbox.py
"""将所有bbox画在胸片上，并且标注好病变名字"""
import random
import os
import pandas as pd
import cv2
import json

# Labels of interest: each entry is looked up as a column name in every
# annotation row, and the rows' true entries are drawn as captions.
LABELS = [
    'Airway wall thickening',
    'Atelectasis',
    'Consolidation',
    'Emphysema',
    'Enlarged cardiac silhouette',
    'Fibrosis',
    'Fracture',
    'Groundglass opacity',
    'Mass',
    'Nodule',
    'Other',
    'Pleural effusion',
    'Pleural thickening',
    'Pneumothorax',
    'Pulmonary edema',
    'Quality issue',
    'Support devices',
    'Wide mediastinum',
    'Abnormal mediastinal contour',
    'Acute fracture',
    'Enlarged hilum',
    'Hiatal hernia',
    'High lung volume / emphysema',
    'Interstitial lung disease',
    'Lung nodule or mass',
    'Pleural abnormality',
]

def _label_color(label):
    """Return a colour triple for ``label`` that is identical on every call."""
    # A private generator seeded with the label text gives each finding the
    # same colour in every image and every run; unseeded module-level random
    # calls would re-roll the colours for each image.
    rng = random.Random(label)
    return (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255))


def _box_corners(row):
    """Return ``(xmin, ymin, xmax, ymax)`` as plain ints, or None if any is missing."""
    values = [row[name] for name in ('xmin', 'ymin', 'xmax', 'ymax')]
    if any(pd.isna(value) for value in values):
        return None
    # OpenCV drawing calls need integer pixel coordinates, while pandas may
    # hand back floats or NumPy scalars depending on the column contents.
    return tuple(int(round(float(value))) for value in values)


def _active_labels(row):
    """List the LABELS entries whose column is set to a true value in ``row``."""
    active = []
    for label in LABELS:
        value = row.get(label, False)
        # NaN is truthy in Python, so a blank cell has to be rejected explicitly.
        if pd.notna(value) and bool(value):
            active.append(label)
    return active


def draw_rectangles(image_path, annotations, output_path):
    """Draw labelled anomaly boxes on an image and save the result.

    Args:
        image_path: Path of the image to read.
        annotations: DataFrame with ``xmin``/``ymin``/``xmax``/``ymax``
            columns and, optionally, one boolean column per ``LABELS`` entry.
            An empty DataFrame means "nothing to draw".
        output_path: Destination file. It is written whether or not anything
            was drawn, as long as the input image could be read.

    Returns:
        None. An unreadable input image is reported and nothing is written.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error reading image {image_path}")
        return

    if annotations.empty:
        print("No annotations to draw.")
    else:
        for _, row in annotations.iterrows():
            corners = _box_corners(row)
            if corners is None:
                print("Skipping annotation with missing coordinates.")
                continue
            xmin, ymin, xmax, ymax = corners

            # A row without any true label draws neither box nor caption.
            labels = _active_labels(row)
            if labels:
                print(f"Drawing rectangles for labels: {labels}")
            for label_index, label in enumerate(labels):
                color = _label_color(label)
                cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 3)
                # Stack the captions upwards from the box, 30 px apart, so
                # several labels on one box do not overlap.
                label_y_position = ymin - 10 - (label_index * 30)
                if label_y_position < 0:
                    # No room above the box: stack below it instead.
                    label_y_position = ymax + 20 + (label_index * 30)
                cv2.putText(image, label, (xmin, label_y_position),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 3)

    # imwrite signals failure through its return value instead of raising.
    if cv2.imwrite(output_path, image):
        print(f"Saved image with annotations to {output_path}")
    else:
        print(f"Error writing image {output_path}")

def _load_annotations(reflacx_id):
    """Load the ellipse CSV for ``reflacx_id``.

    Returns the parsed DataFrame, or an empty DataFrame when the file is
    missing, has no content, or lacks one of the box-coordinate columns.
    """
    csv_path = os.path.join('anomaly_location_ellipses', f'{reflacx_id}.csv')
    if not os.path.exists(csv_path):
        print(f"CSV file {csv_path} not found.")
        return pd.DataFrame()

    try:
        annotations = pd.read_csv(csv_path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it as "no boxes"
        # instead of aborting the remaining records.
        print(f"CSV file {csv_path} is empty.")
        return pd.DataFrame()

    required_columns = ['xmin', 'ymin', 'xmax', 'ymax']
    if not all(column in annotations.columns for column in required_columns):
        print(f"CSV file {csv_path} is missing required columns.")
        return pd.DataFrame()

    print(f"Annotations loaded for {reflacx_id}")
    return annotations


def main():
    """Annotate the image of every record listed in ``reflacx.json``.

    Each record supplies ``study_id``, ``image_path`` and ``reflacx_id``. The
    annotated image is written to
    ``anomalies_bbox_data/<study_id>/<reflacx_id>.png``; an output is
    requested for every record, with or without boxes.
    """
    # Read as UTF-8 explicitly rather than relying on the platform's default
    # text encoding.
    with open('reflacx.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    for record in data:
        study_id = record['study_id']
        image_path = record['image_path']
        reflacx_id = record['reflacx_id']

        annotations = _load_annotations(reflacx_id)

        output_dir = os.path.join('anomalies_bbox_data', study_id)
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, f'{reflacx_id}.png')

        draw_rectangles(image_path, annotations, output_path)


if __name__ == '__main__':
    main()

最后生成的anomalies_bbox_data 文件夹
在这里插入图片描述

改一下上面的，将病变框加到师兄之前生成好的加了gaze heatmap 的胸片上

# bbox_saliency_overlay.py
"""将所有bbox画在胸片上，并且标注好病变名字，这里在上面的基础上，更改了图片来源为加了gaze heatmap 的胸片"""
import random
import os
import pandas as pd
import cv2
import json

# Labels of interest: each entry is looked up as a column name in every
# annotation row, and the rows' true entries are drawn as captions.
LABELS = [
    'Airway wall thickening',
    'Atelectasis',
    'Consolidation',
    'Emphysema',
    'Enlarged cardiac silhouette',
    'Fibrosis',
    'Fracture',
    'Groundglass opacity',
    'Mass',
    'Nodule',
    'Other',
    'Pleural effusion',
    'Pleural thickening',
    'Pneumothorax',
    'Pulmonary edema',
    'Quality issue',
    'Support devices',
    'Wide mediastinum',
    'Abnormal mediastinal contour',
    'Acute fracture',
    'Enlarged hilum',
    'Hiatal hernia',
    'High lung volume / emphysema',
    'Interstitial lung disease',
    'Lung nodule or mass',
    'Pleural abnormality',
]

def _label_color(label):
    """Return a colour triple for ``label`` that is identical on every call."""
    # A private generator seeded with the label text gives each finding the
    # same colour in every image and every run; unseeded module-level random
    # calls would re-roll the colours for each image.
    rng = random.Random(label)
    return (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255))


def _box_corners(row):
    """Return ``(xmin, ymin, xmax, ymax)`` as plain ints, or None if any is missing."""
    values = [row[name] for name in ('xmin', 'ymin', 'xmax', 'ymax')]
    if any(pd.isna(value) for value in values):
        return None
    # OpenCV drawing calls need integer pixel coordinates, while pandas may
    # hand back floats or NumPy scalars depending on the column contents.
    return tuple(int(round(float(value))) for value in values)


def _active_labels(row):
    """List the LABELS entries whose column is set to a true value in ``row``."""
    active = []
    for label in LABELS:
        value = row.get(label, False)
        # NaN is truthy in Python, so a blank cell has to be rejected explicitly.
        if pd.notna(value) and bool(value):
            active.append(label)
    return active


def draw_rectangles(image_path, annotations, output_path):
    """Draw labelled anomaly boxes on an image and save the result.

    Args:
        image_path: Path of the image to read.
        annotations: DataFrame with ``xmin``/``ymin``/``xmax``/``ymax``
            columns and, optionally, one boolean column per ``LABELS`` entry.
            An empty DataFrame means "nothing to draw".
        output_path: Destination file. It is written whether or not anything
            was drawn, as long as the input image could be read.

    Returns:
        None. An unreadable input image is reported and nothing is written.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error reading image {image_path}")
        return

    if annotations.empty:
        print("No annotations to draw.")
    else:
        for _, row in annotations.iterrows():
            corners = _box_corners(row)
            if corners is None:
                print("Skipping annotation with missing coordinates.")
                continue
            xmin, ymin, xmax, ymax = corners

            # A row without any true label draws neither box nor caption.
            labels = _active_labels(row)
            if labels:
                print(f"Drawing rectangles for labels: {labels}")
            for label_index, label in enumerate(labels):
                color = _label_color(label)
                cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 3)
                # Stack the captions upwards from the box, 30 px apart, so
                # several labels on one box do not overlap.
                label_y_position = ymin - 10 - (label_index * 30)
                if label_y_position < 0:
                    # No room above the box: stack below it instead.
                    label_y_position = ymax + 20 + (label_index * 30)
                cv2.putText(image, label, (xmin, label_y_position),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 3)

    # imwrite signals failure through its return value instead of raising.
    if cv2.imwrite(output_path, image):
        print(f"Saved image with annotations to {output_path}")
    else:
        print(f"Error writing image {output_path}")

def _load_annotations(reflacx_id):
    """Load the ellipse CSV for ``reflacx_id``.

    Returns the parsed DataFrame, or an empty DataFrame when the file is
    missing, has no content, or lacks one of the box-coordinate columns.
    """
    csv_path = os.path.join('anomaly_location_ellipses', f'{reflacx_id}.csv')
    if not os.path.exists(csv_path):
        print(f"CSV file {csv_path} not found.")
        return pd.DataFrame()

    try:
        annotations = pd.read_csv(csv_path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it as "no boxes"
        # instead of aborting the remaining records.
        print(f"CSV file {csv_path} is empty.")
        return pd.DataFrame()

    required_columns = ['xmin', 'ymin', 'xmax', 'ymax']
    if not all(column in annotations.columns for column in required_columns):
        print(f"CSV file {csv_path} is missing required columns.")
        return pd.DataFrame()

    print(f"Annotations loaded for {reflacx_id}")
    return annotations


def main():
    """Draw anomaly boxes on the gaze-heatmap overlay of every record.

    Records come from ``reflacx.json`` and supply ``study_id`` and
    ``reflacx_id``. The input image is
    ``./saliency_overlay_std400/<study_id>/<reflacx_id>.png`` and the result
    is written to ``saliency_bbox_data/<study_id>/<reflacx_id>.png``; an
    output is requested for every record, with or without boxes.
    """
    # Read as UTF-8 explicitly rather than relying on the platform's default
    # text encoding.
    with open('reflacx.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    for record in data:
        study_id = record['study_id']
        reflacx_id = record['reflacx_id']
        # The source is the pre-rendered heatmap overlay, not the record's
        # own image_path.
        image_path = os.path.join('./saliency_overlay_std400', study_id, f'{reflacx_id}.png')

        annotations = _load_annotations(reflacx_id)

        output_dir = os.path.join('saliency_bbox_data', study_id)
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, f'{reflacx_id}.png')

        draw_rectangles(image_path, annotations, output_path)


if __name__ == '__main__':
    main()