将JPG流程图与xml中的数据进行准确映射

gatinaa

已于 2024-07-01 15:15:59 修改

阅读量488

点赞数 9

文章标签：流程图 xml

于 2024-07-01 15:13:40 首次发布

本文链接：https://blog.csdn.net/weixin_46636042/article/details/140101386

版权

一、背景

上文工作将xml另存为jpg文件后，

利用draw.io将xml转化为JPG_xml怎么变图片-CSDN博客

算是得到了数据，那么后面我们如何得到准确的标签呢？本文将总结如何将JPG流程图与xml的数据进行准确映射，最后得到数据标签存储在json文件中。

二、思路

先从xml中提取element、edge等关键信息并存储到json文件中，再利用这些数据通过PIL库在得到的jpg图片上重新作图，观察偏移和箭头等信息，并据此调整偏移量和其他参数。

三、方法

xml_to_json

# -*- coding:utf-8 -*-

import os
import json
import xml.etree.ElementTree as ET
from tqdm import tqdm

# Input and output directories. Both are empty-string placeholders and must be
# replaced with real paths before the script is run.
base_dir = r''  # directory listed below for the .xml files to convert
output_dir = r''  # directory that receives the generated .json files
os.makedirs(output_dir, exist_ok=True)


def parse_xml(xml_path):
    """Extract vertices, edges and the overall bounding box from an mxGraph XML file.

    Args:
        xml_path: Path or file object accepted by ``ElementTree.parse``.

    Returns:
        A dict with three keys:

        * ``elements`` - one dict per ``mxCell`` with ``vertex="1"`` holding
          ``id``, ``value`` and ``style`` plus, when the cell has an
          ``mxGeometry`` child, ``x``/``y``/``width``/``height`` as floats.
        * ``edges`` - one dict per ``mxCell`` with ``edge="1"`` holding ``id``,
          ``value``, ``style``, ``source`` and ``target`` plus, when the cell
          has an ``mxGeometry`` child, a ``points`` list of ``{'x', 'y'}``
          waypoints.
        * ``max_rectangle`` - the bounding box (``min_x``, ``min_y``,
          ``max_x``, ``max_y``, ``width``, ``height``) of every coordinate
          seen. All values are ``0.0`` when the file has no geometry at all.
    """
    root = ET.parse(xml_path).getroot()

    elements = []
    edges = []
    min_x, min_y = float('inf'), float('inf')
    max_x, max_y = float('-inf'), float('-inf')

    def update_bounds(x, y):
        """Grow the running bounding box so that it contains (x, y)."""
        nonlocal min_x, min_y, max_x, max_y
        min_x = min(min_x, x)
        min_y = min(min_y, y)
        max_x = max(max_x, x)
        max_y = max(max_y, y)

    def read_box(geometry):
        """Return (x, y, width, height) of an mxGeometry node and track both corners."""
        x = float(geometry.get('x', 0))
        y = float(geometry.get('y', 0))
        width = float(geometry.get('width', 0))
        height = float(geometry.get('height', 0))
        update_bounds(x, y)
        update_bounds(x + width, y + height)
        return x, y, width, height

    # Vertices (nodes). Key insertion order is kept so the JSON layout is stable.
    for cell in root.findall(".//mxCell[@vertex='1']"):
        element = {'id': cell.get('id'), 'value': cell.get('value')}
        geometry = cell.find('mxGeometry')
        if geometry is not None:
            x, y, width, height = read_box(geometry)
            element['x'] = x
            element['y'] = y
            element['width'] = width
            element['height'] = height
        element['style'] = cell.get('style')
        elements.append(element)

    # Edges (connectors) and their explicit waypoints.
    for edge in root.findall(".//mxCell[@edge='1']"):
        edge_element = {
            'id': edge.get('id'),
            'value': edge.get('value'),
            'style': edge.get('style'),
            'source': edge.get('source'),
            'target': edge.get('target'),
        }
        geometry = edge.find('mxGeometry')
        if geometry is not None:
            edge_element['points'] = []
            for point in geometry.findall('Array[@as="points"]/mxPoint'):
                x = float(point.get('x', 0))
                y = float(point.get('y', 0))
                edge_element['points'].append({'x': x, 'y': y})
                update_bounds(x, y)
        edges.append(edge_element)

    # Any remaining cell that is neither a vertex nor an edge only contributes
    # to the bounding box.
    for cell in root.findall(".//mxCell"):
        if cell.get('vertex') != '1' and cell.get('edge') != '1':
            geometry = cell.find('mxGeometry')
            if geometry is not None:
                read_box(geometry)

    if min_x > max_x:
        # No coordinate was recorded, so the bounds are still +/-inf. Writing
        # them out would yield non-standard JSON ("Infinity"/"NaN"); use an
        # empty box at the origin instead.
        min_x = min_y = max_x = max_y = 0.0

    max_rectangle = {
        'min_x': min_x,
        'min_y': min_y,
        'max_x': max_x,
        'max_y': max_y,
        'width': max_x - min_x,
        'height': max_y - min_y
    }

    return {'elements': elements, 'edges': edges, 'max_rectangle': max_rectangle}


# Walk the directory listing of base_dir in reverse order and convert every
# .xml file into a .json file of the same base name inside output_dir.
for name in tqdm(os.listdir(base_dir)[::-1]):
    # Skip anything that does not end in .xml
    if not name.endswith('.xml'):
        continue

    xml_path = os.path.join(base_dir, name)
    # name[:-4] drops the trailing ".xml" before the new extension is appended
    json_path = os.path.join(output_dir, name[:-4] + '.json')

    # Parse the XML file
    data = parse_xml(xml_path)

    # Save the result as a JSON file (non-ASCII text is written verbatim)
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, ensure_ascii=False, indent=4)

    print(f'Converted {xml_path} to {json_path}')

match_json_xml

得到初始的json文件后，要判断它能否直接作为标签，需要利用PIL将其中的数据绘制到JPG上，观察两者的匹配程度。

黑色是原图，红色是element中的参数，蓝色是edge中的参数。

# -*- coding=utf-8 -*-
import json
import os
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm


def load_json(json_file_path):
    """Read the UTF-8 encoded JSON file at ``json_file_path`` and return its decoded content."""
    with open(json_file_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)


def parse_style(style):
    """Parse a ``"key=value;key=value;..."`` style string into a dict.

    Args:
        style: The style string, or ``None``/empty when the cell has no style.

    Returns:
        A dict mapping each key to ``float(value)`` when the value is numeric
        and to the raw string otherwise. Items without ``=`` are ignored. An
        empty dict is returned for a missing or empty style.
    """
    style_dict = {}
    if not style:
        # A cell without a style attribute is stored as None/null.
        return style_dict

    for item in style.split(';'):
        # Split on the first '=' only: values such as URLs may themselves
        # contain '=' and must not break the unpacking.
        key, separator, value = item.partition('=')
        if not separator:
            continue
        try:
            style_dict[key] = float(value)
        except ValueError:
            style_dict[key] = value  # keep as string if it is not numeric
    return style_dict


def get_edge_points(source, target, style, max_rectangle):
    """Compute where an edge leaves its source and enters its target.

    Coordinates are shifted by ``max_rectangle['min_x']``/``['min_y']``. The
    anchor on each box is taken from the ``exitX``/``exitY`` and
    ``entryX``/``entryY`` entries of ``style``, read as fractions of the box
    width/height and defaulting to ``0.5`` (the centre).

    Returns:
        A tuple ``(exit_x, exit_y, entry_x, entry_y)``.
    """
    origin_x = max_rectangle['min_x']
    origin_y = max_rectangle['min_y']

    def anchor(node, frac_x, frac_y):
        # Top-left corner relative to the drawing origin, then the fractional
        # offset inside the box.
        left = node['x'] - origin_x
        top = node['y'] - origin_y
        return left + frac_x * node['width'], top + frac_y * node['height']

    exit_x, exit_y = anchor(source, style.get('exitX', 0.5), style.get('exitY', 0.5))
    entry_x, entry_y = anchor(target, style.get('entryX', 0.5), style.get('entryY', 0.5))
    return exit_x, exit_y, entry_x, entry_y


def validate_elements(json_data, image_path, output_path):
    """Overlay the parsed elements and edges on the image for visual checking.

    Every element that has geometry is outlined in red (with its value as red
    text), and every edge whose source and target both resolve to such an
    element is drawn as a straight blue line (with its value at the midpoint).
    All coordinates are shifted by the ``min_x``/``min_y`` of
    ``json_data['max_rectangle']``.

    Args:
        json_data: Dict with ``elements``, ``edges`` and ``max_rectangle`` keys.
        image_path: Path of the image to annotate.
        output_path: Where the annotated image is saved. Its parent directory
            is created when the path has one.
    """
    elements = json_data['elements']
    edges = json_data['edges']
    max_rectangle = json_data['max_rectangle']
    font = ImageFont.load_default()

    # Elements that carry no x/y/width/height keys cannot be drawn or used as
    # edge endpoints; skip them instead of failing with KeyError.
    geometry_keys = ('x', 'y', 'width', 'height')
    drawable = [el for el in elements if all(key in el for key in geometry_keys)]

    # One-time id index (first occurrence wins) instead of a linear scan per edge.
    by_id = {}
    for element in drawable:
        by_id.setdefault(element['id'], element)

    # The context manager guarantees the underlying file is released.
    with Image.open(image_path) as image:
        draw = ImageDraw.Draw(image)

        for element in drawable:
            x = element['x'] - max_rectangle['min_x']
            y = element['y'] - max_rectangle['min_y']
            width = element['width']
            height = element['height']
            draw.rectangle([x, y, x + width, y + height], outline="red", width=2)
            if element['value'] is not None:
                draw.text((x + 2, y + 2), element['value'], fill="red", font=font)

        for edge in edges:
            source = by_id.get(edge['source'])
            target = by_id.get(edge['target'])
            if source is None or target is None:
                continue
            # An edge without a style attribute is stored as None.
            style = parse_style(edge['style'] or '')
            start_x, start_y, end_x, end_y = get_edge_points(source, target, style, max_rectangle)
            draw.line([start_x, start_y, end_x, end_y], fill="blue", width=2)
            if edge['value'] is not None:
                draw.text(((start_x + end_x) / 2, (start_y + end_y) / 2), edge['value'], fill="blue", font=font)

        # os.makedirs('') raises, so only create a directory when there is one.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        # Save the image with annotations
        image.save(output_path)


def batch_process(json_directory, jpg_directory, output_directory):
    """Annotate the matching JPG of every ``.json`` file in ``json_directory``.

    For ``<stem>.json`` the image ``<stem>.jpg`` is looked up in
    ``jpg_directory`` and the annotated result is written to
    ``<stem>_annotated.jpg`` in ``output_directory``. JSON files without a
    matching image are reported and skipped.
    """
    suffix = '.json'
    files = [f for f in os.listdir(json_directory) if f.endswith(suffix)]
    for filename in tqdm(files, total=len(files)):
        # Strip only the trailing extension; str.replace would also rewrite a
        # '.json' that happens to occur earlier in the file name.
        stem = filename[:-len(suffix)]
        json_file_path = os.path.join(json_directory, filename)
        image_file_path = os.path.join(jpg_directory, stem + '.jpg')
        output_file_path = os.path.join(output_directory, stem + '_annotated.jpg')

        if os.path.exists(image_file_path):
            json_data = load_json(json_file_path)
            validate_elements(json_data, image_file_path, output_file_path)
            print(f"Processed and saved: {output_file_path}")
        else:
            print(f"Image file not found for {json_file_path}")


if __name__ == "__main__":
    # The two input directories are empty-string placeholders; set them to
    # real paths before running.
    json_directory = r''  # directory containing the .json files to check
    jpg_directory = r''  # directory containing the matching .jpg images
    output_directory = r'match_jpg'  # annotated images are written here
    os.makedirs(output_directory, exist_ok=True)

    batch_process(json_directory, jpg_directory, output_directory)

remove_offset

匹配效果好的话，就可以进行下一步，去除初始json的偏移量，得到最后的JSON标签。

# -*- coding=utf-8 -*-
import json
import os
from PIL import Image
from tqdm import tqdm


def load_json(json_file_path):
    """Return the object decoded from the UTF-8 JSON file ``json_file_path``."""
    with open(json_file_path, encoding='utf-8', mode='r') as source:
        parsed = json.load(source)
        return parsed


def save_json(data, json_file_path):
    """Write ``data`` to ``json_file_path`` as UTF-8 JSON.

    The output is indented by four spaces and non-ASCII characters are
    written verbatim rather than escaped.
    """
    dump_options = {'ensure_ascii': False, 'indent': 4}
    with open(json_file_path, mode='w', encoding='utf-8') as sink:
        json.dump(data, sink, **dump_options)


def update_json_values(json_data, image_width, image_height):
    """Remove the drawing offset so that all coordinates are image-relative.

    ``json_data`` is modified in place and also returned:

    * every element with ``x``/``y`` is shifted by ``-min_x``/``-min_y`` of
      ``max_rectangle``;
    * every edge whose source and target both resolve to an element with full
      geometry gets its ``points`` replaced by a two-point connector from the
      middle of the source's right side to the middle of the target's left
      side;
    * every other edge keeps its existing waypoints, shifted by the same
      offset so they share the coordinate frame of the elements;
    * ``max_rectangle`` is reset to ``(0, 0)``-``(image_width, image_height)``.

    Args:
        json_data: Dict with ``elements``, ``edges`` and ``max_rectangle`` keys.
        image_width: Width used for the new ``max_rectangle``.
        image_height: Height used for the new ``max_rectangle``.

    Returns:
        The same ``json_data`` object, updated.
    """
    max_rectangle = json_data['max_rectangle']
    min_x = max_rectangle['min_x']
    min_y = max_rectangle['min_y']

    # id -> element, restricted to elements usable as edge endpoints. Built
    # once (first occurrence wins) instead of scanning the list per edge.
    positioned = {}
    for element in json_data['elements']:
        if 'x' not in element or 'y' not in element:
            # Elements stored without geometry have nothing to shift.
            continue
        element['x'] -= min_x
        element['y'] -= min_y
        if 'width' in element and 'height' in element:
            positioned.setdefault(element['id'], element)

    for edge in json_data['edges']:
        source = positioned.get(edge['source'])
        target = positioned.get(edge['target'])
        if source is not None and target is not None:
            edge['points'] = [
                {'x': source['x'] + source['width'], 'y': source['y'] + source['height'] / 2},
                {'x': target['x'], 'y': target['y'] + target['height'] / 2}
            ]
        else:
            # Unresolved endpoints: keep the waypoints but move them into the
            # same frame, otherwise they would stay offset against the image.
            for point in edge.get('points') or []:
                point['x'] -= min_x
                point['y'] -= min_y

    # Update max_rectangle
    max_rectangle['min_x'] = 0
    max_rectangle['min_y'] = 0
    max_rectangle['max_x'] = image_width
    max_rectangle['max_y'] = image_height
    max_rectangle['width'] = image_width
    max_rectangle['height'] = image_height

    return json_data


def process_file(json_file_path, image_file_path, output_json_path):
    """Rewrite one JSON label file so its coordinates are relative to its image.

    Loads the JSON, reads the size of the image, applies
    ``update_json_values`` and saves the result to ``output_json_path``.
    """
    json_data = load_json(json_file_path)
    # Only the size is needed; the context manager releases the file handle
    # right away instead of leaving it open for the rest of the batch.
    with Image.open(image_file_path) as image:
        image_width, image_height = image.size

    updated_json_data = update_json_values(json_data, image_width, image_height)
    save_json(updated_json_data, output_json_path)


def batch_process(json_directory, jpg_directory, output_directory):
    """Remove the offset from every ``.json`` file in ``json_directory``.

    For ``<stem>.json`` the image ``<stem>.jpg`` is looked up in
    ``jpg_directory`` and the updated JSON is written under the same file name
    in ``output_directory``. JSON files without a matching image are reported
    and skipped.
    """
    suffix = '.json'
    files = [f for f in os.listdir(json_directory) if f.endswith(suffix)]
    for filename in tqdm(files, total=len(files)):
        json_file_path = os.path.join(json_directory, filename)
        # Strip only the trailing extension; str.replace would also rewrite a
        # '.json' that happens to occur earlier in the file name.
        image_file_path = os.path.join(jpg_directory, filename[:-len(suffix)] + '.jpg')
        output_json_path = os.path.join(output_directory, filename)

        if os.path.exists(image_file_path):
            process_file(json_file_path, image_file_path, output_json_path)
            print(f"Processed and saved: {output_json_path}")
        else:
            print(f"Image file not found for {json_file_path}")


if __name__ == "__main__":
    # All three directories are empty-string placeholders; set them to real
    # paths before running.
    json_directory = r''  # directory containing the initial .json files
    jpg_directory = r''  # directory containing the matching .jpg images
    output_directory = r''  # directory that receives the offset-free .json files
    os.makedirs(output_directory, exist_ok=True)

    batch_process(json_directory, jpg_directory, output_directory)

得到最后的标签数据。

gatinaa

关注

9
点赞
踩
8

收藏

觉得还不错? 一键收藏
打赏
0
评论
将JPG流程图与xml中的数据进行准确映射

上文工作将xml另存为jpg文件后，算是得到了数据，那么后面我们如何得到准确的标签呢？本文将总结如何将JPG流程图与xml的数据进行准确映射，最后得到数据标签存储在json文件中。
复制链接

扫一扫