import json
import os
import xml.etree.ElementTree as ET
# Directory that holds the XML annotation files to convert.
xml_directory = '/Data/dfc/chengdeyancao'


def _read_coordinate(bbox, tag):
    """Return the child ``tag`` of ``bbox`` as an int.

    Integer text is parsed directly; decimal text such as ``"12.0"`` is
    accepted as well and truncated toward zero.

    Raises:
        ValueError: if the child is missing, empty, or not a number.
        OverflowError: if the text denotes an infinite float.
    """
    text = bbox.findtext(tag)
    if text is None or not text.strip():
        raise ValueError(f"missing <{tag}>")
    try:
        return int(text)
    except ValueError:
        # Not a plain integer literal; fall back to float parsing so that
        # decimal coordinates do not abort the conversion.
        return int(float(text))


def _object_turns(obj_elem, image_name, source_name):
    """Return the user/assistant turn pair for one ``<object>`` element.

    Args:
        obj_elem: the ``<object>`` element to convert.
        image_name: image file name placed inside the ``<img>`` tag.
        source_name: XML file name, used only in warning messages.

    Returns:
        A two-item list (user turn, assistant turn), or an empty list when
        the object lacks a name or a usable bounding box (a warning is
        printed in that case).
    """
    object_name = obj_elem.findtext('name')
    if object_name is None or not object_name.strip():
        print(f"Warning: No object name found in {source_name}")
        return []

    bbox = obj_elem.find('bndbox')
    if bbox is None:
        print(f"Warning: No bounding box found in {source_name}")
        return []

    try:
        xmin, ymin, xmax, ymax = (
            _read_coordinate(bbox, tag)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
    except (ValueError, OverflowError) as err:
        print(f"Warning: Invalid bounding box in {source_name}: {err}")
        return []

    return [
        {
            "from": "user",
            "value": f"Picture 1: <img>{image_name}</img>",
        },
        {
            "from": "assistant",
            "value": f"<ref>{object_name}</ref><box>({xmin},{ymin}),({xmax},{ymax})</box>。",
        },
    ]


def convert_directory(directory):
    """Convert every ``.xml`` annotation file in ``directory`` to records.

    Each successfully parsed file yields one dict with an ``"id"`` of the
    form ``identity_<n>`` (numbered from 1 in sorted file-name order) and a
    ``"conversations"`` list holding one user/assistant turn pair per usable
    ``<object>`` element. Files that cannot be parsed, or that have no
    ``<filename>`` text, are skipped with a warning and consume no id.

    Args:
        directory: path of the directory to scan (not recursive).

    Returns:
        The list of record dicts, ready to be dumped as JSON.

    Raises:
        OSError: if ``directory`` itself cannot be listed.
    """
    records = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # generated ids reproducible from run to run.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.xml'):
            continue

        file_path = os.path.join(directory, filename)
        try:
            root = ET.parse(file_path).getroot()
        except (ET.ParseError, OSError) as err:
            # One broken file should not discard the rest of the dataset.
            print(f"Warning: Could not parse {filename}: {err}")
            continue

        image_name = root.findtext('filename')
        if not image_name:
            print(f"Warning: No image filename found in {filename}")
            continue

        conversations = []
        for obj_elem in root.findall('object'):
            conversations.extend(_object_turns(obj_elem, image_name, filename))

        records.append({
            "id": f"identity_{len(records) + 1}",
            "conversations": conversations,
        })
    return records


def main():
    """Convert the annotations in ``xml_directory`` and write output.json."""
    all_data = convert_directory(xml_directory)
    with open('output.json', 'w', encoding='utf-8') as f:
        json.dump(all_data, f, ensure_ascii=False, indent=2)
    print("转换完成,所有XML文件的数据已整合到output.json文件中。")


if __name__ == '__main__':
    main()
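# NOTE: adjust the tag names this script reads (filename, object, name, bndbox, xmin/ymin/xmax/ymax) to match the structure of your own XML files.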