提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
前言
提示:这里可以添加本文要记录的大概内容:
参考上篇博客记录的 co-detr 训练推理过程,现在需要用其他数据集进行训练,因此要先研究当前数据集的 JSON 文件格式,提取出 {"images": [], "annotations": [], "categories": []} 这三部分内容并进行填充。
例子如下:
"images": [
{
"id": 1,
"file_name": "1x.jpg",
"width": 3840,
"height": 2160
}
],
"annotations": [
{
"id": 246,
"image_id": 97,
"category_id": 31,
"segmentation": [
[
]
],
"area": 3180514.6301418752,
"bbox": [
0,
0,
3840,
828.2590182661133
],
"iscrowd": 0
}
],
"categories": [
{
"id": 1,
"name": "Arrow",
"supercategory": "Traffic signs and markings"
}]
参考转化代码
import os
import json
import random
import shutil
def convert_to_coco(json_dir, output_files, categories=None, cn_to_en=None,
                    img_src_dir="path/to/your/img",
                    img_dst_root="path/to/now/need",
                    train_ratio=0.8):
    """Convert per-image annotation JSON files into COCO-format split files.

    Each input JSON must contain a ``markData`` object with ``width``,
    ``height`` and an ``annotations`` list; every annotation carries a
    ``title`` (category name) and polygon points under
    ``data[*]["relativePos"]`` (list of ``{"x": ..., "y": ...}``).

    Args:
        json_dir: Directory containing one annotation JSON per image.
        output_files: Mapping of split name ("train"/"val"/"test") to the
            COCO JSON path to write for that split.
        categories: Optional pre-built COCO "categories" list. When None,
            categories are auto-registered with incrementing ids in the
            order their names are first encountered.
            (Fix: the original referenced an undefined ``categories``
            global and an undefined ``category_id``.)
        cn_to_en: Optional mapping of original (Chinese) titles to English
            category names; unmapped titles are kept unchanged.
            (Fix: the original referenced an undefined ``cn_to_en`` global.)
        img_src_dir: Directory holding the source .jpg images.
        img_dst_root: Root directory images are copied into, under a
            per-split subdirectory (created on demand).
        train_ratio: Fraction of files assigned to "train"; the remainder
            goes to "test", matching the original two-way split (the
            "val" branch was disabled in the original).
    """
    cn_to_en = cn_to_en or {}
    coco_outputs = {
        "train": {"images": [], "annotations": [], "categories": []},
        "val": {"images": [], "annotations": [], "categories": []},
        "test": {"images": [], "annotations": [], "categories": []},
    }

    # Explicit category-name -> id mapping; auto-extended as new names appear.
    if categories is None:
        categories = []
    name_to_id = {c["name"]: c["id"] for c in categories}

    annotation_id = 1
    image_id = 1

    # Gather all annotation files and shuffle for a random split.
    json_files = [f for f in os.listdir(json_dir) if f.endswith('.json')]
    random.shuffle(json_files)
    train_end = int(len(json_files) * train_ratio)

    for idx, json_file in enumerate(json_files):
        json_path = os.path.join(json_dir, json_file)
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        image_info = {
            "id": image_id,
            "file_name": os.path.splitext(os.path.basename(json_file))[0] + ".jpg",
            "width": data["markData"]["width"],
            "height": data["markData"]["height"],
        }

        # First train_ratio of the shuffled files -> train, rest -> test.
        dataset = "train" if idx < train_end else "test"
        coco_outputs[dataset]["images"].append(image_info)

        # Copy the matching image into the split directory (created on demand;
        # the original used hard-coded paths and assumed the directory existed).
        img_file_name = os.path.splitext(json_file)[0] + ".jpg"
        img_src_path = os.path.join(img_src_dir, img_file_name)
        img_dst_dir = os.path.join(img_dst_root, dataset)
        os.makedirs(img_dst_dir, exist_ok=True)
        shutil.copy(img_src_path, os.path.join(img_dst_dir, img_file_name))

        for annotation in data["markData"]["annotations"]:
            # Translate the title when a mapping is given; keep it otherwise.
            category_name = cn_to_en.get(annotation["title"], annotation["title"])
            if category_name not in name_to_id:
                # Auto-register unseen categories with the next free id.
                new_id = max(name_to_id.values(), default=0) + 1
                name_to_id[category_name] = new_id
                categories.append({"id": new_id, "name": category_name})
            category_id = name_to_id[category_name]
            iscrowd = 1 if category_name in ('People', 'Riders') else 0

            for obj in annotation["data"]:
                # Polygon vertices; skip objects with no points.
                segmentation = [[p["x"], p["y"]] for p in obj["relativePos"]]
                if not segmentation:
                    continue

                # Axis-aligned bounding box (xmin, ymin, width, height).
                x_coordinates = [pt[0] for pt in segmentation]
                y_coordinates = [pt[1] for pt in segmentation]
                xmin, ymin = min(x_coordinates), min(y_coordinates)
                xmax, ymax = max(x_coordinates), max(y_coordinates)
                bbox = [xmin, ymin, xmax - xmin, ymax - ymin]

                coco_outputs[dataset]["annotations"].append({
                    "id": annotation_id,
                    "image_id": image_id,
                    "category_id": category_id,
                    "segmentation": [sum(segmentation, [])],  # flatten polygon points
                    "area": (xmax - xmin) * (ymax - ymin),
                    "bbox": bbox,
                    "iscrowd": iscrowd,
                })
                annotation_id += 1

        image_id += 1

    # Share the final category list across all splits.
    for key in coco_outputs:
        coco_outputs[key]["categories"] = categories

    # Write each requested split, creating the output directory if needed.
    for key, output_file in output_files.items():
        out_dir = os.path.dirname(output_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(coco_outputs[key], f, indent=4, ensure_ascii=False)
# 定义路径
json_dir = 'Directory containing JSON files'
output_files = {
"train": './coco_json/instances_train.json',
#"val": './coco_json/instances_val.json',
"test": './coco_json/instances_test.json'
}
print("start")
convert_to_coco(json_dir, output_files)
print("end")