数据集标注常用的是labelme和labelimg这两个工具。labelme可以自由标注多边形形状,但是支持的导出格式有限;labelimg支持的格式较多。很多人可能会遇到json格式的标注文件,想用yolo训练或测试时却无法直接使用,这时可以使用下边这种转换方法。
可以批量地将同一文件夹下的json文件转换为yolo可识别的格式。
yolo可识别的格式为每行一个目标:(标签索引,中心点横坐标,中心点纵坐标,宽度,高度),其中坐标和宽高均为相对于图像宽高的归一化值。
代码如下:
import json
import os
def convert_json_to_yolo(json_path, classes_path, output_path):
    """Convert one JSON annotation file into a YOLO label file.

    Every shape in the JSON becomes one output line of the form
    ``class_id x_center y_center width height``. The four geometry values
    describe the axis-aligned bounding box that encloses all of the shape's
    points, normalized by the image width/height and printed with six
    decimals.

    Args:
        json_path: Path of the JSON file. It must contain ``imageWidth``,
            ``imageHeight`` and ``shapes``; each shape needs ``label`` and
            ``points`` (a list of ``[x, y]`` pairs).
        classes_path: Path of a text file with one class name per line. The
            zero-based line position is used as the class id.
        output_path: Path of the text file to write; it is overwritten.

    Raises:
        ValueError: If a shape's label is not listed in the classes file.
    """
    with open(classes_path, 'r') as f:
        # Strip each entry so stray "\r" or trailing spaces do not break the
        # label lookup; line positions (and therefore ids) are unchanged.
        classes = [name.strip() for name in f.read().strip().split('\n')]

    with open(json_path, 'r') as f:
        json_data = json.load(f)

    image_width = json_data["imageWidth"]
    image_height = json_data["imageHeight"]

    lines = []
    for shape in json_data["shapes"]:
        label = shape["label"]
        points = shape["points"]
        if not points:
            # A shape without points has no extent to export.
            continue

        # Use the min/max of all points rather than fixed indices: this
        # works for 2-point rectangles and for polygons with any number of
        # vertices, and never yields a negative width or height.
        xs = [point[0] for point in points]
        ys = [point[1] for point in points]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)

        # Center and size of the bounding box, normalized to the image.
        x_center_normalized = (x_min + x_max) / 2 / image_width
        y_center_normalized = (y_min + y_max) / 2 / image_height
        width_normalized = (x_max - x_min) / image_width
        height_normalized = (y_max - y_min) / image_height

        # Look up the class id.
        try:
            class_id = classes.index(label)
        except ValueError:
            raise ValueError(
                f"label {label!r} in {json_path} is not listed in {classes_path}"
            ) from None

        lines.append(
            f"{class_id} {x_center_normalized:.6f} {y_center_normalized:.6f} "
            f"{width_normalized:.6f} {height_normalized:.6f}\n"
        )

    # Write only after every shape converted, so a failure above does not
    # leave a truncated label file behind.
    with open(output_path, 'w') as f:
        f.writelines(lines)
def batch_process_json_files(input_folder, classes_path, output_folder):
    """Convert every ``.json`` file in a folder into a YOLO ``.txt`` file.

    The output folder is created if needed. Each ``name.json`` directly
    inside ``input_folder`` is passed to ``convert_json_to_yolo`` and written
    to ``output_folder`` as ``name.txt``. Sub-folders are not searched.

    Args:
        input_folder: Folder containing the JSON annotation files.
        classes_path: Path of the class-name list handed to the converter.
        output_folder: Folder that receives the generated text files.
    """
    suffix = ".json"
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so the processing order does not depend on the file system.
    for json_file in sorted(os.listdir(input_folder)):
        if not json_file.endswith(suffix):
            continue
        json_path = os.path.join(input_folder, json_file)
        if not os.path.isfile(json_path):
            # A directory that merely ends in ".json" cannot be converted.
            continue
        # Swap only the trailing extension; str.replace would also rewrite
        # any ".json" that appears earlier in the file name.
        txt_name = json_file[:-len(suffix)] + ".txt"
        output_path = os.path.join(output_folder, txt_name)
        convert_json_to_yolo(json_path, classes_path, output_path)
if __name__ == "__main__":
    # Example locations only; point all three at your own data before running.
    input_folder = "C:/Users/rjx/Desktop/input"  # folder holding the JSON files
    classes_path = "C:/Users/rjx/Desktop/classes.txt"  # class-label list file
    output_folder = "C:/Users/rjx/Desktop/output"  # folder for the output files
    batch_process_json_files(
        input_folder=input_folder,
        classes_path=classes_path,
        output_folder=output_folder,
    )