import json
import os
# Lookup table: annotation label name -> integer class ID.
name2id = {
    'block': 0,
    'can': 1,
    'Charger': 2,
    'cup': 3,
    'mouse': 4,
    'nail clippers': 5,
    'pen': 6,
    'screw': 7,
    'tissue box': 8,
    'toothbrush': 9,
}
def convert(img_size, box):
    """Convert a pixel-space corner box to a normalized ``(x, y, w, h)`` tuple.

    Args:
        img_size: ``(width, height)`` of the image in pixels.
        box: ``(x1, y1, x2, y2)`` - two opposite corners of the box in pixels.
            The corners may be given in either order.

    Returns:
        ``(x_center, y_center, width, height)``, where the x/width values are
        divided by the image width and the y/height values by the image height.

    Raises:
        ZeroDivisionError: If either image dimension is 0.
    """
    img_w = img_size[0]
    img_h = img_size[1]

    # Order the corners first so width/height come out non-negative no matter
    # which corner was listed first; this replaces taking abs() afterwards.
    left, right = sorted((box[0], box[2]))
    top, bottom = sorted((box[1], box[3]))

    # The centre is the plain midpoint. The "- 1" shift found in VOC-derived
    # scripts compensates for 1-based pixel indices; with 0-based pixel
    # coordinates it would move every centre one pixel up and to the left.
    x = (left + right) / 2.0 / img_w
    y = (top + bottom) / 2.0 / img_h
    w = (right - left) / img_w
    h = (bottom - top) / img_h
    return (x, y, w, h)
def decode_json(json_path, txt_output_folder):
    """Convert one JSON annotation file into a YOLO-style label text file.

    The output is written to ``<txt_output_folder>/<json base name>.txt`` with
    one line per kept rectangle::

        <class id> <x> <y> <w> <h>

    where the class id comes from ``name2id`` and the four box values are
    whatever ``convert`` returns for the rectangle's two corner points.

    Shapes whose label is not a key of ``name2id`` are reported with a printed
    message and skipped. Shapes whose ``shape_type`` is not ``'rectangle'``
    are skipped silently. The output file is written even when no shape is
    kept (it is then empty).

    Args:
        json_path: Path of the annotation file.
        txt_output_folder: Folder that receives the ``.txt`` file; it is
            created when missing.

    Raises:
        KeyError: If ``imageWidth``, ``imageHeight``, ``shapes`` or a shape's
            ``label``/``points`` entry is missing.
        UnicodeDecodeError: If the file is neither UTF-8 nor GB2312 text.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    stem = os.path.splitext(os.path.basename(json_path))[0]
    txt_name = os.path.join(txt_output_folder, stem + '.txt')

    # Read and parse the annotation BEFORE touching the output, so a broken
    # JSON file does not leave a stray empty label file behind.
    with open(json_path, 'rb') as json_file:
        raw = json_file.read()
    try:
        # Try UTF-8 first (tolerating a BOM): GB2312 bytes almost never form
        # valid UTF-8, whereas UTF-8 bytes can decode "successfully" as
        # GB2312 and silently produce garbled text.
        text = raw.decode('utf-8-sig')
    except UnicodeDecodeError:
        text = raw.decode('gb2312')
    data = json.loads(text)

    img_w = data['imageWidth']
    img_h = data['imageHeight']

    lines = []
    for shape in data['shapes']:
        label_name = shape['label']
        if label_name not in name2id:
            print(f"Skipping label: {label_name}")
            continue
        # .get(): tolerate shapes that carry no 'shape_type' entry at all.
        if shape.get('shape_type') != 'rectangle':
            continue
        points = shape['points']
        if len(points) < 2:
            print(f"Skipping rectangle with fewer than 2 points in {json_path}")
            continue
        # Coordinates are truncated to whole pixels before normalisation.
        x1 = int(points[0][0])
        y1 = int(points[0][1])
        x2 = int(points[1][0])
        y2 = int(points[1][1])
        bbox = convert((img_w, img_h), (x1, y1, x2, y2))
        fields = [str(name2id[label_name])] + [str(value) for value in bbox]
        lines.append(" ".join(fields) + '\n')

    out_dir = os.path.dirname(txt_name)
    if out_dir:  # os.makedirs('') raises, so skip it for the current directory
        os.makedirs(out_dir, exist_ok=True)
    with open(txt_name, 'w') as txt_file:
        txt_file.writelines(lines)
if __name__ == "__main__":
    # Root folder that is searched for annotation files, and the folder that
    # receives the generated label files.
    json_folder_path = 'C:\\Users\\周\\Desktop\\原始数据集\\'
    txt_output_folder = 'labels'

    # Walk the whole tree so JSON files in nested folders are picked up too.
    for current_dir, _subdirs, entries in os.walk(json_folder_path):
        for entry in entries:
            if not entry.endswith('.json'):
                continue
            json_path = os.path.join(current_dir, entry)
            print(f'Processing {json_path}')
            decode_json(json_path, txt_output_folder)
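# ---------------------------------------------------------------------------
# Note from the referenced article about convert(img_size, box):
# take the absolute value of each normalized component before returning it,
# so that no negative value ends up in the label file:
#
#     x = abs(x * dw)
#     w = abs(w * dw)
#     y = abs(y * dh)
#     h = abs(h * dh)
#     return (x, y, w, h)
#
# Original article: https://blog.csdn.net/m0_69331214/article/details/134029719
#
# The script above finds the JSON files under a path and converts them to
# txt label files, leaving out any category that is not in the label table.
#
# The script below can split the result into a YOLO dataset.
# ---------------------------------------------------------------------------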
import os
import random
import shutil
from math import floor
def create_directory(dir_path):
    """Create ``dir_path``, including missing parent folders, if it is absent.

    Calling this for a folder that already exists is a no-op.

    Args:
        dir_path: Path of the directory to create.

    Raises:
        FileExistsError: If ``dir_path`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate "exists?" check, which could race
    # with another process creating the folder between the check and the call.
    os.makedirs(dir_path, exist_ok=True)
def copy_files(files, source_image_folder, source_label_folder, target_folder):
    """Copy images and their matching label files into one split folder.

    Each image goes to ``<target_folder>/images``. When a file with the same
    base name and a ``.txt`` extension exists in ``source_label_folder``, it
    is copied to ``<target_folder>/labels``; images without such a file are
    still copied, just without a label.

    Args:
        files: Image file names (not paths) located in ``source_image_folder``.
        source_image_folder: Folder the images are read from.
        source_label_folder: Folder the ``.txt`` label files are read from.
        target_folder: Split folder that receives ``images/`` and ``labels/``.

    Raises:
        FileNotFoundError: If a listed image is missing from the source folder.
    """
    image_dir = os.path.join(target_folder, 'images')
    label_dir = os.path.join(target_folder, 'labels')
    # Create the destinations here so the function also works when the caller
    # has not prepared them; shutil.copy does not create missing folders.
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(label_dir, exist_ok=True)

    for image_file in files:
        label_file = os.path.splitext(image_file)[0] + '.txt'

        # Copy image file
        shutil.copy(os.path.join(source_image_folder, image_file),
                    os.path.join(image_dir, image_file))

        # Copy corresponding label file if it exists
        label_path = os.path.join(source_label_folder, label_file)
        if os.path.isfile(label_path):
            shutil.copy(label_path, os.path.join(label_dir, label_file))
def split_dataset(image_folder, label_folder, output_folder, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15, seed=None):
    """Shuffle the images of a folder and copy them into train/val/test splits.

    Creates ``train``, ``val`` and ``test`` folders under ``output_folder``,
    each with ``images`` and ``labels`` subfolders, and fills them through
    ``copy_files``. Only files ending in ``.jpg``, ``.jpeg`` or ``.png``
    (case-insensitive) directly inside ``image_folder`` are considered.

    The train and val sizes are ``floor(total * ratio)``; the test split
    receives every remaining file so that no image is dropped.

    Args:
        image_folder: Folder containing the source images.
        label_folder: Folder containing the source ``.txt`` label files.
        output_folder: Root folder of the generated dataset.
        train_ratio: Fraction of the images placed in ``train``.
        val_ratio: Fraction of the images placed in ``val``.
        test_ratio: Expected fraction for ``test``. The test split always
            gets the remainder, so this value is only checked for
            consistency and a message is printed when it does not match.
        seed: Seed for the shuffle; ``None`` gives a different split per run.

    Raises:
        ValueError: If ``train_ratio`` or ``val_ratio`` is negative, or if
            together they exceed 1.
    """
    tolerance = 1e-9
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + tolerance:
        raise ValueError(
            "train_ratio and val_ratio must be non-negative and sum to at most 1, "
            f"got train_ratio={train_ratio}, val_ratio={val_ratio}"
        )
    if abs(train_ratio + val_ratio + test_ratio - 1) > tolerance:
        print(
            f"Warning: ratios do not sum to 1; the test split gets the remaining "
            f"files instead of test_ratio={test_ratio}"
        )

    # A private generator keeps the shuffle reproducible without reseeding the
    # process-wide random module as a side effect.
    rng = random.Random(seed)

    # Create output directories
    train_folder = os.path.join(output_folder, 'train')
    val_folder = os.path.join(output_folder, 'val')
    test_folder = os.path.join(output_folder, 'test')
    for folder in (train_folder, val_folder, test_folder):
        create_directory(folder)
        create_directory(os.path.join(folder, 'images'))
        create_directory(os.path.join(folder, 'labels'))

    # Get list of image files. os.listdir returns names in arbitrary order,
    # so sort them first; otherwise the same seed could still produce a
    # different split on another machine or file system.
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_files = sorted(
        name for name in os.listdir(image_folder)
        if os.path.splitext(name)[1].lower() in image_extensions
    )

    # Shuffle the list
    rng.shuffle(image_files)

    # Calculate split boundaries
    total_files = len(image_files)
    train_end = floor(total_files * train_ratio)
    val_end = train_end + floor(total_files * val_ratio)

    # Split the list; everything after the val slice goes to test
    train_files = image_files[:train_end]
    val_files = image_files[train_end:val_end]
    test_files = image_files[val_end:]

    # Copy files to respective folders
    copy_files(train_files, image_folder, label_folder, train_folder)
    copy_files(val_files, image_folder, label_folder, val_folder)
    copy_files(test_files, image_folder, label_folder, test_folder)

    print("Dataset split completed!")
if __name__ == "__main__":
    # Replace these with your own source images folder, source labels folder
    # and target output folder.
    image_folder, label_folder, output_folder = 'images', 'labels', 'data'

    # A fixed seed is passed so the shuffle uses the same random sequence on every run.
    split_dataset(image_folder, label_folder, output_folder, seed=42)