import os
import shutil
def extract_subset(
    img_dir: str,
    labels_dir: str,
    output_dir: str,
    interval: int = 15,
    image_ext: str = '.jpg',
    label_ext: str = '.txt',
) -> None:
    """Copy every ``interval``-th image and its label file into a new dataset.

    Images in ``img_dir`` whose names end with ``image_ext`` are sorted by
    file name and sampled with a stride of ``interval``. A sampled image is
    copied only when a label file with the same base name and ``label_ext``
    exists in ``labels_dir``; otherwise it is reported and skipped.

    Args:
        img_dir: Directory containing the source images.
        labels_dir: Directory containing the label files.
        output_dir: Destination root; images are copied to ``<output_dir>/img``
            and labels to ``<output_dir>/labels`` (created if missing).
        interval: Sampling stride applied to the sorted image list.
        image_ext: File-name suffix that identifies an image (case-sensitive).
        label_ext: Extension of the label files.

    Raises:
        ValueError: If ``interval`` is 0 (a slice step cannot be zero).
    """
    # Create output directories if they don't exist
    output_img_dir = os.path.join(output_dir, 'img')
    output_labels_dir = os.path.join(output_dir, 'labels')
    os.makedirs(output_img_dir, exist_ok=True)
    os.makedirs(output_labels_dir, exist_ok=True)

    # Sort so that the sampling is deterministic regardless of listing order
    images = sorted(f for f in os.listdir(img_dir) if f.endswith(image_ext))

    # Pick every `interval`-th image
    selected_images = images[::interval]

    # Track real copies: the summary must not count skipped images
    copied = 0
    for img in selected_images:
        img_path = os.path.join(img_dir, img)
        # The label shares the image's base name, with the label extension
        label_name = os.path.splitext(img)[0] + label_ext
        label_path = os.path.join(labels_dir, label_name)

        if not os.path.exists(label_path):
            print(f"Label file for {img} not found. Skipping this file.")
            continue

        shutil.copy(img_path, output_img_dir)
        shutil.copy(label_path, output_labels_dir)
        copied += 1

    print(f"Extracted {copied} images and their labels to {output_dir}")
# Example usage
if __name__ == "__main__":
    # Raw strings keep the Windows backslashes literal; in a normal string
    # sequences such as "\M", "\D" or "\s" are invalid escape sequences.
    img_dir = r"D:\Myself文档\Dataset\官方Dataset\spain\original\original\imgs"
    labels_dir = r"D:\Myself文档\Dataset\官方Dataset\spain\original\original\yolo_labels"
    new_dataset_dir = r"D:\Myself文档\Dataset\官方Dataset\spain\original\original_select_15"
    extract_subset(img_dir, labels_dir, new_dataset_dir)