按照网上找到的两篇教程处理 UAVDT 数据集后发现:每个序列文件夹中只有第一张图片所对应的标注框是准确的,其余图片的标注框都有偏差(例如第二张图片)。
于是仔细检查代码,发现问题出在 txt 标注文件的读取顺序上:文件并不是按帧序号依次读取的。
原因是 os.listdir 返回的文件列表不保证按编号顺序排列,所以对代码做了一些更改,具体教程如下:
1.生成txt文件
首先要将 gt_whole.txt 中的标注按照帧序号(每行的第一个字段,对应一张图片)进行分割,保证分割后的每个 txt 文件中包含该图片的所有标注信息,代码如下:
import os
# Root folder of one UAVDT sequence.
folder_path = r"E:\dataset\UAVDT\UAV-benchmark-M\M1401"
folder_path1 = os.path.join(folder_path, 'gt')  # where the per-frame txt files are written
folder_path2 = os.path.join(folder_path, 'img1')  # image folder; only used to count the frames
folder_path3 = os.path.join(folder_path, 'gt', 'M1401_gt_whole.txt')  # full annotation file

# Count the images so that exactly one txt file is created per frame, even
# for frames that have no annotation line at all.
image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')
photo_count = sum(
    1
    for file_name in os.listdir(folder_path2)
    if os.path.splitext(file_name)[1].lower() in image_extensions
)
print("Total photos:", photo_count)

# The per-frame files are written into folder_path1, so that is the
# directory that has to exist (folder_path was already listed above).
os.makedirs(folder_path1, exist_ok=True)

# Group the annotation lines by their first field (the frame index) in a
# single pass instead of rescanning the whole file once per frame.
lines_by_frame = {}
with open(folder_path3, 'r') as file:
    for line in file:
        if not line.strip():
            continue  # ignore blank lines
        frame_index = line.split(',', 1)[0].strip()
        lines_by_frame.setdefault(frame_index, []).append(line)

# Write <frame index>.txt for every frame, in the original line order.
for i in range(1, photo_count + 1):
    file_path1 = os.path.join(folder_path1, str(i) + '.txt')
    with open(file_path1, 'w') as target_file:
        target_file.writelines(lines_by_frame.get(str(i), []))
2.生成完整的图片和标签文件夹
将各文件夹下的图片和txt文件按照依次递增的顺序存放到一个文件夹中,代码如下:
import os
import shutil
# Root folder that contains one sub-folder per sequence.
root_folder = r"E:\dataset\UAVDT\UAV-benchmark-M"
# Destination folder for the renumbered images.
destination_img_folder = r"E:\dataset\UAVDT\img"
# Destination folder for the renumbered label files.
destination_ann_folder = r"E:\dataset\UAVDT\ann"

# shutil.copy does not create missing parent directories.
os.makedirs(destination_img_folder, exist_ok=True)
os.makedirs(destination_ann_folder, exist_ok=True)

# Running counters; image_<n> and image_<n>.txt must describe the same frame.
img_count = 1
ann_count = 1

# os.listdir gives no ordering guarantee, so every listing is sorted
# explicitly to keep images and labels paired.
for folder_name in sorted(os.listdir(root_folder)):
    copied_images = 0
    copied_labels = 0

    # Images of the current sequence.
    img_folder_path = os.path.join(root_folder, folder_name, "img1")
    if os.path.exists(img_folder_path):
        # NOTE(review): a plain sort assumes zero-padded frame names
        # (e.g. img000001.jpg) - confirm for your copy of the dataset.
        for filename in sorted(os.listdir(img_folder_path)):
            file_path = os.path.join(img_folder_path, filename)
            _, extension = os.path.splitext(filename)
            new_filename = f"image_{img_count}{extension}"
            destination_file = os.path.join(destination_img_folder, new_filename)
            # Copy (not move) the image under its new sequential name.
            shutil.copy(file_path, destination_file)
            img_count += 1
            copied_images += 1

    # Per-frame label files of the current sequence.
    ann_folder_path = os.path.join(root_folder, folder_name, "gt")
    if os.path.exists(ann_folder_path):
        # Keep only the per-frame files named <frame number>.txt; the gt
        # folder also holds files such as M1401_gt_whole.txt, whose names
        # cannot be converted to int.
        label_names = [
            name for name in os.listdir(ann_folder_path)
            if name.endswith('.txt') and name[:-4].isdigit()
        ]
        label_names.sort(key=lambda name: int(name[:-4]))
        for filename in label_names:
            file_path = os.path.join(ann_folder_path, filename)
            new_filename = f"image_{ann_count}.txt"
            destination_file = os.path.join(destination_ann_folder, new_filename)
            # Copy (not move) the label file under its new sequential name.
            shutil.copy(file_path, destination_file)
            ann_count += 1
            copied_labels += 1

    # A count mismatch shifts every later image/label pair, so report it.
    if copied_images != copied_labels:
        print(f"Warning: {folder_name} has {copied_images} images but {copied_labels} label files")
3.编号补零对齐(例如 image_1 → image_00001)
import os
# Folder whose files are renamed in place.
folder_path = r'E:\dataset\UAVDT\40735\anno'

# The directory is listed once, before any file is renamed.
for filename in os.listdir(folder_path):
    # Only files following the image_<number> naming scheme are touched.
    if not filename.startswith('image_'):
        continue
    name, ext = os.path.splitext(filename)
    # The number is the text between the first and second underscore.
    number = int(name.split('_')[1])
    # Zero-pad the number to five digits.
    new_filename = 'image_{:05d}{}'.format(number, ext)
    os.rename(
        os.path.join(folder_path, filename),
        os.path.join(folder_path, new_filename),
    )
    print(f'Renamed {filename} to {new_filename}')
4.划分数据集
将全部的文件按照一定的比例进行切分,分成训练集和测试集
import os
import random
import shutil
def split_dataset(source_dir, train_dir, test_dir, split_ratio=0.8, seed=None):
    """Randomly move the images in source_dir into a train and a test folder.

    Only files whose name ends in '.jpg' or '.png' are considered; every
    other file stays in source_dir. The first int(count * split_ratio)
    images of the shuffled list go to train_dir, the rest to test_dir.

    Args:
        source_dir: folder that holds the images to split.
        train_dir: destination folder for the training images (created if missing).
        test_dir: destination folder for the test images (created if missing).
        split_ratio: fraction of the images that goes to train_dir.
        seed: optional seed for the shuffle; passing the same seed for the
            same set of file names reproduces the same split. With None the
            global random state is used, as before.
    """
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # Sort first: os.listdir order is arbitrary, and a seeded shuffle is
    # only reproducible when it starts from a fixed order.
    image_files = sorted(
        f for f in os.listdir(source_dir) if f.endswith(('.jpg', '.png'))
    )
    if seed is None:
        random.shuffle(image_files)
    else:
        random.Random(seed).shuffle(image_files)

    num_train = int(len(image_files) * split_ratio)

    for i, image_file in enumerate(image_files):
        target_dir = train_dir if i < num_train else test_dir
        shutil.move(
            os.path.join(source_dir, image_file),
            os.path.join(target_dir, image_file),
        )
# Source folder with all images, and the train/test folders the images are moved into
source_directory = r"E:\dataset\UAVDT\img"
train_directory = r"E:\dataset\UAVDT\train"
test_directory = r"E:\dataset\UAVDT\test"
# Split the dataset and move the files (default split_ratio of 0.8)
split_dataset(source_directory, train_directory, test_directory)
5.划分标签
按照分割后的图片名称,将对应的标签文件进行分割
import os
import shutil
def copy_txt_files(image_folder, txt_folder, output_folder):
    """Copy the label file that belongs to each image into output_folder.

    For every '.jpg' / '.png' file in image_folder, the file with the same
    stem and a '.txt' extension is looked up in txt_folder and, if it
    exists, copied into output_folder. Images without a label file are
    skipped. output_folder is created when missing.
    """
    os.makedirs(output_folder, exist_ok=True)

    for entry in os.listdir(image_folder):
        # Anything that is not a .jpg/.png image is ignored.
        if not entry.endswith(('.jpg', '.png')):
            continue
        stem, _ = os.path.splitext(entry)
        label_path = os.path.join(txt_folder, stem + '.txt')
        if not os.path.exists(label_path):
            continue
        shutil.copy(label_path, output_folder)
# Image folder, folder with all label txt files, and output folder for the matching labels
image_folder = r"E:\dataset\UAVDT\img\test"
txt_folder = r"E:\dataset\UAVDT\ann"
output_folder = r"E:\dataset\UAVDT\ann\test"
# Copy the txt files that belong to the images into the output folder
copy_txt_files(image_folder, txt_folder, output_folder)
6.转成COCO(json)格式
将分割后的标签文件由txt格式转化为json进行保存,代码如下:
import json
import os
from PIL import Image # 如果没有安装PIL库,请先安装:pip install pillow
# COCO-format container; "images" and "annotations" start empty and are
# filled while the label files are read.
# The category list uses the ids found in the <object_category> field of the
# label files: 1 = car, 2 = truck, 3 = bus.
coco_data = {
    "info": {},
    "licenses": [],
    "categories": [
        {"id": 1, "name": "car"},
        {"id": 2, "name": "truck"},
        {"id": 3, "name": "bus"},
    ],
    "images": [],
    "annotations": [],
}
# Read one per-frame label file and parse its annotation lines.
def parse_txt_file(txt_file):
    """Parse a per-frame label file into a list of annotation dicts.

    Each non-blank line must hold nine comma-separated fields:
    <frame_index>,<target_id>,<bbox_left>,<bbox_top>,<bbox_width>,
    <bbox_height>,<out-of-view>,<occlusion>,<object_category>

    Args:
        txt_file: path of the label file.

    Returns:
        A list with one dict per line, in file order:
        {"bbox": [left, top, width, height], "category_id": object_category},
        all values converted to int.

    Raises:
        ValueError: if a non-blank line does not have exactly nine fields or
            a bbox/category field is not an integer.
    """
    annotations = []
    with open(txt_file, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            # A blank line (typically the last one) is not an annotation.
            if not line.strip():
                continue
            fields = [field.strip() for field in line.split(',')]
            if len(fields) != 9:
                raise ValueError(
                    f"{txt_file}, line {line_number}: expected 9 fields, got {len(fields)}"
                )
            bbox_left, bbox_top, bbox_width, bbox_height = fields[2:6]
            object_category = fields[8]
            annotations.append({
                "bbox": [int(bbox_left), int(bbox_top), int(bbox_width), int(bbox_height)],
                "category_id": int(object_category),
            })
    return annotations
# Folder with the images.
image_folder = r"E:\dataset\UAVDT\10184\images"
# Folder with the per-image label txt files.
txt_folder = r"E:\dataset\UAVDT\10184\MOT"

# Sort the listing so image ids are assigned in a deterministic order
# (os.listdir returns entries in arbitrary order).
for filename in sorted(os.listdir(txt_folder)):
    if not (filename.endswith('.txt') and filename.startswith('image_')):
        continue

    stem = os.path.splitext(filename)[0]
    txt_file = os.path.join(txt_folder, filename)
    # The image is expected to share the label file's name, with a .jpg extension.
    image_file = os.path.join(image_folder, stem + '.jpg')
    if not os.path.exists(image_file):
        # Report the orphaned label file instead of aborting the whole run.
        print(f"Skipping {filename}: image not found at {image_file}")
        continue

    # Parse the label file.
    annotations = parse_txt_file(txt_file)

    # Read the image size.
    with Image.open(image_file) as img:
        image_width, image_height = img.size

    # Register the image; ids are 1-based and sequential.
    image_info = {
        "id": len(coco_data["images"]) + 1,
        "file_name": stem + '.jpg',
        "width": image_width,
        "height": image_height
    }
    coco_data["images"].append(image_info)

    # Register the annotations of this image.
    for annotation in annotations:
        bbox = annotation["bbox"]  # [left, top, width, height]
        annotation_info = {
            "id": len(coco_data["annotations"]) + 1,
            "image_id": image_info["id"],
            "bbox": bbox,
            # COCO annotations carry an "area" field; for a box-only
            # annotation it is width * height.
            "area": bbox[2] * bbox[3],
            "category_id": annotation["category_id"],
            "iscrowd": 0  # every target is treated as a single object
        }
        coco_data["annotations"].append(annotation_info)

# Save the COCO data as a JSON file.
with open(r"E:\dataset\UAVDT\10184\10184.json", 'w') as f:
    json.dump(coco_data, f)
7.将COCO(json)格式转为VOC(xml)格式
import os
import json
import xml.etree.ElementTree as ET
# Directory that receives the generated VOC xml files.
voc_dataset_dir = r"E:\dataset\UAVDT\10184\annotations"
# Create it if it does not exist yet; exist_ok avoids the separate
# exists() check and the gap between checking and creating.
os.makedirs(voc_dataset_dir, exist_ok=True)
# Parse the COCO JSON file and write one VOC annotation file per image.
def convert_to_voc(json_file, output_dir=None):
    """Convert a COCO-style JSON file into one VOC xml file per image.

    Every entry of "images" produces <image stem>.xml containing the file
    name, the image size and one <object> per annotation of that image.
    COCO boxes [left, top, width, height] become
    xmin/ymin/xmax/ymax = left/top/left+width/top+height.
    Category ids 1, 2, 3 map to "car", "truck", "bus"; any other id is
    written as "unknown".

    Args:
        json_file: path of the COCO JSON file.
        output_dir: folder for the xml files (created if missing). When
            None, the module-level voc_dataset_dir is used.
    """
    if output_dir is None:
        output_dir = voc_dataset_dir
    os.makedirs(output_dir, exist_ok=True)

    category_names = {1: "car", 2: "truck", 3: "bus"}

    with open(json_file, 'r') as f:
        coco_data = json.load(f)

    # Group the annotations by image once instead of filtering the whole
    # annotation list again for every image.
    annotations_by_image = {}
    for annotation in coco_data["annotations"]:
        annotations_by_image.setdefault(annotation["image_id"], []).append(annotation)

    for image_info in coco_data["images"]:
        image_filename = image_info["file_name"]

        root = ET.Element("annotation")
        ET.SubElement(root, "filename").text = image_filename

        # Image size; depth is fixed to 3 on the assumption of RGB images.
        sizes = ET.SubElement(root, 'size')
        ET.SubElement(sizes, "width").text = str(image_info.get("width", "unknown"))
        ET.SubElement(sizes, "height").text = str(image_info.get("height", "unknown"))
        ET.SubElement(sizes, "depth").text = "3"

        for annotation in annotations_by_image.get(image_info["id"], []):
            left, top, box_width, box_height = annotation["bbox"][:4]

            object_element = ET.SubElement(root, "object")
            ET.SubElement(object_element, "name").text = category_names.get(
                annotation["category_id"], "unknown")

            bbox_element = ET.SubElement(object_element, "bndbox")
            ET.SubElement(bbox_element, "xmin").text = str(left)
            ET.SubElement(bbox_element, "ymin").text = str(top)
            ET.SubElement(bbox_element, "xmax").text = str(left + box_width)
            ET.SubElement(bbox_element, "ymax").text = str(top + box_height)

        # Use only the base name: a file_name that carries a directory (or an
        # absolute path) would otherwise make os.path.join leave output_dir.
        stem = os.path.splitext(os.path.basename(image_filename))[0]
        ET.ElementTree(root).write(os.path.join(output_dir, stem + ".xml"))
# Convert the COCO JSON file written in step 6 into VOC xml files
json_file_path = r"E:\dataset\UAVDT\10184\10184.json"
convert_to_voc(json_file_path)
8.将VOC格式转为YOLO格式
import xml.etree.ElementTree as ET
import os
def convert(size, box):
    """Turn a pixel box into normalised centre/size form.

    Args:
        size: (image_width, image_height).
        box: (xmin, xmax, ymin, ymax) in pixels.

    Returns:
        (x, y, w, h): box centre and box size, each divided by the image
        width (x, w) or the image height (y, h).
    """
    img_w, img_h = size[0], size[1]
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]
    centre_x = (xmin + xmax) / 2.0
    centre_y = (ymin + ymax) / 2.0
    return (
        centre_x / img_w,
        centre_y / img_h,
        (xmax - xmin) / img_w,
        (ymax - ymin) / img_h,
    )
def convert_annotation(xml_files_path, save_txt_files_path, classes):
    """Convert every VOC xml file in a folder into a YOLO txt label file.

    For each '.xml' file in xml_files_path a txt file with the same stem is
    written to save_txt_files_path. Each kept object becomes one line:
    "<class index> <x> <y> <w> <h>", the four values coming from convert().
    Objects whose name is not in classes, or whose <difficult> flag is 1,
    are left out.

    Args:
        xml_files_path: folder with the VOC xml files.
        save_txt_files_path: existing folder that receives the txt files.
        classes: list of class names; the index in this list is the class id.
    """
    # Anything that is not an xml file cannot be parsed and is ignored.
    xml_files = [name for name in os.listdir(xml_files_path) if name.lower().endswith('.xml')]
    print(xml_files)
    for xml_name in xml_files:
        xml_file = os.path.join(xml_files_path, xml_name)
        # splitext keeps dots inside the stem ("a.b.xml" -> "a.b.txt").
        out_txt_path = os.path.join(save_txt_files_path, os.path.splitext(xml_name)[0] + '.txt')

        root = ET.parse(xml_file).getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        # The with-block closes (and flushes) the label file for every xml.
        with open(out_txt_path, 'w') as out_txt_f:
            for obj in root.iter('object'):
                # Compare against None: an Element without children is
                # falsy, so a plain truth test never sees the flag.
                difficult_element = obj.find('difficult')
                difficult = int(difficult_element.text) if difficult_element is not None else 0
                cls = obj.find('name').text
                if cls not in classes or difficult == 1:
                    continue
                cls_id = classes.index(cls)
                xmlbox = obj.find('bndbox')
                # b = (xmin, xmax, ymin, ymax), the order convert() expects.
                b = (
                    float(xmlbox.find('xmin').text),
                    float(xmlbox.find('xmax').text),
                    float(xmlbox.find('ymin').text),
                    float(xmlbox.find('ymax').text),
                )
                print(w, h, b)
                bb = convert((w, h), b)
                out_txt_f.write(str(cls_id) + " " + " ".join(str(a) for a in bb) + '\n')
if __name__ == "__main__":
    # 1. Class names; the position in the list is the YOLO class id.
    classes1 = ['car','truck','bus']
    # 2. Folder with the VOC xml label files.
    xml_files1 = r'E:\dataset\UAVDT\10184\annotations'
    # 3. Folder that receives the YOLO txt label files.
    save_txt_files1 = r'E:\dataset\UAVDT\10184\labels'

    # The output folder has to exist before any label file is written.
    if not os.path.exists(save_txt_files1):
        os.makedirs(save_txt_files1)

    convert_annotation(xml_files1, save_txt_files1, classes1)

    # Store the class names, one per line, next to the label files.
    with open(save_txt_files1 + '/classes.txt', 'w') as file:
        file.writelines(class_name + '\n' for class_name in classes1)
运行完第7步后,就可以检查标注框是否准确,例如查看每个序列第二张图片的标注框是否与目标对齐。