标注数据是一件枯燥又累人的事情,但训练模型需要大量数据。此时可以先用模型对一部分数据进行预标注:用模型迭代数据,再用数据迭代模型……
ultralytics-main\ultralytics\cfg\default.yaml 中有一个标志位 save_txt,将它设为 True 后,检测到的目标的类别和位置信息就会以 YOLO 格式保存为 txt 文件。txt 转 xml 和 txt 转 json 的脚本如下:
txt2xml
from xml.dom.minidom import Document
import os
import cv2
def _add_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def makexml(picPath, txtPath, xmlPath):  # image folder, txt folder, xml output folder
    """Convert YOLO-format txt label files into VOC-style xml label files.

    For every ``.txt`` file in ``txtPath`` the image with the same stem and a
    ``.jpg`` extension is read from ``picPath`` to obtain its size, and
    ``<stem>.xml`` is written into ``xmlPath``.

    Args:
        picPath: Folder that contains the ``.jpg`` images.
        txtPath: Folder that contains the YOLO txt label files.
        xmlPath: Folder that receives the xml files. It is not created here,
            so it must already exist.

    Raises:
        KeyError: If a label line uses a class id that is not in the mapping.
    """
    dic = {'0': "empty",  # YOLO class id -> class name written to the xml
           '1': "full"}
    for name in os.listdir(txtPath):
        stem, ext = os.path.splitext(name)
        if ext.lower() != ".txt":
            continue  # not a label file

        imgPath = os.path.join(picPath, stem + ".jpg")
        # cv2.imread returns None for a file it cannot decode; a missing file
        # is filtered out up front so both cases are skipped the same way.
        img = cv2.imread(imgPath) if os.path.isfile(imgPath) else None
        if img is None:
            print(f"Skipping {name}: cannot read image {imgPath}")
            continue
        Pheight, Pwidth, Pdepth = img.shape

        with open(os.path.join(txtPath, name), 'r') as txtFile:
            txtList = txtFile.readlines()

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)
        _add_text_element(xmlBuilder, annotation, "folder", "driving_annotation_dataset")
        _add_text_element(xmlBuilder, annotation, "filename", stem + ".jpg")

        size = xmlBuilder.createElement("size")
        _add_text_element(xmlBuilder, size, "width", Pwidth)
        _add_text_element(xmlBuilder, size, "height", Pheight)
        _add_text_element(xmlBuilder, size, "depth", Pdepth)
        annotation.appendChild(size)

        for line in txtList:
            oneline = line.split()
            if not oneline:
                continue  # blank line
            # Fields 1-4 are the normalized center x, center y, width, height.
            # Any further columns are ignored.
            cx, cy, bw, bh = (float(value) for value in oneline[1:5])

            obj = xmlBuilder.createElement("object")
            _add_text_element(xmlBuilder, obj, "name", dic[oneline[0]])
            _add_text_element(xmlBuilder, obj, "pose", "Unspecified")
            _add_text_element(xmlBuilder, obj, "truncated", "0")
            _add_text_element(xmlBuilder, obj, "difficult", "0")

            # Center/size -> corner coordinates in pixels. The +1 offset is
            # kept from the original formula (presumably to produce 1-based
            # pixel coordinates -- TODO confirm).
            bndbox = xmlBuilder.createElement("bndbox")
            _add_text_element(xmlBuilder, bndbox, "xmin",
                              int((cx * Pwidth + 1) - bw * 0.5 * Pwidth))
            _add_text_element(xmlBuilder, bndbox, "ymin",
                              int((cy * Pheight + 1) - bh * 0.5 * Pheight))
            _add_text_element(xmlBuilder, bndbox, "xmax",
                              int((cx * Pwidth + 1) + bw * 0.5 * Pwidth))
            _add_text_element(xmlBuilder, bndbox, "ymax",
                              int((cy * Pheight + 1) + bh * 0.5 * Pheight))
            obj.appendChild(bndbox)
            annotation.appendChild(obj)

        xmlFilePath = os.path.join(xmlPath, stem + ".xml")
        # Open the file as UTF-8 so the bytes match the encoding declared in
        # the XML header written by writexml.
        with open(xmlFilePath, 'w', encoding='utf-8') as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')
if __name__ == "__main__":
    # Fill in the three folders before running. A trailing "/" is optional
    # because makexml builds every path with os.path.join.
    picPath = ""  # folder containing the images
    txtPath = ""  # folder containing the YOLO txt files
    xmlPath = ""  # folder where the xml files are saved
    if not (picPath and txtPath and xmlPath):
        raise SystemExit("Please set picPath, txtPath and xmlPath before running.")
    os.makedirs(xmlPath, exist_ok=True)
    makexml(picPath, txtPath, xmlPath)
txt2json
import os
import json
import cv2
# Class names, indexed by the YOLO class id, and the reverse mapping
class_names = ['ice']
class_to_id = {name: idx for idx, name in enumerate(class_names)}
# Input and output folders (fill in before running)
label_path = ''
image_path = ''
output_path = ''
# Create the output folder
os.makedirs(output_path, exist_ok=True)
# Collect all image files
image_files = [f for f in os.listdir(image_path) if f.endswith('.jpg')]
# Convert the label file of each image
for image_file in image_files:
    # Path of the YOLO label file that belongs to this image
    label_file = os.path.splitext(image_file)[0] + '.txt'
    label_file_path = os.path.join(label_path, label_file)
    # Read the YOLO label file; images without a readable label file are skipped
    try:
        with open(label_file_path, 'r') as f:
            lines = f.readlines()
    except OSError:
        continue
    # Read the image to get its size; cv2.imread returns None on failure
    image = cv2.imread(os.path.join(image_path, image_file))
    if image is None:
        print(f"Skipping {image_file}: cannot read image")
        continue
    height, width = image.shape[:2]
    # JSON structure written for this image
    json_data = {
        "version": "5.4.1",
        "flags": {},
        "shapes": [],
        "imagePath": image_file,
        "imageData": None,
        "imageHeight": height,
        "imageWidth": width
    }
    # Convert every label line into one polygon shape
    for line in lines:
        data = line.split()
        if not data:
            continue  # blank or whitespace-only line
        class_name = class_names[int(data[0])]
        coords = list(map(float, data[1:]))
        # Pair up (x, y) values and scale the normalized coordinates to pixels;
        # a trailing unpaired value is dropped.
        points = [(int(x * width), int(y * height))
                  for x, y in zip(coords[0::2], coords[1::2])]
        shape = {
            "label": class_name,
            "points": points,
            "group_id": None,
            "description": "",
            "shape_type": "polygon",
            "flags": {},
            "mask": None
        }
        json_data["shapes"].append(shape)
    # Write the JSON file next to the other outputs
    json_file = os.path.splitext(image_file)[0] + '.json'
    json_file_path = os.path.join(output_path, json_file)
    with open(json_file_path, 'w') as f:
        json.dump(json_data, f, indent=4)
print("Conversion completed.")
为了验证转换后的结果是否正确,可以用 labelimg 或 labelme 打开查看;如果不想通过这种方式,也可以用下面的脚本直接在 IDE 中进行可视化:
vis_txt
import cv2
import os
import numpy as np
# Class names (position = YOLO class id) and the drawing color of each class
classes = ['empty', 'full']
colors = [(0, 0, 255), (0, 255, 0)]
# Class name -> color, in the same order as ``classes``
YOLO_LABELS = dict(zip(classes, colors))
def draw_boxes(image_path, label_path, output_dir, thickness=10):
    """
    Read an image and its YOLO label file, draw the boxes and save the result.

    The annotated image is written to ``output_dir`` as ``<image stem>.jpg``.
    When the label file is missing or the image cannot be read, a message is
    printed and nothing is written.

    Args:
        image_path (str): Path of the image file.
        label_path (str): Path of the YOLO-format label file.
        output_dir (str): Folder that receives the annotated image; created if
            it does not exist.
        thickness (int): Line thickness of the rectangles, in pixels.
    """
    os.makedirs(output_dir, exist_ok=True)
    img_name = os.path.splitext(os.path.basename(image_path))[0]
    output_path = os.path.join(output_dir, img_name + '.jpg')

    # The caller derives label_path from the image name, so the file may not
    # exist (e.g. an image for which no labels were saved).
    if not os.path.isfile(label_path):
        print(f"Skipping {image_path}: label file {label_path} not found")
        return

    # Read the image and get its width and height; cv2.imread returns None
    # for a file it cannot decode.
    img = cv2.imread(image_path) if os.path.isfile(image_path) else None
    if img is None:
        print(f"Skipping {image_path}: cannot read image")
        return
    img_height, img_width = img.shape[:2]

    # Class names in class-id order, built once instead of once per line
    label_names = list(YOLO_LABELS)

    # Parse the YOLO label file
    with open(label_path, 'r') as f:
        for line in f:
            line_split = line.split()
            if not line_split:
                continue  # blank line
            label_id = int(line_split[0])
            # Only the four box values are used; extra columns are ignored
            x_center, y_center, w, h = map(float, line_split[1:5])
            x_min = int((x_center - w / 2) * img_width)
            y_min = int((y_center - h / 2) * img_height)
            x_max = int((x_center + w / 2) * img_width)
            y_max = int((y_center + h / 2) * img_height)
            # Look up the label name and its color
            label_name = label_names[label_id]
            color = YOLO_LABELS[label_name]
            # Draw the rectangle on the image
            cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=color, thickness=thickness)
            # Optional: show the label name above the rectangle
            # cv2.putText(img, label_name, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
    # Save the annotated image
    cv2.imwrite(output_path, img)
# Example usage: annotate every .jpg/.png image found in image_folder
image_folder = ''
label_folder = ''
output_folder = ''
os.makedirs(output_folder, exist_ok=True)
for image_file in os.listdir(image_folder):
    if not image_file.endswith(('.jpg', '.png')):
        continue
    # The label file shares the image's stem and lives in label_folder
    label_file = os.path.splitext(image_file)[0] + '.txt'
    draw_boxes(
        os.path.join(image_folder, image_file),
        os.path.join(label_folder, label_file),
        output_folder,
    )
print("Annotation process completed.")
vis_xml
import xml.etree.ElementTree as ET
import cv2
import os
# Rectangle color for each class name
colors = dict(
    empty=(0, 0, 255),
    full=(0, 255, 0),
)
def visualize_xml_labels(xml_file, image_file, output_image_file):
    """Draw the boxes of one xml label file on its image and save the result.

    Only objects whose class name has an entry in ``colors`` are drawn. When
    the image is missing or cannot be read, a message is printed and nothing
    is written.

    Args:
        xml_file: Path of the xml label file.
        image_file: Path of the image the labels belong to.
        output_image_file: Path the annotated image is written to.
    """
    # Parse the xml file
    root = ET.parse(xml_file).getroot()
    # Load the image; cv2.imread returns None for a file it cannot decode
    image = cv2.imread(image_file) if os.path.isfile(image_file) else None
    if image is None:
        print(f"Skipping {xml_file}: cannot read image {image_file}")
        return
    # Walk over every object in the xml file
    for obj in root.findall('object'):
        # Class label
        name = obj.find('name').text
        if name not in colors:
            continue  # no color configured for this class
        # Bounding-box corners; int(float(...)) also accepts values written
        # with a decimal point such as "12.0"
        bndbox = obj.find('bndbox')
        xmin, ymin, xmax, ymax = (
            int(float(bndbox.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), colors[name], 2)
        # Optional: put the label text on the image
        # cv2.putText(image, name, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    # Save the annotated image
    cv2.imwrite(output_image_file, image)
if __name__ == '__main__':
    xml_folder = ''
    image_folder = ''
    output_folder = ''
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    for xml_file_name in os.listdir(xml_folder):
        if not xml_file_name.endswith('.xml'):
            continue
        # The image is assumed to share the xml file's stem and to be a .jpg
        image_file_name = os.path.splitext(xml_file_name)[0] + '.jpg'
        # Process one xml/image pair; the output keeps the image's file name
        visualize_xml_labels(
            os.path.join(xml_folder, xml_file_name),
            os.path.join(image_folder, image_file_name),
            os.path.join(output_folder, image_file_name),
        )
    print("Visualization complete.")
vis_json
import os
import cv2
import json
import numpy as np
# Folder that holds both the .json label files and the matching .jpg images
base_path = ''
# Stems of all label files; splitext keeps dots inside the file name intact
path_list = [os.path.splitext(i)[0] for i in os.listdir(base_path) if i.endswith('.json')]
for path in path_list:
    image_path = os.path.join(base_path, path + '.jpg')
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None when the image is missing or unreadable
        print(f"Skipping {path}.json: cannot read image {image_path}")
        continue
    h, w = image.shape[:2]
    # Single-channel mask: 255 inside any labelled polygon, 0 elsewhere
    label = np.zeros((h, w), dtype=np.uint8)
    with open(os.path.join(base_path, path + '.json'), encoding='utf-8') as f:
        shapes = json.load(f)['shapes']
    for shape in shapes:
        polygon = np.array(shape['points'], dtype=np.int32)
        label = cv2.fillPoly(label, [polygon], color=255)
    # Keep only the pixels covered by the mask
    image = cv2.bitwise_and(image, image, mask=label)
    cv2.imwrite(os.path.join(base_path, path + '_vis.jpg'), image)