目录
一、标注文件格式转换
1、XML格式转到YOLO格式:.xml→.txt
推荐使用下面的第1个脚本(xml_to_labels.py)。
第1个xml_to_labels.py
# xml_to_labels.py
from xml.etree import ElementTree as ET
import numpy as np
import os
import shutil
from pathlib import Path
from shutil import copyfile
from PIL import Image, ImageDraw
from xml.dom.minidom import parse
from tqdm import tqdm
"""
修改类别,labels文件路径,图片文件路径,annotation文件路径
"""
def cord_converter(size, box):
    """Convert a corner-format pixel box into darknet (YOLO) coordinates.

    :param size: image size as [width, height] in pixels
    :param box: box corners [x_min, y_min, x_max, y_max] in pixels
    :return: [x_center, y_center, width, height], each divided by the image
        width (x values) or height (y values)
    """
    x1 = int(box[0])
    y1 = int(box[1])
    x2 = int(box[2])
    y2 = int(box[3])
    # Scale factors. The previous version computed these but never applied
    # them, so it returned pixel values instead of darknet fractions.
    dw = 1.0 / int(size[0])
    dh = 1.0 / int(size[1])
    w = x2 - x1
    h = y2 - y1
    x = x1 + (w / 2)
    y = y1 + (h / 2)
    return [x * dw, y * dh, w * dw, h * dh]
class XMLProcess(object):
    """Convert Pascal VOC style XML annotations into YOLO ``.txt`` labels.

    One label file is written per XML file. Each line has the form
    ``class_id x_center y_center width height`` where the four box values
    are divided by the image width/height read from the XML ``<size>`` tag.
    """

    def __init__(self, num_classes, classes_name, file_path, labels_path):
        """
        :param num_classes: length of the vector built by ``one_hot``
        :param classes_name: class names; a name's index is its class id
        :param file_path: directory holding the XML annotation files
        :param labels_path: directory the ``.txt`` label files are written to
        """
        self.xml_path = file_path
        self.num_classes = num_classes
        self.classes_name = classes_name
        self.data = {}
        self.labels_path = labels_path

    def process_xml(self):
        """Parse every ``.xml`` file in ``xml_path`` and write its label file.

        Files without an ``.xml`` extension are ignored. The label directory
        is created when missing.

        :return: None
        """
        labels_path = self.labels_path
        os.makedirs(labels_path, exist_ok=True)
        for filename in tqdm(os.listdir(self.xml_path)):
            stem, ext = os.path.splitext(filename)
            if ext.lower() != '.xml':
                continue
            root = ET.parse(os.path.join(self.xml_path, filename)).getroot()
            object_labels = self._parse_objects(root)
            write_path = os.path.join(labels_path, stem + '.txt')
            # The context manager closes the file; joining with single
            # spaces avoids a trailing blank that breaks readers which do
            # ``line.split(' ')``.
            with open(write_path, 'w') as f:
                for label in object_labels:
                    f.write(' '.join(str(item) for item in label) + '\n')
        return None

    def _parse_objects(self, root):
        """Return ``[class_id, x, y, w, h]`` for each known object in *root*.

        :param root: root element of one parsed annotation file
        :return: list of labels; ``class_id`` is a string, the box values are
            floats normalized by the image size
        """
        size = root.find('size')
        width = float(size.find("width").text)
        height = float(size.find("height").text)
        object_labels = []
        for object_tree in root.findall("object"):
            object_name = object_tree.find('name').text
            object_one_hot = self.one_hot(object_name)
            if 1 not in object_one_hot:
                # Unknown class (already reported by one_hot). Taking the
                # argmax of an all-zero vector would silently label it 0.
                continue
            object_id = str(object_one_hot.index(1))
            x, y, w, h = 0, 0, 0, 0
            for res in object_tree.iter("bndbox"):
                # Corner coordinates normalized by the image size.
                xmin = float(res.find("xmin").text) / width
                ymin = float(res.find("ymin").text) / height
                xmax = float(res.find("xmax").text) / width
                ymax = float(res.find("ymax").text) / height
                # [x1, y1, x2, y2] -> [x_center, y_center, w, h]
                w = float(xmax - xmin)
                h = float(ymax - ymin)
                x = float(xmin + w / 2)
                y = float(ymin + h / 2)
            object_labels.append([object_id, x, y, w, h])
        return object_labels

    def one_hot(self, name):
        """Build the one-hot vector for a class name.

        :param name: object class name
        :return: list of ``num_classes`` ints with a single 1 at the class
            index, or all zeros (after printing a warning) when *name* is
            not in ``classes_name``
        """
        one_hot_vector = [0] * self.num_classes
        for i, class_name in enumerate(self.classes_name):
            if name == class_name:
                one_hot_vector[i] = 1
                break
        else:
            print("Unknown Label: ", name)
        return one_hot_vector
if __name__ == '__main__':
    # The position of each name is its YOLO class id: 0, 1, 2, 3.
    class_names = ['open_mouth', 'closed_mouth', 'open_eye', 'closed_eye']
    # Directory with the source XML annotations.
    annotation_dir = r'F:\yolo\yolov8\ultralytics-main\datasets\fdd-dataset\Annotations\\'
    # Directory that receives the generated txt labels.
    label_dir = r'F:\yolo\yolov8\ultralytics-main\datasets\fdd-dataset\labels\\'
    converter = XMLProcess(
        num_classes=4,
        classes_name=class_names,
        file_path=annotation_dir,
        labels_path=label_dir,
    )
    converter.process_xml()
第2个xml_to_labels-2.py
# xml_to_labels-2.py
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
import os
sets = ['train', 'val', 'test'] # remove 'test' if your Main folder has no test.txt
# classes = ["a", "b"] # replace with your own classes (the VOC dataset has 20 classes)
classes = ['person'] # class names; the list index is used as the class id
abs_path = os.getcwd() # all dataset paths below are built relative to the current working directory
def convert(size, box):
    """Turn a pixel box into normalized ``(x, y, w, h)`` values.

    :param size: image ``(width, height)`` in pixels
    :param box: ``(xmin, xmax, ymin, ymax)`` in pixels; note both x values
        come before the y values
    :return: tuple ``(x_center, y_center, width, height)``; the centers are
        shifted by -1 pixel before everything is scaled by 1/width or
        1/height
    """
    x_lo, x_hi, y_lo, y_hi = box[0], box[1], box[2], box[3]
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])
    center_x = ((x_lo + x_hi) / 2.0 - 1) * scale_x
    center_y = ((y_lo + y_hi) / 2.0 - 1) * scale_y
    box_w = (x_hi - x_lo) * scale_x
    box_h = (y_hi - y_lo) * scale_y
    return center_x, center_y, box_w, box_h
def convert_annotation(image_id):
    """Write the YOLO label file for one VOC annotation.

    Reads ``Annotations/<image_id>.xml`` and writes ``label/<image_id>.txt``
    below ``abs_path + '/INRIAPerson/VOCperson'``. Objects whose class is not
    in ``classes`` or that are flagged ``difficult == 1`` are skipped.

    :param image_id: annotation file name without extension
    """
    xml_path = abs_path + '/INRIAPerson/VOCperson/Annotations/%s.xml' % (image_id)
    txt_path = abs_path + '/INRIAPerson/VOCperson/label/%s.txt' % (image_id)
    # Passing the path lets ElementTree read bytes and honour the encoding
    # declared in the XML instead of the locale default of a text handle.
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    with open(txt_path, 'w') as out_file:
        for obj in root.iter('object'):
            # Some annotation tools omit <difficult>; treat that as "0".
            difficult_node = obj.find('difficult')
            difficult = difficult_node.text if difficult_node is not None else '0'
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            x_min = float(xmlbox.find('xmin').text)
            x_max = float(xmlbox.find('xmax').text)
            y_min = float(xmlbox.find('ymin').text)
            y_max = float(xmlbox.find('ymax').text)
            # Clamp boxes that extend past the right/bottom image border.
            x_max = min(x_max, w)
            y_max = min(y_max, h)
            bb = convert((w, h), (x_min, x_max, y_min, y_max))
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
# Create both output directories once, before the loop; a missing VOC2007
# directory previously made the list-file open() fail.
os.makedirs(abs_path + '/INRIAPerson/VOCperson/label/', exist_ok=True)
os.makedirs(abs_path + '/INRIAPerson/VOCperson/VOC2007/', exist_ok=True)
for image_set in sets:
    # Image ids of this split, one per whitespace-separated token.
    with open(abs_path + '/INRIAPerson/VOCperson/ImageSets/Main/%s.txt' % (image_set)) as ids_file:
        image_ids = ids_file.read().strip().split()
    # Output list of image paths for this split.
    with open(abs_path + '/INRIAPerson/VOCperson/VOC2007/%s.txt' % (image_set), 'w') as list_file:
        for image_id in image_ids:
            # Write the full image path; a partial path may fail later.
            list_file.write(abs_path + '/INRIAPerson/VOCperson/JPEGImages/%s.jpg\n' % (image_id))
            convert_annotation(image_id)
第2个脚本(xml_to_labels-2.py)实现两个功能:(1)将XML格式标注文件转换为YOLO格式标注文件;
(2)生成写有图片路径的train.txt、val.txt以及test.txt文件。
下面的代码只实现功能(2)。
数据集划分txt.py
#数据集划分txt.py
# -*- coding:utf-8 -*
import os
import random
val_percent = 0.1
test_percent = 0.1
train_percent = 0.8
imagesfile_path = './images/'  # directory holding the images
total_images = os.listdir(imagesfile_path)
random.shuffle(total_images)
num = len(total_images)  # total number of images
train_num = int(num * train_percent)  # size of the training split
val_num = int(num * val_percent)  # size of the validation split
# Everything left after train and val goes to test, so rounding never
# drops a file.
test_num = num - train_num - val_num
# The context manager closes all three lists even if a write fails.
with open('./train.txt', 'w') as ftrain, \
        open('./val.txt', 'w') as fval, \
        open('./test.txt', 'w') as ftest:
    for i, image_name in enumerate(total_images):
        name = './images/' + image_name + '\n'
        if i < train_num:
            ftrain.write(name)
        elif i < train_num + val_num:
            fval.write(name)
        else:
            ftest.write(name)
数据集划分txt2.py
#数据集划分txt2.py
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
import os
sets = ['train', 'val', 'test']  # remove 'test' if your Main folder has no test.txt
# classes = ["a", "b"]  # replace with your own classes (the VOC dataset has 20 classes)
classes = ['person']  # class names
abs_path = os.getcwd()
for image_set in sets:
    # Read the image ids of this split; the handle is closed right away.
    with open(abs_path + '/KAIST/ImageSets/Main/%s.txt' % (image_set)) as ids_file:
        image_ids = ids_file.read().strip().split()
    # Write one full image path per id to the output list of this split.
    with open(abs_path + '/KAIST/%s.txt' % (image_set), 'w') as list_file:
        for image_id in image_ids:
            # Use the full path; a partial path may fail later.
            list_file.write(abs_path + '/KAIST/Images/%s.jpg\n' % (image_id))
数据集划分3:按文件名查找图片对应的标签并划分数据集
#自动划分数据集(含随机打乱功能,图像与标签依旧对应)
import random
import os
import shutil
from tqdm import tqdm
# 标签数据路径 Images & Labels
# image_path = '../data/road/images'
# label_path = '../data/road/labels'
# Source folders with the images and their label files.
image_path = '../yuanshuju/images'
label_path = '../yuanshuju/Labels'
image_names = os.listdir(image_path)
label_names = os.listdir(label_path)
# Only the labels are shuffled; each image is looked up from its label
# later, so image/label pairs stay together.
random.shuffle(label_names)
# Root of the generated dataset; keep the trailing '/'.
dataset_file_path = '../datasets1/'
new_train_image_path = dataset_file_path + 'images/train/'
new_val_image_path = dataset_file_path + 'images/val/'
new_train_label_path = dataset_file_path + 'labels/train/'
new_val_label_path = dataset_file_path + 'labels/val/'
# makedirs creates missing parents and, with exist_ok, tolerates existing
# folders, which replaces the check-then-create chain.
for new_dir in (new_train_image_path, new_val_image_path,
                new_train_label_path, new_val_label_path):
    os.makedirs(new_dir, exist_ok=True)
train_num = int(len(image_names) * 0.9)  # train : val = 9 : 1
print(train_num, len(image_names))
# Index the images by file stem once instead of scanning the whole image
# list for every label. splitext also handles extensions that are not
# exactly three characters long (e.g. ".jpeg").
images_by_stem = {}
for img in image_names:
    images_by_stem.setdefault(os.path.splitext(img)[0], []).append(img)
# Copy each label together with its image(s) into the train or val folder.
for index, label in enumerate(tqdm(label_names)):
    # Strictly less than: indices 0 .. train_num-1 are exactly train_num
    # samples. "<=" put one extra sample into train and could leave val
    # empty for small datasets.
    in_train = index < train_num
    image_dst = new_train_image_path if in_train else new_val_image_path
    label_dst = new_train_label_path if in_train else new_val_label_path
    old_label_path = os.path.join(label_path, label)
    for img in images_by_stem.get(os.path.splitext(label)[0], ()):
        old_image_path = os.path.join(image_path, img)
        shutil.copyfile(old_image_path, image_dst + img)
        shutil.copyfile(old_label_path, label_dst + label)
2、YOLO格式转到XML格式:.txt→.xml
from xml.dom.minidom import Document
import os
import cv2
# def makexml(txtPath, xmlPath, picPath): # txt所在文件夹路径,xml文件保存路径,图片所在文件夹路径
def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def _yolo_to_voc_box(fields, img_width, img_height):
    """Convert YOLO ``[cx, cy, w, h]`` fractions to integer pixel corners."""
    cx, cy, bw, bh = (float(value) for value in fields)
    xmin = int((cx * img_width + 1) - bw * 0.5 * img_width)
    ymin = int((cy * img_height + 1) - bh * 0.5 * img_height)
    xmax = int((cx * img_width + 1) + bw * 0.5 * img_width)
    ymax = int((cy * img_height + 1) + bh * 0.5 * img_height)
    return xmin, ymin, xmax, ymax


def makexml(picPath, txtPath, xmlPath, class_map=None):
    """Convert YOLO txt label files into VOC style xml annotation files.

    For every ``.txt`` file in *txtPath* the image with the same file stem
    is looked up in *picPath* to obtain width, height and channel count,
    and ``<stem>.xml`` is written to *xmlPath*.

    :param picPath: folder containing the images
    :param txtPath: folder containing the YOLO txt label files
    :param xmlPath: folder the xml files are written to (created if missing)
    :param class_map: dict mapping the class id string found in the txt file
        to the class name written to the xml; defaults to ``{'0': "person"}``
    :raises KeyError: if a label line uses a class id missing in *class_map*
    """
    if class_map is None:
        class_map = {'0': "person"}
    os.makedirs(xmlPath, exist_ok=True)
    # Pair labels and images by file stem. Pairing by listing position
    # breaks as soon as the two folders differ in order or file count.
    images_by_stem = {os.path.splitext(f)[0]: f for f in os.listdir(picPath)}
    for name in sorted(os.listdir(txtPath)):
        stem, ext = os.path.splitext(name)
        if ext.lower() != '.txt':
            continue
        image_name = images_by_stem.get(stem)
        if image_name is None:
            print('No image found for ' + name + ', skipped')
            continue
        img = cv2.imread(os.path.join(picPath, image_name))
        if img is None:
            print('Cannot read image ' + image_name + ', skipped')
            continue
        Pheight, Pwidth, Pdepth = img.shape
        with open(os.path.join(txtPath, name)) as txtFile:
            txtList = txtFile.readlines()

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)
        _append_text_element(xmlBuilder, annotation, "folder", "driving_annotation_dataset")
        # Record the real image file name rather than assuming ".jpg".
        _append_text_element(xmlBuilder, annotation, "filename", image_name)
        size = xmlBuilder.createElement("size")
        _append_text_element(xmlBuilder, size, "width", Pwidth)
        _append_text_element(xmlBuilder, size, "height", Pheight)
        _append_text_element(xmlBuilder, size, "depth", Pdepth)
        annotation.appendChild(size)

        for line in txtList:
            oneline = line.split()
            if len(oneline) < 5:
                # Blank or malformed line: nothing to convert.
                continue
            if oneline[0] not in class_map:
                raise KeyError(
                    "class id %r in %s is not in class_map" % (oneline[0], name))
            obj = xmlBuilder.createElement("object")
            _append_text_element(xmlBuilder, obj, "name", class_map[oneline[0]])
            _append_text_element(xmlBuilder, obj, "pose", "Unspecified")
            _append_text_element(xmlBuilder, obj, "truncated", "0")
            _append_text_element(xmlBuilder, obj, "difficult", "0")
            bndbox = xmlBuilder.createElement("bndbox")
            corners = _yolo_to_voc_box(oneline[1:5], Pwidth, Pheight)
            for tag, value in zip(("xmin", "ymin", "xmax", "ymax"), corners):
                _append_text_element(xmlBuilder, bndbox, tag, value)
            obj.appendChild(bndbox)
            annotation.appendChild(obj)

        # Open the file as UTF-8 so the bytes match the encoding that
        # writexml puts into the XML declaration.
        with open(os.path.join(xmlPath, stem + ".xml"), 'w', encoding='utf-8') as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')
if __name__ == "__main__":
    # makexml inserts the separator between folder and file name itself,
    # so the folders are given without a trailing slash.
    folders = {
        "picPath": r"D:\MOT17\images\train",  # folder containing the images
        "txtPath": r"D:\MOT17\labels\train",  # folder containing the YOLO txt labels
        "xmlPath": r"D:\MOT17\xml",  # folder the xml files are saved to
    }
    makexml(folders["picPath"], folders["txtPath"], folders["xmlPath"])
二、标注验证
1、XML格式文件的验证xml
# -*- coding: utf-8 -*-
from __future__ import division
import os
import xml.dom.minidom
import cv2
import sys
import numpy as np
# from imp import reload
# reload(sys)
def read_xml(ImgPath, AnnoPath, Savepath):
    """Draw the boxes of every VOC xml annotation onto its image.

    For each ``.xml`` file in *AnnoPath* the image ``<stem>.jpg`` is loaded
    from *ImgPath*, every ``<bndbox>`` is drawn with the object name above
    it, and the result is saved as ``<stem>.jpg`` in *Savepath*. Images
    without any drawable box are not saved. Problems with a single file or
    box are printed and skipped.

    :param ImgPath: folder containing the images
    :param AnnoPath: folder containing the xml annotations
    :param Savepath: output folder (created if missing)
    """
    # Colors cycled per box. The former if/elif chain tested ``i % 3``
    # first, so only its first three colors could ever be selected.
    palette = [
        (128, 0, 0), (153, 51, 0), (255, 204, 0), (0, 51, 0), (51, 204, 204),
        (128, 0, 128), (0, 255, 255), (60, 179, 113), (255, 127, 80), (0, 255, 0),
    ]
    font = cv2.FONT_HERSHEY_SIMPLEX
    os.makedirs(Savepath, exist_ok=True)
    for image in os.listdir(AnnoPath):
        image_pre, ext = os.path.splitext(image)
        if ext.lower() != '.xml':
            continue
        imgfile = os.path.join(ImgPath, image_pre + '.jpg')
        xmlfile = os.path.join(AnnoPath, image)
        print(imgfile)
        print(xmlfile)
        # fromfile + imdecode (instead of imread) is kept from the original;
        # NOTE(review): presumably chosen so non-ASCII paths load - confirm.
        try:
            raw = np.fromfile(imgfile, dtype=np.uint8)
        except OSError as e:
            print('Cannot read image %s: %s' % (imgfile, e))
            continue
        im = cv2.imdecode(raw, cv2.IMREAD_UNCHANGED) if raw.size else None
        if im is None:
            print('Cannot decode image %s' % imgfile)
            continue
        annotation = xml.dom.minidom.parse(xmlfile).documentElement
        drawn = False
        i = 1
        for objects in annotation.getElementsByTagName('object'):
            namelist = objects.getElementsByTagName('name')
            if not namelist or not namelist[0].childNodes:
                print('Object without a name in %s, skipped' % xmlfile)
                continue
            objectname = namelist[0].childNodes[0].data
            for box in objects.getElementsByTagName('bndbox'):
                try:
                    # int(float(...)) also accepts values such as "12.0".
                    minX, minY, maxX, maxY = (
                        int(float(box.getElementsByTagName(tag)[0].childNodes[0].data))
                        for tag in ('xmin', 'ymin', 'xmax', 'ymax')
                    )
                    color = palette[i % len(palette)]
                    cv2.rectangle(im, (minX, minY), (maxX, maxY), color, 8)
                    cv2.putText(im, objectname, (minX, minY - 7), font, 0.7, (0, 0, 255), 2)
                    drawn = True
                    i += 1
                except Exception as e:
                    # Best effort: report the bad box and go on with the rest.
                    print('%s: %s' % (xmlfile, e))
        if drawn:
            # Encode and write once per image instead of once per box.
            path = os.path.join(Savepath, image_pre + '.jpg')
            cv2.imencode(".jpg", im)[1].tofile(path)
if __name__ == "__main__":
    # Input images, input xml annotations, and the output folder for the
    # images with the boxes drawn on them.
    img_path, xml_path, save_path = (
        r'D:\person\newtrain\JPEG/',
        r'D:\person\newtrain\Annotation/',
        r'D:\person\newJPEG/',
    )
    read_xml(img_path, xml_path, save_path)
2、YOLO格式文件的验证txt
import cv2
import os
def draw_box_in_single_image(image_path, txt_path):
    """Draw the YOLO boxes of one label file onto one image and save it.

    The result is written to ``./VOCData/see_images/<image stem>.png`` when
    the label file contains at least one box; otherwise ``None`` is printed.

    :param image_path: path of the image
    :param txt_path: path of the YOLO txt file whose lines are
        ``class x_center y_center width height`` with normalized box values
    """
    image = cv2.imread(image_path)
    if image is None:
        print('Cannot read image: ' + image_path)
        return

    def read_list(txt_path):
        """Return the label file as a list of float rows."""
        pos = []
        with open(txt_path, 'r') as file_to_read:
            for line in file_to_read:
                # split() without an argument tolerates repeated and
                # trailing blanks, which made float() fail before.
                fields = line.split()
                if len(fields) < 5:
                    # Blank or incomplete line.
                    continue
                pos.append([float(value) for value in fields])
        return pos

    def convert(size, box):
        """Map a normalized YOLO row to pixel (xmin, ymin, xmax, ymax).

        ``size`` is ``image.shape``, i.e. (height, width, ...).
        """
        xmin = (box[1] - box[3] / 2.) * size[1]
        xmax = (box[1] + box[3] / 2.) * size[1]
        ymin = (box[2] - box[4] / 2.) * size[0]
        ymax = (box[2] + box[4] / 2.) * size[0]
        return (int(xmin), int(ymin), int(xmax), int(ymax))

    pos = read_list(txt_path)
    print(pos)
    for row in pos:
        label = str(int(row[0]))
        print('label is ' + label)
        box = convert(image.shape, row)
        image = cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2)
        cv2.putText(image, label, (box[0], box[1] - 2), 0, 1, [0, 0, 255], thickness=2, lineType=cv2.LINE_AA)

    # Accept both '\\' and '/' as separator and strip the real extension
    # instead of assuming it is three characters long.
    base_name = image_path.replace('\\', '/').rsplit('/', 1)[-1]
    out_path = './VOCData/see_images/{}.png'.format(os.path.splitext(base_name)[0])
    if pos:
        os.makedirs('./VOCData/see_images', exist_ok=True)
        cv2.imwrite(out_path, image)
    else:
        print('None')
    print(out_path)
img_folder = "./image/train"
img_list = os.listdir(img_folder)
img_list.sort()
label_folder = "./label/train"
label_list = os.listdir(label_folder)
label_list.sort()
os.makedirs('./VOCData/see_images', exist_ok=True)
# Pair every image with the label file of the same stem. Pairing the two
# sorted listings by position mixes up files, or raises IndexError, as
# soon as one folder has an extra or missing entry.
labels_by_stem = {os.path.splitext(label_name)[0]: label_name for label_name in label_list}
for img_name in img_list:
    label_name = labels_by_stem.get(os.path.splitext(img_name)[0])
    if label_name is None:
        print('No label file for ' + img_name)
        continue
    image_path = os.path.join(img_folder, img_name)
    txt_path = os.path.join(label_folder, label_name)
    draw_box_in_single_image(image_path, txt_path)