提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
一、TXT格式的标注文件转为XML
################################################################
#转换之前准备工作:
#1、图像数据集,jpg格式
#2、txt格式标注文件,txt格式
#3、yaml文件
#example:狗的数据集 ..//DataSet_operate//RawData
# ---images 图片文件夹
# ---dog0001.jpg
# ---txts 标注文件夹
# ---dog0001.txt
# ---dogdata.yaml
################################################################
import os
import opeartion
def PreparationCheck(path):
    """Verify that ``path`` holds everything the TXT -> XML conversion needs.

    Three things are checked, in order, and a status line is printed for
    each: an ``images`` sub-folder, a ``txts`` sub-folder, and at least one
    ``*.yaml`` file directly inside ``path``.

    Args:
        path: Root directory of the dataset.

    Raises:
        SystemExit: With exit code -1 as soon as one requirement is missing.
    """
    # isdir rather than exists: a plain file that happens to be called
    # "images" or "txts" is not a usable folder.
    imgfilespath = path + "//images"
    if os.path.isdir(imgfilespath):
        print("img文件夹存在")
    else:
        print("img文件夹不存在,准备工作失败")
        # Raise SystemExit directly; the exit() helper is injected by the
        # site module and is not guaranteed to be available.
        raise SystemExit(-1)

    txtfilespath = path + "//txts"
    if os.path.isdir(txtfilespath):
        print("txt文件夹存在")
    else:
        print("txt文件夹不存在,准备工作失败")
        raise SystemExit(-1)

    if any(name.endswith(".yaml") for name in os.listdir(path)):
        print("yaml文件存在")
    else:
        print("yaml文件不存在,准备工作失败")
        raise SystemExit(-1)
if __name__ == '__main__':
    # Root directory of the dataset that is going to be converted.
    rootpath = "E://PycharmProjects//myUse//DataSet_operate//RawData"

    # Validate the folder layout first, then convert the TXT labels to XML.
    print("检查准备工作")
    PreparationCheck(rootpath)
    opeartion.TransformToxmls(rootpath)
二、XML格式的标注文件转为TXT
################################################################
#转换之前准备工作:
#1、图像数据集,jpg格式
#2、xml格式标注文件,xml格式
#3、yaml文件
#example:狗的数据集 ..//DataSet_operate//RawData
# ---images 图片文件夹
# ---dog0001.jpg
# ---xmls 标注文件夹
# ---dog0001.xml
# ---dogdata.yaml
################################################################
import os
import opeartion
def PreparationCheck(path):
    """Verify that ``path`` contains the ``xmls`` annotation folder.

    A status line is printed for the check.

    Args:
        path: Root directory of the dataset.

    Raises:
        SystemExit: With exit code -1 when the ``xmls`` folder is missing.
    """
    # isdir rather than exists: a plain file called "xmls" is not usable.
    xmlfilespath = path + "//xmls"
    if os.path.isdir(xmlfilespath):
        print("xmls文件夹存在")
    else:
        print("xmls文件夹不存在,准备工作失败")
        # Raise SystemExit directly; the exit() helper is injected by the
        # site module and is not guaranteed to be available.
        raise SystemExit(-1)
if __name__ == '__main__':
    # Root directory of the dataset that is going to be converted.
    rootpath = "E://PycharmProjects//myUse//DataSet_operate//RawData"

    # Validate the folder layout first, then convert the XML labels to TXT.
    print("检查准备工作")
    PreparationCheck(rootpath)
    opeartion.TransformTotxts(rootpath)
三、数据集按照比例划分
################################################################
#转换之前准备工作:
#1、图像数据集,jpg格式
#2、txt格式标注文件,txt格式
#example:狗的数据集 ..//DataSet_operate//RawData
# ---images 图片文件夹
# ---dog0001.jpg
# ---txts 标注文件夹
# ---dog0001.txt
################################################################
import os
import opeartion
def PreparationCheck(path):
    """Verify that ``path`` holds the folders needed to split the dataset.

    Checks, in order, for an ``images`` sub-folder and a ``txts`` sub-folder
    and prints a status line for each.

    Args:
        path: Root directory of the dataset.

    Raises:
        SystemExit: With exit code -1 as soon as one folder is missing.
    """
    # isdir rather than exists: a plain file that happens to be called
    # "images" or "txts" is not a usable folder.
    imgfilespath = path + "//images"
    if os.path.isdir(imgfilespath):
        print("img文件夹存在")
    else:
        print("img文件夹不存在,准备工作失败")
        # Raise SystemExit directly; the exit() helper is injected by the
        # site module and is not guaranteed to be available.
        raise SystemExit(-1)

    txtfilespath = path + "//txts"
    if os.path.isdir(txtfilespath):
        print("txt文件夹存在")
    else:
        print("txt文件夹不存在,准备工作失败")
        raise SystemExit(-1)
if __name__ == '__main__':
    # Ratio handed to PartitionDataset together with the dataset root.
    divratio = 0.8
    # Root directory of the dataset that is going to be split.
    rootpath = "E://PycharmProjects//myUse//DataSet_operate//RawData"

    # Validate the folder layout first, then split the dataset.
    print("检查准备工作")
    PreparationCheck(rootpath)
    opeartion.PartitionDataset(rootpath, divratio)
四、底层代码
import os
import yaml
import random
import shutil
import cv2
import xml.etree.ElementTree as ET
from xml.dom.minidom import Document
#数据转换
def convert(size, box):
    """Turn a pixel bounding box into normalised centre/extent form.

    Args:
        size: ``(width, height)`` of the source image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` of the box in pixels.

    Returns:
        A tuple ``(x, y, w, h)``: the box centre and the box width/height,
        each multiplied by the reciprocal of the matching image dimension.
    """
    # Reciprocals of the image width and height.
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]

    xmin, xmax = box[0], box[1]
    ymin, ymax = box[2], box[3]

    # Box centre in pixels.
    center_x = (xmin + xmax) / 2.0
    center_y = (ymin + ymax) / 2.0

    return (
        center_x * scale_x,        # centre x relative to image width
        center_y * scale_y,        # centre y relative to image height
        (xmax - xmin) * scale_x,   # box width relative to image width
        (ymax - ymin) * scale_y,   # box height relative to image height
    )
#将数据集按照指定比例进行划分成val、train、test
def PartitionDataset(path, ratio):
    """Split a labelled image set into train / val / test folders.

    Reads images from ``<path>//images`` and label files from
    ``<path>//txts`` and (re)creates ``.//Results//datasets`` with this
    layout::

        images/train  images/val  images/test
        labels/train  labels/val
        classification.yaml

    An image whose file stem matches a label file is copied, together with
    that label, to ``train``; every other image is copied to
    ``images/test``. Label files without an image are ignored. Afterwards a
    random share of the train pairs is moved to ``val``.

    Args:
        path: Root directory of the source dataset.
        ratio: Fraction of the labelled pairs that stays in ``train``; the
            remaining ``1 - ratio`` share (rounded down) goes to ``val``.
    """
    srcimgfilepath = path + "//images"
    srctxtfilepath = path + "//txts"
    resultrootpath = ".//Results//datasets"
    yamlfilepath = resultrootpath + "//classification.yaml"
    imagesfilepath = resultrootpath + "//images"
    labelsfilepath = resultrootpath + "//labels"

    if os.path.exists(resultrootpath):
        shutil.rmtree(resultrootpath)
        print("数据集文件已经存在!重新创建")
    # makedirs creates the intermediate folders as well.
    for folder in (
        imagesfilepath + "//train",
        imagesfilepath + "//val",
        imagesfilepath + "//test",
        labelsfilepath + "//train",
        labelsfilepath + "//val",
    ):
        os.makedirs(folder)
    print("新创建数据集文件夹")

    # Index the label files by stem once instead of re-listing the image
    # folder for every label. splitext copes with extensions of any length
    # (e.g. ".jpeg"), unlike slicing off the last four characters.
    labels_by_stem = {}
    for txtfilename in sorted(os.listdir(srctxtfilepath)):
        labels_by_stem.setdefault(os.path.splitext(txtfilename)[0], txtfilename)

    pairs = []  # (image file name, label file name) now sitting in train
    for imgfilename in sorted(os.listdir(srcimgfilepath)):
        imgpath = os.path.join(srcimgfilepath, imgfilename)
        # pop: a label is paired with at most one image, so a second image
        # with the same stem is treated as unlabelled.
        labelname = labels_by_stem.pop(os.path.splitext(imgfilename)[0], None)
        if labelname is None:
            shutil.copy(imgpath, imagesfilepath + "//test")
            continue
        shutil.copy(imgpath, imagesfilepath + "//train")
        shutil.copy(os.path.join(srctxtfilepath, labelname), labelsfilepath + "//train")
        pairs.append((imgfilename, labelname))

    # Randomly move the validation share out of train.
    # round(..., 9) absorbs float error before truncating: 10 * (1 - 0.8)
    # evaluates to 1.9999999999999996 and would otherwise become 1.
    valnum = int(round(len(pairs) * (1 - ratio), 9))
    valnum = max(0, min(len(pairs), valnum))
    random.shuffle(pairs)
    for imgfilename, labelname in pairs[:valnum]:
        try:
            shutil.move(os.path.join(imagesfilepath, "train", imgfilename), imagesfilepath + "//val")
            shutil.move(os.path.join(labelsfilepath, "train", labelname), labelsfilepath + "//val")
            print(f"Moved {imgfilename} to val")
        except OSError as e:
            # Best effort: report the failure and carry on with the rest.
            print(f"Failed to move {imgfilename}: {e}")

    # One dump per key keeps the keys in this order in the written file.
    with open(yamlfilepath, 'w', encoding='utf-8') as yamlFile:
        for key, value in (
            ("path", "../datasets"),
            ("train", "images/train"),
            ("val", "images/val"),
            ("test", "images/test"),
        ):
            yaml.dump({key: value}, yamlFile)
    print('数据集已经划分成train、val、test')
#将xml格式的标注数据转换到txt
def TransformTotxts(path):
    """Convert XML annotation files into one TXT label file per image.

    Reads every ``*.xml`` file in ``<path>//xmls`` and (re)creates
    ``.//Results//txts``. For each XML file a TXT file with the same stem is
    written, holding one line per ``<object>``::

        <class index> <x> <y> <w> <h>

    where the four numbers come from ``convert``. Class indices are handed
    out in order of first appearance. Finally ``classification.yaml`` is
    written into the output folder with ``nc`` (number of classes) and
    ``names`` (class index -> class name).

    Args:
        path: Root directory of the source dataset.
    """
    xmlfilepath = path + "//xmls"
    txtfilepath = ".//Results//txts"
    yamlfilepath = txtfilepath + "//classification.yaml"

    if os.path.exists(txtfilepath):
        print("已存在txts文件夹,删除重新创建")
        shutil.rmtree(txtfilepath)
    os.makedirs(txtfilepath)
    print("新创建txts文件夹")

    namelist = {}  # class name -> class index
    # sorted: makes the class numbering independent of directory order.
    for xmlname in sorted(os.listdir(xmlfilepath)):
        stem, ext = os.path.splitext(xmlname)
        if ext.lower() != ".xml":
            continue  # ignore anything that is not an annotation file

        # os.path.join instead of a hard-coded backslash so the path is
        # valid on every platform. Parsing by file name lets the parser
        # open and close the file itself.
        xmlroot = ET.parse(os.path.join(xmlfilepath, xmlname)).getroot()
        size = xmlroot.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        lines = []
        for obj in xmlroot.iter('object'):
            objbox = obj.find('bndbox')
            # convert() expects the box as (xmin, xmax, ymin, ymax).
            boxsize = tuple(
                float(objbox.find(tag).text)
                for tag in ('xmin', 'xmax', 'ymin', 'ymax')
            )
            infos = convert((w, h), boxsize)
            # A name seen for the first time gets the next free index.
            index = namelist.setdefault(obj.find('name').text, len(namelist))
            lines.append(str(index) + " " + " ".join(str(value) for value in infos) + '\n')

        with open(os.path.join(txtfilepath, stem + '.txt'), 'w', encoding='utf-8') as txt:
            txt.writelines(lines)

    # Write the class summary. "names" maps index -> name, which is the
    # orientation TransformToxmls reads back.
    with open(yamlfilepath, 'w', encoding='utf-8') as yamlFile:
        yaml.dump({'nc': len(namelist)}, yamlFile)
        names_by_index = {index: name for name, index in namelist.items()}
        yaml.dump({"names": names_by_index}, yamlFile, allow_unicode=True)
#将txt格式的标注数据转换到xml
def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def _load_class_names(yamlfilepath):
    """Return the ``names`` entry of a dataset YAML as ``{index: name}``."""
    with open(yamlfilepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)
    names = data['names']
    if not isinstance(names, dict):
        # A plain list: the position is the class index.
        return dict(enumerate(names))
    try:
        return {int(index): name for index, name in names.items()}
    except (TypeError, ValueError):
        # Some files store the mapping the other way round (name -> index).
        return {int(index): name for name, index in names.items()}


def TransformToxmls(path):
    """Convert TXT label files into one XML annotation file per image.

    Reads label files from ``<path>//txts``, the matching ``.jpg`` images
    from ``<path>//images`` and the class names from a ``*.yaml`` file
    directly inside ``path``. ``.//Results//xmls`` is (re)created and one
    XML file is written per label file that has a readable image.

    Each label line is expected to look like
    ``<class index> <x> <y> <w> <h>`` with the four numbers relative to the
    image size. Lines whose class index is not in the YAML are reported and
    skipped, as are label files without a matching image.

    Args:
        path: Root directory of the source dataset.

    Raises:
        FileNotFoundError: If ``path`` contains no ``*.yaml`` file.
    """
    imgfilepath = path + "//images"
    txtfilepath = path + "//txts"
    xmlfilepath = ".//Results//xmls"

    yamlfilepath = None
    for name in os.listdir(path):
        if name.endswith(".yaml"):
            yamlfilepath = path + "//" + name
    if yamlfilepath is None:
        # Fail before the old results are deleted.
        raise FileNotFoundError("no .yaml file found in " + path)
    # Parse the class table once instead of once per label line.
    class_names = _load_class_names(yamlfilepath)

    if os.path.exists(xmlfilepath):
        print("已存在xmls文件夹,删除重新创建")
        shutil.rmtree(xmlfilepath)
    os.makedirs(xmlfilepath)
    print("新创建xmls文件夹")

    for txtname in os.listdir(txtfilepath):
        stem = os.path.splitext(txtname)[0]

        # os.path.join instead of a hard-coded backslash so the paths are
        # valid on every platform.
        img = cv2.imread(os.path.join(imgfilepath, stem + ".jpg"))
        if img is None:
            print("没有同" + txtname + "匹配的图片文件")
            continue
        img_height, img_width, img_depth = img.shape

        with open(os.path.join(txtfilepath, txtname), encoding='utf-8') as txtFile:
            txtList = txtFile.readlines()

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)

        _append_text_element(xmlBuilder, annotation, "folder", "VOC2007")
        _append_text_element(xmlBuilder, annotation, "filename", stem + ".jpg")

        size = xmlBuilder.createElement("size")
        for tag, value in (("width", img_width), ("height", img_height), ("depth", img_depth)):
            _append_text_element(xmlBuilder, size, tag, str(value))
        annotation.appendChild(size)

        for line in txtList:
            oneline = line.split()
            if not oneline:
                continue  # blank line, e.g. at the end of the file
            index = oneline[0]
            if int(index) not in class_names:
                print('找不到' + index + '对应的物体种类')
                continue
            classname = class_names[int(index)]

            # Relative centre/extent -> pixel corners (offset by one pixel).
            cx, cy, bw, bh = (float(value) for value in oneline[1:5])
            corners = (
                ("xmin", int((cx * img_width + 1) - bw * 0.5 * img_width)),
                ("ymin", int((cy * img_height + 1) - bh * 0.5 * img_height)),
                ("xmax", int((cx * img_width + 1) + bw * 0.5 * img_width)),
                ("ymax", int((cy * img_height + 1) + bh * 0.5 * img_height)),
            )

            obj = xmlBuilder.createElement("object")
            _append_text_element(xmlBuilder, obj, "name", str(classname))
            _append_text_element(xmlBuilder, obj, "pose", "Unspecified")
            _append_text_element(xmlBuilder, obj, "truncated", "0")
            _append_text_element(xmlBuilder, obj, "difficult", "0")
            bndbox = xmlBuilder.createElement("bndbox")
            for tag, value in corners:
                _append_text_element(xmlBuilder, bndbox, tag, str(value))
            obj.appendChild(bndbox)
            annotation.appendChild(obj)

        # The file encoding must match the encoding named in the XML
        # declaration that writexml emits.
        with open(os.path.join(xmlfilepath, stem + ".xml"), 'w', encoding='utf-8') as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')
    print("标注文件转换完成")