几个对目标检测标签进行常见修改的Python代码

本文链接：https://blog.csdn.net/ncusz/article/details/128116024

1. 修改VOC标签的名称

该代码用于修改目标检测VOC标签中的类别名称。

<?xml version='1.0' encoding='us-ascii'?>
<annotation>
	<folder>img</folder>
	<filename>12.jpg</filename>
	<path>E:\img\12.jpg</path>
	<source>
		<database>Unknown</database>
	</source>
	<size>
		<width>640</width>
		<height>640</height>
		<depth>3</depth>
	</size>
	<segmented>0</segmented>
	<object>
		<name>insulator</name>
		<pose>Unspecified</pose>
		<truncated>1</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>203</xmin>
			<ymin>177</ymin>
			<xmax>281</xmax>
			<ymax>640</ymax>
		</bndbox>
	</object>
	<object>
		<name>insulator</name>
		<pose>Unspecified</pose>
		<truncated>1</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>344</xmin>
			<ymin>167</ymin>
			<xmax>436</xmax>
			<ymax>640</ymax>
		</bndbox>
	</object>
</annotation>

比如说我们需要将上面voc标签中的“insulator”改为“insulator2”，就可以在下面代码中将“name= ['原始类别', '修改后的类别']”修改为“name= ['insulator', 'insulator2']”。程序中path表示的是存放voc标签的路径。

import os.path
import xml.dom.minidom

# Directory that holds the VOC xml files (raw string so backslashes stay literal)
path = r'D:\csv2coco\Annotations_new'
# Class name to replace: ['original class name', 'new class name']
name = ['原始类别', '修改后的类别']


def rename_voc_labels(xml_dir, mapping):
    """Rename one object class in every VOC xml file inside ``xml_dir``.

    Args:
        xml_dir: Directory containing the ``*.xml`` annotation files.
        mapping: Two-item sequence ``[old_name, new_name]``.

    Returns:
        The number of ``<name>`` elements whose text was replaced.

    Only ``<name>`` elements located inside an ``<object>`` element are
    considered, so unrelated tags such as ``<owner><name>`` are left alone.
    A file is rewritten (as UTF-8, with a matching XML declaration) only
    when at least one of its labels changed.
    """
    old_name, new_name = mapping
    renamed = 0
    for xml_file in sorted(os.listdir(xml_dir)):
        # The folder may contain images or other files; only parse xml.
        if not xml_file.lower().endswith('.xml'):
            continue
        xml_path = os.path.join(xml_dir, xml_file)
        dom = xml.dom.minidom.parse(xml_path)
        changed = False
        for obj in dom.documentElement.getElementsByTagName('object'):
            for node in obj.getElementsByTagName('name'):
                text = node.firstChild
                # An empty <name/> has no text child; skip it instead of crashing.
                if text is not None and text.data == old_name:
                    text.data = new_name
                    changed = True
                    renamed += 1
        if changed:
            # Write bytes and declaration in the same encoding so that
            # non-ASCII class names survive a later re-parse.
            with open(xml_path, 'w', encoding='utf-8') as fh:
                dom.writexml(fh, encoding='utf-8')
    return renamed


if __name__ == '__main__':
    rename_voc_labels(path, name)

2. voc标签转换为txt标签

该代码用来从VOC格式的 annotations/*.xml 生成YOLO格式的 labels/*.txt：从xml文件中提取对应于图片的注释信息，并将其按YOLO训练所需的格式存储在*.txt文件中，一个xml文件生成一个对应的txt文件。


import os.path
import xml.etree.ElementTree as ET
# Class list; the position of a name in this list becomes its YOLO class id.
class_names = ['类别1', '类别2', '类别3', '类别4']

xmlpath = ''  # directory containing the source VOC xml files
txtpath = ''  # directory that receives the generated YOLO txt files


def voc_xml_to_yolo(xml_file_name, names):
    """Convert one VOC xml annotation file into YOLO label text.

    Args:
        xml_file_name: Path of the VOC xml file.
        names: List of class names; the index in this list is the class id.

    Returns:
        One line per ``<object>`` in the form
        ``class_id x_center y_center width height`` (box values divided by
        the image width/height), joined with newlines and without a trailing
        newline. An annotation without objects yields an empty string.

    Raises:
        ValueError: If the image size is not positive or an object uses a
            class name that is not in ``names``.
    """
    with open(xml_file_name, encoding='utf-8') as xml_file:
        root = ET.parse(xml_file).getroot()

    size = root.find('size')
    w = float(size.find('width').text)
    h = float(size.find('height').text)
    if w <= 0 or h <= 0:
        raise ValueError('invalid image size in ' + str(xml_file_name))

    rows = []
    for obj in root.iter('object'):
        label = obj.find('name').text
        if label not in names:
            raise ValueError(
                'unknown class ' + repr(label) + ' in ' + str(xml_file_name))
        class_num = names.index(label)

        xmlbox = obj.find('bndbox')
        # float() accepts both "203" and "203.0" style coordinates.
        x1 = float(xmlbox.find('xmin').text)
        x2 = float(xmlbox.find('xmax').text)
        y1 = float(xmlbox.find('ymin').text)
        y2 = float(xmlbox.find('ymax').text)

        rows.append(
            str(class_num) + " " +
            str((x1 + x2) / 2 / w) + " " + str((y1 + y2) / 2 / h) + " " +
            str((x2 - x1) / w) + " " + str((y2 - y1) / h)
        )
    return "\n".join(rows)


def convert_voc_dir_to_yolo(xml_dir, txt_dir, names):
    """Convert every ``*.xml`` file directly inside ``xml_dir`` to a YOLO txt file.

    Args:
        xml_dir: Directory with the VOC xml files.
        txt_dir: Output directory; created when it does not exist.
        names: List of class names passed on to ``voc_xml_to_yolo``.

    Returns:
        The number of xml files that were converted.
    """
    os.makedirs(txt_dir, exist_ok=True)
    xml_names = sorted(
        f for f in os.listdir(xml_dir) if f.lower().endswith('.xml'))
    number = len(xml_names)
    print(number)

    for done, xml_name in enumerate(xml_names, start=1):
        stem = os.path.splitext(xml_name)[0]
        content = voc_xml_to_yolo(os.path.join(xml_dir, xml_name), names)
        with open(os.path.join(txt_dir, stem + '.txt'), 'w') as f_txt:
            f_txt.write(content)
        # Progress is based on the count of finished files, so a single
        # file no longer divides by zero.
        print(str(done / number * 100) + "%\n")
    return number


if __name__ == '__main__':
    if not xmlpath or not txtpath:
        raise SystemExit('Please set xmlpath and txtpath before running.')
    convert_voc_dir_to_yolo(xmlpath, txtpath, class_names)

3. CSV标签转换为VOC标签

此代码会涉及到一些环境库的安装：

1. 安装pandas库

  pip install pandas

2. glob库为Python标准库，通常无需单独安装（python3.7环境下如需额外安装，可执行下面的命令）

pip install glob3

3. 安装cv2库

pip install opencv-python

1. 首先更改代码中以下几个配置。

csv_file = "labels.csv"             # csv label file
saved_path = "./VOC2007/"           # path to save converted voc dataset
image_save_path = "./JPEGImages/"   # converted voc images path
image_raw_parh = "images/"          # original image path

2. 然后运行程序。

3. 同样会自动创建文件夹并复制图片到相应位置。

import os
import numpy as np
import codecs
import pandas as pd
import json
from glob import glob
import cv2
import shutil
from sklearn.model_selection import train_test_split
from IPython import embed
# 1. Paths (edit these before running)
csv_file = "../csv/train_labels.csv"   # csv label file, read without a header row
saved_path = "./VOCdevkit/VOC2007/"    # root folder of the generated VOC dataset
image_save_path = "./JPEGImages/"      # image folder, relative to saved_path
image_raw_parh = "../csv/images/"      # folder holding the original images


def voc_xml_name(filename):
    """Return the annotation file name for an image file name of any extension."""
    return os.path.splitext(filename)[0] + ".xml"


def load_csv_annotations(csv_path):
    """Read the csv file and group its rows by image file name.

    Column 0 of every row is the image path; only its last component is
    kept as the key, and both ``/`` and ``\\`` are accepted as separators
    regardless of the platform the script runs on.

    Args:
        csv_path: Path of the csv file (parsed with ``header=None``).

    Returns:
        Dict mapping image file name to a 2-D array with one row per csv
        line, holding the remaining columns of that line.
    """
    grouped = {}
    for row in pd.read_csv(csv_path, header=None).values:
        key = str(row[0]).replace("\\", "/").rsplit("/", 1)[-1]
        grouped.setdefault(key, []).append(row[1:])
    return {key: np.array(rows) for key, rows in grouped.items()}


def build_voc_xml(filename, width, height, channels, label):
    """Build the VOC annotation text for one image.

    Args:
        filename: Image file name written into ``<filename>``.
        width: Image width in pixels.
        height: Image height in pixels.
        channels: Number of image channels, written into ``<depth>``.
        label: Iterable of rows; items 0-3 of a row are read as
            xmin, ymin, xmax, ymax and the last item as the class name.

    Returns:
        The complete ``<annotation>`` document as a string.

    Rows whose box is empty (NaN) and boxes with ``xmax <= xmin`` or
    ``ymax <= ymin`` produce no ``<object>`` element.
    """
    from xml.sax.saxutils import escape

    parts = [
        '<annotation>\n',
        '\t<folder>' + 'UAV_data' + '</folder>\n',
        '\t<filename>' + escape(str(filename)) + '</filename>\n',
        '\t<source>\n',
        '\t\t<database>The UAV autolanding</database>\n',
        '\t\t<annotation>UAV AutoLanding</annotation>\n',
        '\t\t<image>flickr</image>\n',
        '\t\t<flickrid>NULL</flickrid>\n',
        '\t</source>\n',
        '\t<owner>\n',
        '\t\t<flickrid>NULL</flickrid>\n',
        '\t\t<name>ChaojieZhu</name>\n',
        '\t</owner>\n',
        '\t<size>\n',
        '\t\t<width>' + str(width) + '</width>\n',
        '\t\t<height>' + str(height) + '</height>\n',
        '\t\t<depth>' + str(channels) + '</depth>\n',
        '\t</size>\n',
        '\t\t<segmented>0</segmented>\n',
    ]
    for labels in label:
        # A csv line without a box is read as NaN values: the image simply
        # has no object on that line.
        if pd.isna(np.asarray(labels[:4], dtype=object)).any():
            continue
        xmin = int(labels[0])
        ymin = int(labels[1])
        xmax = int(labels[2])
        ymax = int(labels[3])
        if xmax <= xmin or ymax <= ymin:
            continue  # degenerate box
        # str() tolerates numeric class ids; escape() keeps the xml valid.
        label_ = escape(str(labels[-1]))
        parts.extend([
            '\t<object>\n',
            '\t\t<name>' + label_ + '</name>\n',
            '\t\t<pose>Unspecified</pose>\n',
            '\t\t<truncated>1</truncated>\n',
            '\t\t<difficult>0</difficult>\n',
            '\t\t<bndbox>\n',
            '\t\t\t<xmin>' + str(xmin) + '</xmin>\n',
            '\t\t\t<ymin>' + str(ymin) + '</ymin>\n',
            '\t\t\t<xmax>' + str(xmax) + '</xmax>\n',
            '\t\t\t<ymax>' + str(ymax) + '</ymax>\n',
            '\t\t</bndbox>\n',
            '\t</object>\n',
        ])
        print(filename, xmin, ymin, xmax, ymax, labels)
    parts.append('</annotation>')
    return "".join(parts)


def write_voc_annotations(annotations, image_dir, annotation_dir):
    """Write one VOC xml file per annotated image.

    Args:
        annotations: Dict as returned by ``load_csv_annotations``.
        image_dir: Folder with the original images (read to get their size).
        annotation_dir: Existing folder that receives the xml files.

    Returns:
        The number of xml files written. Images that cannot be read are
        reported and skipped, because their size is unknown.
    """
    written = 0
    for filename, label in annotations.items():
        image = cv2.imread(os.path.join(image_dir, filename))
        if image is None:  # cv2.imread signals a missing/unreadable file this way
            print("skip, cannot read image: " + os.path.join(image_dir, filename))
            continue
        height, width, channels = image.shape
        target = os.path.join(annotation_dir, voc_xml_name(filename))
        with codecs.open(target, "w", "utf-8") as fh:
            fh.write(build_voc_xml(filename, width, height, channels, label))
        written += 1
    return written


def write_split_files(annotation_dir, split_dir):
    """Create trainval/train/val/test list files from the xml files present.

    Args:
        annotation_dir: Folder containing the generated ``*.xml`` files.
        split_dir: Existing folder that receives the four ``*.txt`` files.

    Returns:
        Tuple ``(train_files, val_files)`` of annotation names without
        extension. ``trainval.txt`` lists every name, ``test.txt`` is
        created empty. With fewer than two annotations no split is
        possible, so everything goes to train.
    """
    total_files = sorted(
        os.path.splitext(os.path.basename(p))[0]
        for p in glob(os.path.join(annotation_dir, "*.xml"))
    )
    if len(total_files) >= 2:
        train_files, val_files = train_test_split(
            total_files, test_size=0.15, random_state=42)
    else:
        train_files, val_files = list(total_files), []

    splits = {
        "trainval.txt": total_files,
        "train.txt": train_files,
        "val.txt": val_files,
        "test.txt": [],
    }
    for split_name, names in splits.items():
        with open(os.path.join(split_dir, split_name), "w") as fh:
            fh.writelines(item + "\n" for item in names)
    return train_files, val_files


def main():
    """Run the whole csv -> VOC conversion with the paths configured above."""
    # 2. Create the required folders
    annotation_dir = os.path.join(saved_path, "Annotations")
    image_dir = os.path.normpath(os.path.join(saved_path, image_save_path))
    split_dir = os.path.join(saved_path, "ImageSets", "Main")
    for folder in (annotation_dir, image_dir, split_dir):
        os.makedirs(folder, exist_ok=True)

    # 3. Collect the annotations, 4. write them as xml
    total_csv_annotations = load_csv_annotations(csv_file)
    write_voc_annotations(total_csv_annotations, image_raw_parh, annotation_dir)

    # 5. Write the ImageSets/Main list files
    write_split_files(annotation_dir, split_dir)

    # 6. Copy the images into the VOC image folder
    for image in glob(os.path.join(image_raw_parh, "*.jpg")):
        shutil.copy(image, image_dir)


if __name__ == "__main__":
    main()