目标检测数据集处理脚本

最新推荐文章于 2024-08-21 00:30:28 发布

Rock_Huang~

最新推荐文章于 2024-08-21 00:30:28 发布

阅读量1.1k

点赞数 1

分类专栏： python

本文链接：https://blog.csdn.net/weixin_38632246/article/details/93749294

版权

python 专栏收录该内容

49 篇文章 10 订阅

订阅专栏

功能一：将图像文件和标注文件对应，删去没有对应标注文件的图像

import os
import sys
def remove_unannotated_images(image_dir, xml_dir, image_ext='.jpg', xml_ext='.xml'):
    """Delete every image under ``image_dir`` that has no annotation file.

    An image is kept when a file with the same base name and ``xml_ext``
    exists anywhere under ``xml_dir``. Both trees are searched recursively.
    Note that an empty or missing ``xml_dir`` means every image is deleted.

    Args:
        image_dir: Root directory holding the images.
        xml_dir: Root directory holding the annotation files.
        image_ext: File name suffix identifying images.
        xml_ext: File name suffix identifying annotation files.

    Returns:
        The list of image paths that were removed.
    """
    # A set gives O(1) membership tests instead of one list scan per image.
    annotated = set()
    for _root, _dirs, files in os.walk(xml_dir):
        for name in files:
            if name.endswith(xml_ext):
                # splitext keeps dots inside the name ("a.b.xml" -> "a.b").
                annotated.add(os.path.splitext(name)[0])

    removed = []
    for root, _dirs, files in os.walk(image_dir):
        for name in files:
            if not name.endswith(image_ext):
                continue
            if os.path.splitext(name)[0] in annotated:
                continue
            # Use the directory the image was actually found in; os.walk
            # also descends into sub-directories of image_dir.
            path = os.path.join(root, name)
            os.remove(path)
            removed.append(path)
    return removed


if __name__ == "__main__":
    input_folder = r'C:\Users\rockhuang\Desktop\rider'
    input_xml = r'C:\Users\rockhuang\Desktop\rider_xml'
    for removed_path in remove_unannotated_images(input_folder, input_xml):
        print(removed_path)

功能二：生成目标检测在 Linux 下使用的 trainval.txt、test.txt。切记：Windows 下生成的 txt 一定要转为 Unix 格式（LF 换行）。具体做法：用 Notepad++ 打开，视图 → 显示符号 → 显示所有字符；然后 编辑 → 文档格式转换 → 转换为 Unix 格式。否则 Ubuntu 下读不了这种 txt，会报错。

import os, sys
import glob
 
def write_image_annotation_list(image_dir, list_path, image_ext='.jpg',
                                dist_img_dir=r"VOC2012/JPEGImages",
                                dist_anno_dir=r"VOC2012/Annotations"):
    """Write one "<image path> <annotation path>" line per image in ``image_dir``.

    Args:
        image_dir: Directory that is scanned (non-recursively) for images.
        list_path: Text file to create or overwrite.
        image_ext: Image file extension, including the leading dot.
        dist_img_dir: Image directory prefix written into the list.
        dist_anno_dir: Annotation directory prefix written into the list.

    Returns:
        The image base names, in the order they were written.
    """
    pattern = os.path.join(image_dir, '*' + image_ext)
    # Sorted so the list is reproducible; glob order is arbitrary.
    names = sorted(os.path.splitext(os.path.basename(p))[0]
                   for p in glob.glob(pattern))
    # newline='\n' forces Unix line endings even on Windows, so the list can
    # be read on Linux without a manual format conversion.
    with open(list_path, 'w', newline='\n') as list_file:
        for name in names:
            list_file.write(dist_img_dir + '/' + name + image_ext + ' '
                            + dist_anno_dir + '/' + name + '.xml\n')
    return names


if __name__ == "__main__":
    trainval_dir = r"C:\Users\rockhuang\Desktop\train"   # directory with the trainval images
    trainval_img_names = write_image_annotation_list(
        trainval_dir, r"C:\Users\rockhuang\Desktop\trainval1.txt")
    # For the test split, call the same function with the test image
    # directory and the path of test.txt (pass image_ext='.png' for PNGs).

功能三：生成 test_name_size.txt

import os, sys
import glob
from PIL import Image
 
def write_name_size_list(image_dir, list_path, image_ext='.jpg'):
    """Write "<name> <height> <width>" for every image in ``image_dir``.

    Args:
        image_dir: Directory that is scanned (non-recursively) for images.
        list_path: Text file to create or overwrite.
        image_ext: Image file extension, including the leading dot.

    Returns:
        The number of lines written.
    """
    count = 0
    # os.path.join avoids the invalid '\*' escape of the hand-built pattern.
    image_paths = sorted(glob.glob(os.path.join(image_dir, '*' + image_ext)))
    # newline='\n' keeps Unix line endings when the script runs on Windows.
    with open(list_path, 'w', newline='\n') as list_file:
        for image_path in image_paths:
            # The context manager closes the image file handle right away.
            with Image.open(image_path) as img:
                width, height = img.size
            name = os.path.splitext(os.path.basename(image_path))[0]
            list_file.write(name + ' ' + str(height) + ' ' + str(width) + '\n')
            count += 1
    return count


if __name__ == "__main__":
    img_dir = r'C:\Users\rockhuang\Desktop\yest'
    write_name_size_list(img_dir, r'C:\Users\rockhuang\Desktop\test_name_size.txt')

功能四：按固定帧间隔将视频帧保存为图片

import cv2
# 使用opencv按一定间隔截取视频帧，并保存为图片
import glob
import os


def save_video_frames(video_path, output_dir, start_index=0, interval=8, prefix='la'):
    """Save every ``interval``-th frame of a video as a JPEG image.

    Frames are numbered from ``start_index``; a frame is written to
    ``<output_dir>/<prefix><number>.jpg`` when its number is a multiple of
    ``interval``.

    Args:
        video_path: Video file to read.
        output_dir: Directory the images are written to.
        start_index: Number given to the first frame of this video.
        interval: Keep one frame out of every ``interval`` frames.
        prefix: File name prefix of the saved images.

    Returns:
        The number following the last frame read, so it can be passed as
        ``start_index`` for the next video to keep file names unique.
    """
    capture = cv2.VideoCapture(video_path)
    index = start_index
    try:
        # A capture that failed to open simply yields no frames.
        while capture.isOpened():
            ok, frame = capture.read()
            if not ok:
                # End of stream: stop before writing, frame is not valid here.
                break
            if index % interval == 0:
                cv2.imwrite(os.path.join(output_dir, prefix + str(index) + '.jpg'), frame)
            index += 1
    finally:
        capture.release()
    return index


if __name__ == "__main__":
    video_dir = r'C:\Users\rockhuang\Desktop\rider1'
    c = 900000  # running frame number, shared by all videos
    for vc1 in sorted(glob.glob(os.path.join(video_dir, '*.MOV'))):
        c = save_video_frames(vc1, video_dir, start_index=c, interval=8)

功能五：原图像显示标注框bounding box

注：这段代码原来用于处理 DETRAC 数据集，一些没用到的函数调用已被注释掉。

import xml.etree.ElementTree as ET
from xml.dom.minidom import Document
import os
import cv2
import time

def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def _build_voc_object(doc, target, image_width, image_height):
    """Build a VOC ``<object>`` from a ``<target>``, or None if it has no box."""
    box = None
    for target_child in target:
        if target_child.tag == "box":
            box = target_child
    if box is None:
        return None

    xmin_value = int(float(box.attrib["left"]))
    ymin_value = int(float(box.attrib["top"]))
    box_width_value = int(float(box.attrib["width"]))
    box_height_value = int(float(box.attrib["height"]))
    # Clip the far corner to the image size.
    xmax_value = min(xmin_value + box_width_value, image_width)
    ymax_value = min(ymin_value + box_height_value, image_height)

    voc_object = doc.createElement('object')
    _append_text_element(doc, voc_object, 'name', "car")
    _append_text_element(doc, voc_object, 'pose', "Left")      # arbitrary value
    _append_text_element(doc, voc_object, 'truncated', "0")    # arbitrary value
    _append_text_element(doc, voc_object, 'difficult', "0")    # arbitrary value

    bndbox = doc.createElement("bndbox")
    _append_text_element(doc, bndbox, "xmin", str(xmin_value))
    _append_text_element(doc, bndbox, "ymin", str(ymin_value))
    _append_text_element(doc, bndbox, "xmax", str(xmax_value))
    _append_text_element(doc, bndbox, "ymax", str(ymax_value))
    voc_object.appendChild(bndbox)
    return voc_object


def ConvertVOCXml(file_path="", file_name="", image_width=960, image_height=540):
    """Split one sequence annotation file into per-frame VOC XML files.

    The root element must carry a ``name`` attribute. For every ``<frame>``
    child one file ``<name>__img<NNNNN>.xml`` is written to ``file_path``,
    where ``NNNNN`` is the frame's ``num`` attribute zero-padded to 5 digits.
    Each ``<target>`` that has a ``<box>`` becomes one ``<object>`` with the
    fixed class name "car"; targets without a box are skipped.

    Args:
        file_path: Output directory for the generated XML files.
        file_name: Path of the sequence annotation file to read.
        image_width: Image width written to ``<size>`` and used to clip xmax.
        image_height: Image height written to ``<size>`` and used to clip ymax.

    Returns:
        The number of frame files written.
    """
    root = ET.parse(file_name).getroot()
    sequence_name = root.attrib["name"]

    num = 0  # number of frames written
    for frame in root:
        if frame.tag != "frame":
            continue

        doc = Document()
        annotation = doc.createElement('annotation')
        doc.appendChild(annotation)

        base_name = sequence_name + "__img" + frame.attrib["num"].zfill(5)
        _append_text_element(doc, annotation, "folder", "VOC2007")
        _append_text_element(doc, annotation, "filename", base_name + ".jpg")

        # Child order (depth, width, height) is kept from the original output.
        sizeimage = doc.createElement("size")
        _append_text_element(doc, sizeimage, "depth", "3")
        _append_text_element(doc, sizeimage, "width", str(image_width))
        _append_text_element(doc, sizeimage, "height", str(image_height))
        annotation.appendChild(sizeimage)

        # Element.getchildren() was removed in Python 3.9; look the list up
        # by tag and tolerate frames that have none.
        target_list = frame.find("target_list")
        if target_list is not None:
            for target in target_list:
                if target.tag != "target":
                    continue
                voc_object = _build_voc_object(doc, target, image_width, image_height)
                if voc_object is not None:
                    annotation.appendChild(voc_object)

        with open(os.path.join(file_path, base_name + ".xml"), 'w') as f:
            f.write(doc.toprettyxml(indent=' ' * 4))
        num = num + 1
    return num




def bboxes_draw_on_img(img, bbox, color=[255, 0, 0], thickness=2):
    """Draw one rectangle on ``img`` with ``cv2.rectangle``.

    ``bbox`` is a mapping with the keys "xmin", "ymin", "xmax" and "ymax";
    each value is converted with ``int(float(...))`` before drawing.
    ``color`` and ``thickness`` are passed to ``cv2.rectangle`` unchanged.
    """
    print(bbox)
    left, top, right, bottom = (
        int(float(bbox[key])) for key in ("xmin", "ymin", "xmax", "ymax")
    )
    cv2.rectangle(img, (left, top), (right, bottom), color, thickness)


def visualization_image(image_name, xml_file_name):
    """Show an image with the bounding boxes of a VOC XML file drawn on it.

    The folder, filename and size fields of the annotation are printed while
    parsing. Only objects whose ``<bndbox>`` contains all of xmin, ymin, xmax
    and ymax are drawn. The window stays open until a key is pressed.

    Args:
        image_name: Path of the image file.
        xml_file_name: Path of the VOC annotation file for that image.

    Raises:
        IOError: If the image cannot be read.
    """
    box_keys = ("xmin", "ymin", "xmax", "ymax")
    root = ET.parse(xml_file_name).getroot()

    object_lists = []
    for child in root:
        if child.tag in ("folder", "filename"):
            print(child.tag, child.text)
        elif child.tag == "size":
            for size_child in child:
                if size_child.tag in ("width", "height", "depth"):
                    print(size_child.tag, size_child.text)
        elif child.tag == "object":
            singleObject = {}
            for object_child in child:
                if object_child.tag == "name":
                    singleObject["name"] = object_child.text
                elif object_child.tag == "bndbox":
                    for bndbox_child in object_child:
                        if bndbox_child.tag in box_keys:
                            singleObject[bndbox_child.tag] = bndbox_child.text
            # An object without a complete box cannot be drawn; skipping it
            # here avoids a KeyError later in bboxes_draw_on_img.
            if all(key in singleObject for key in box_keys):
                object_lists.append(singleObject)

    img = cv2.imread(image_name)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("cannot read image: " + str(image_name))
    for object_coordinate in object_lists:
        bboxes_draw_on_img(img, object_coordinate)
    cv2.imshow("capture", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == "__main__":
    print("正在转换")

    # The statistics log is still created (and left empty) as before, but
    # the handle is now closed instead of leaking.
    with open("xml_statistical.txt", "w") as log:
        # Batch conversion of the DETRAC annotations is disabled; only the
        # visualization below runs. To enable it, replace ``pass`` with:
        #
        #     basePath = r"E:\add_dataset\DETRAC-Train-Annotations-XML"
        #     saveBasePath = r"E:\add_dataset\DETRAC-Train-Annotations-XML\xml_test"
        #     if not os.path.exists(saveBasePath):
        #         os.makedirs(saveBasePath)
        #     start = time.time()
        #     total_num = 0
        #     for xml in os.listdir(basePath):
        #         file_name = os.path.join(basePath, xml)
        #         print(file_name)
        #         num = ConvertVOCXml(file_path=saveBasePath, file_name=file_name)
        #         print(num)
        #         total_num = total_num + num
        #         log.write(file_name + " " + str(num) + "\n")
        #     print("Time taken : {0} seconds".format(time.time() - start))
        #     print(total_num)
        #     log.write(str(total_num) + "\n")
        pass

    visualization_image(r"C:\Users\rockhuang\Desktop\rider\xa900048.jpg",r"C:\Users\rockhuang\Desktop\rider_xml\xa900048.xml")

功能六：COCO数据集提取某一类别

生成id号

import json
 
# COCO category id -> class label written to the output records.
className = {1: 'person'}

# COCO category ids whose annotations are extracted.
classNum = [1]
 
def writeNum(Num):
    """Write ``str(Num)`` to COCO_train.json in the current directory.

    The file is created when missing and any previous content is replaced.
    Mode "w" is required for that: "r+" fails when the file does not exist
    and leaves old bytes behind when the new text is shorter.
    """
    with open("COCO_train.json", "w") as f:
        f.write(str(Num))
 # with open("instances_val2014.json","r+") as f:
#     data = json.load(f)
    # annData = data["annotations"]
    # print(annData[0])
    # for x in annData[0]:
    #     if(x == "image_id"):
    #         print(type(x))
    #         print(x+ ":" + str(annData[0][x]))
    #     if (x == "image_id" or x == "bbox" or x == "category_id"):
    #         print(x + ":" + annData[0][x])
    #     if (x == "image_id" or x == "bbox" or x == "category_id"):
    #         print(x+ ":" + annData[0][x])
 # with open("test.json","w") as f:
#     json.dump(annData, f, ensure_ascii=False)
 
def select_annotations(annotations, class_ids, class_names):
    """Return simplified records for the annotations of the wanted classes.

    Args:
        annotations: Iterable of dicts with the keys "image_id",
            "category_id" and "bbox".
        class_ids: Category ids to keep.
        class_names: Mapping from category id to class label.

    Returns:
        A list of dicts with "filename" (the image id zero-padded to 6
        digits), "name" (the class label) and "bndbox" (the "bbox" value,
        passed through unchanged).
    """
    return [
        {
            "filename": str(ann["image_id"]).zfill(6),
            "name": class_names[ann["category_id"]],
            "bndbox": ann["bbox"],
        }
        for ann in annotations
        if ann["category_id"] in class_ids
    ]


if __name__ == "__main__":
    # Read the COCO annotations and write the filtered records to
    # COCO_train.json through writeNum.
    path = "E:/add_dataset/annotations_trainval2017/annotations/instances_train2017.json"
    # Read-only is enough; "r+" needlessly requires write permission.
    with open(path, "r") as f:
        allData = json.load(f)
    data = allData["annotations"]
    if len(data) > 1:
        print(data[1])  # sample record, for a quick visual check
    print("read ready")

    inputfile = json.dumps(select_annotations(data, classNum, className))
    writeNum(inputfile)

写文件

import json
import os
import cv2
import shutil


def image_file_names(records):
    """Return the set of image file names referenced by ``records``.

    Each record's "filename" value is prefixed with "000000" and suffixed
    with ".jpg"; duplicates collapse because a set is returned.
    """
    return {'000000' + str(record["filename"]) + ".jpg" for record in records}


def copy_images(names, src_dir, dst_dir):
    """Copy the named files from ``src_dir`` to ``dst_dir``.

    Files are copied byte-for-byte, so JPEGs are not decoded and re-encoded.
    ``dst_dir`` is created when missing; names that do not exist in
    ``src_dir`` are reported and skipped.

    Returns:
        The number of files copied.
    """
    os.makedirs(dst_dir, exist_ok=True)
    count = 0
    for name in sorted(names):
        source = os.path.join(src_dir, name)
        if not os.path.isfile(source):
            print('missing: ' + source)
            continue
        shutil.copyfile(source, os.path.join(dst_dir, name))
        count = count + 1
        print('num: ' + str(count) + '     ' + name + '\n')
    return count


if __name__ == "__main__":
    with open("COCO_train.json", "r") as f:
        data = json.load(f)
    print("read ready")
    nameStr = image_file_names(data)
    print(nameStr)
    print(len(nameStr))

    path = r'E:/add_dataset/train2017/'
    savePath = r"E:/add_dataset/coco1/"
    copy_images(nameStr, path, savePath)

功能七：对坐标txt文件转xml

这段代码主要用于处理 https://data.vision.ee.ethz.ch/cvl/aess/dataset/ 里的数据集：其标注给的都是 txt，需要转化为 xml。
txt 格式为（图片名.png a b c d ; d f g h ;…），请根据自己数据的实际格式调整代码。

1，生成每个图像的txt文件

import os
import re
import numpy as np
def split_annotation_file(annotation_path, output_dir):
    """Split a combined annotation file into one txt file per image.

    Each input line is expected to look like
    ``<image name>: (a, b, c, d), (e, f, g, h);``. The text inside every pair
    of parentheses is appended as one line to
    ``<output_dir>/<image name up to the first dot>.txt``. Lines without a
    colon or without any parenthesised group are skipped.

    Args:
        annotation_path: Combined annotation file to read.
        output_dir: Existing directory for the per-image txt files.

    Returns:
        The total number of boxes written.
    """
    written = 0
    with open(annotation_path, "r") as annotations:
        for line in annotations:
            img_name, separator, txt_label = line.partition(":")
            if not separator:
                continue
            # Extracting the groups directly does not depend on the exact
            # line terminator (";", "." or a missing final newline).
            order = re.findall(r'\(([^()]*)\)', txt_label)
            if not order:
                continue
            print(order)
            output_path = os.path.join(output_dir, img_name.split('.')[0] + '.txt')
            # Append mode is kept from the original: boxes of an image that
            # appears on several lines accumulate in the same file.
            with open(output_path, "a") as f:
                for box in order:
                    f.write(box + '\n')
            written += len(order)
    return written


if __name__ == "__main__":
    split_annotation_file(r'F:\add_person\1\annotations.txt', r'F:\add_person\1\2')

2，txt 文件转化为 xml 格式

#! /usr/bin/python
# -*- coding:UTF-8 -*-
import os, sys
import glob
from PIL import Image
 
def build_voc_xml(image_filename, width, height, label_lines, class_name='person'):
    """Return the VOC annotation XML text for one image.

    Args:
        image_filename: Value written to ``<filename>``.
        width: Image width in pixels.
        height: Image height in pixels.
        label_lines: Iterable of strings, each holding four integers
            ``x1, y1, x2, y2`` separated by commas and/or whitespace.
            Blank lines are ignored.
        class_name: Value written to ``<name>`` of every object.

    Returns:
        The XML document as a string. For every box the smaller x/y value is
        written as xmin/ymin and the larger as xmax/ymax.
    """
    parts = [
        '<annotation>\n',
        '    <folder>VOC2007</folder>\n',
        '    <filename>' + str(image_filename) + '</filename>\n',
        '    <size>\n',
        '        <width>' + str(width) + '</width>\n',
        '        <height>' + str(height) + '</height>\n',
        '        <depth>3</depth>\n',
        '    </size>\n',
    ]
    for line in label_lines:
        # Accept both "a, b, c, d" and "a,b,c,d".
        values = line.replace(',', ' ').split()
        if not values:
            continue
        x1, y1, x2, y2 = (int(value) for value in values[:4])
        # Corners may be given in either order; normalise to min/max.
        xmin, xmax = sorted((x1, x2))
        ymin, ymax = sorted((y1, y2))
        parts.extend([
            '    <object>\n',
            '        <name>' + class_name + '</name>\n',
            '        <pose>Unspecified</pose>\n',
            '        <truncated>0</truncated>\n',
            '        <difficult>0</difficult>\n',
            '        <bndbox>\n',
            '            <xmin>' + str(xmin) + '</xmin>\n',
            '            <ymin>' + str(ymin) + '</ymin>\n',
            '            <xmax>' + str(xmax) + '</xmax>\n',
            '            <ymax>' + str(ymax) + '</ymax>\n',
            '        </bndbox>\n',
            '    </object>\n',
        ])
    parts.append('</annotation>\n')
    return ''.join(parts)


if __name__ == "__main__":
    # Directory holding the images
    src_img_dir = r"F:\add_person\5\image"
    # Directory holding the ground-truth txt file of every image
    src_txt_dir = r"F:\add_person\5\txt"
    # Directory the generated xml files are written to
    src_xml_dir = r"F:\add_person\5\xml"

    for image_path in sorted(glob.glob(os.path.join(src_img_dir, '*.png'))):
        img = os.path.splitext(os.path.basename(image_path))[0]
        with Image.open(image_path) as im:
            width, height = im.size

        # Read the txt file that belongs to this image.
        with open(os.path.join(src_txt_dir, img + '.txt')) as gt_file:
            gt = gt_file.read().splitlines()

        with open(os.path.join(src_xml_dir, img + '.xml'), 'w') as xml_file:
            xml_file.write(build_voc_xml(img + '.png', width, height, gt))

功能八：INRIAPerson数据集处理，该数据集地址为：http://pascal.inrialpes.fr/data/human/

# -*- coding: UTF-8 -*-
from xml.dom.minidom import Document
import os
import re

def _append_text(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    node = doc.createElement(tag)
    node.appendChild(doc.createTextNode(text))
    parent.appendChild(node)
    return node


def inria_annotation_to_voc(lines, fallback_filename):
    """Build a VOC annotation document from the lines of one annotation file.

    Recognised lines:
      * ``Image filename : "<path>"`` - the base name becomes ``<filename>``;
      * the first line containing "size" with at least three numbers - taken
        as width, height and depth;
      * ``Bounding box for object <n> ...`` - the four numbers following the
        object index are taken as xmin, ymin, xmax, ymax.

    Every bounding-box line yields exactly one ``<object>``, in file order.

    Args:
        lines: The text lines of the annotation file.
        fallback_filename: Written to ``<filename>`` when no image filename
            line is present.

    Returns:
        The ``xml.dom.minidom.Document`` holding the annotation.
    """
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    image_filename = fallback_filename
    for line in lines:
        match = re.search(r'Image filename\s*:\s*"([^"]+)"', line)
        if match:
            image_filename = re.split(r'[\\/]', match.group(1))[-1]
            break

    _append_text(doc, annotation, 'folder', 'VOC2007')
    _append_text(doc, annotation, 'filename', image_filename)

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text(doc, source, 'database', 'PASperson Database')
    _append_text(doc, source, 'annotation', 'PASperson')

    size_written = False
    for line in lines:
        if re.search(r'Bounding box for object \d+', line):
            # The first number is the object index, so the coordinates start
            # at position 1. Matching each line once (instead of searching
            # for "object <n>" as a substring) keeps "object 1" from also
            # picking up objects 10-19.
            coordinate = re.findall(r'\d+', line)
            if len(coordinate) < 5:
                continue
            voc_object = doc.createElement('object')
            annotation.appendChild(voc_object)
            _append_text(doc, voc_object, 'name', 'person')
            _append_text(doc, voc_object, 'pose', 'Unspecified')
            _append_text(doc, voc_object, 'truncated', '0')
            _append_text(doc, voc_object, 'difficult', '0')
            bndbox = doc.createElement('bndbox')
            voc_object.appendChild(bndbox)
            _append_text(doc, bndbox, 'xmin', coordinate[1])
            _append_text(doc, bndbox, 'ymin', coordinate[2])
            _append_text(doc, bndbox, 'xmax', coordinate[3])
            _append_text(doc, bndbox, 'ymax', coordinate[4])
        elif not size_written and "size" in line:
            sizes = re.findall(r'\d+', line)
            if len(sizes) >= 3:
                size = doc.createElement('size')
                annotation.appendChild(size)
                _append_text(doc, size, 'width', sizes[0])
                _append_text(doc, size, 'height', sizes[1])
                _append_text(doc, size, 'depth', sizes[2])
                _append_text(doc, annotation, 'segmented', '0')
                size_written = True
    return doc


if __name__ == "__main__":
    annotation_dir = r"F:\person\INRIAPerson\Train\annotations"
    savePath = r'F:\person\INRIAPerson\Train\xml'
    for oldfilename in os.listdir(annotation_dir):
        if ".txt" not in oldfilename:
            continue
        print(oldfilename)
        newfilename = os.path.splitext(oldfilename)[0] + ".xml"

        with open(os.path.join(annotation_dir, oldfilename), "r") as f:
            print('processing:' + f.name)
            fr = f.readlines()

        doc = inria_annotation_to_voc(fr, oldfilename)
        with open(os.path.join(savePath, newfilename), 'w') as f:
            f.write(doc.toprettyxml(indent="\t"))
        print(oldfilename + " compelete")

    print('process compelete')

功能九：voc格式数据集生成trainval.txt, test.txt, train.txt, val.txt

import os
import random

trainval_percent = 0.66  # share of all samples that goes to trainval
train_percent = 0.95     # share of trainval that goes to train


def split_dataset(names, trainval_percent=0.66, train_percent=0.95, rng=random):
    """Randomly split ``names`` into trainval/test and train/val subsets.

    ``int(len(names) * trainval_percent)`` names go to "trainval" and the
    rest to "test"; ``int(trainval_count * train_percent)`` of the trainval
    names go to "train" and the rest to "val".

    Args:
        names: Sequence of sample names.
        trainval_percent: Fraction of all names assigned to trainval.
        train_percent: Fraction of trainval assigned to train.
        rng: Object providing ``sample(population, k)``; the ``random``
            module by default. Pass a seeded ``random.Random`` for a
            reproducible split.

    Returns:
        A dict with the keys "trainval", "test", "train" and "val"; every
        list keeps the relative order of ``names``.
    """
    num = len(names)
    tv = int(num * trainval_percent)
    tr = int(tv * train_percent)
    trainval_list = rng.sample(range(num), tv)
    train_ids = set(rng.sample(trainval_list, tr))
    # Sets make the membership tests below O(1) instead of O(n).
    trainval_ids = set(trainval_list)

    splits = {'trainval': [], 'test': [], 'train': [], 'val': []}
    for i, name in enumerate(names):
        if i in trainval_ids:
            splits['trainval'].append(name)
            if i in train_ids:
                splits['train'].append(name)
            else:
                splits['val'].append(name)
        else:
            splits['test'].append(name)
    return splits


if __name__ == "__main__":
    xmlfilepath = 'Annotations'
    txtsavepath = os.path.join('ImageSets', 'Main')
    # Only real annotation files count; sorting makes the input order stable.
    total_xml = sorted(f for f in os.listdir(xmlfilepath) if f.endswith('.xml'))
    names = [os.path.splitext(f)[0] for f in total_xml]

    splits = split_dataset(names, trainval_percent, train_percent)
    for split_name, split_names in splits.items():
        # newline='\n' keeps Unix line endings when generated on Windows.
        with open(os.path.join(txtsavepath, split_name + '.txt'), 'w', newline='\n') as out:
            out.writelines(name + '\n' for name in split_names)