#!/usr/bin/python
# -*- coding: UTF-8 -*-
# get annotation object bndbox location##
import os
import shutil
from pathlib import Path  # handier directory traversal than os.walk or manual recursion

import cv2

try:
    import xml.etree.cElementTree as ET  # C implementation of the XML parser, when available
except ImportError:
    import xml.etree.ElementTree as ET
class ExtractFile:
    """Pick out VOC annotation files that contain given classes, plus their images.

    An annotation file is selected when at least one of its ``<object>``
    elements has a ``<name>`` whose text is one of ``obj_names``.

    Args:
        in_xml_path: Directory holding the VOC ``.xml`` annotation files
            (searched recursively).
        in_jpg_path: Directory holding the ``.jpg`` images; an image is matched
            to an annotation by sharing its base file name.
        obj_names: Class names to look for. A single string is treated as one
            class name. Defaults to ``("person",)``.
    """

    def __init__(self, in_xml_path, in_jpg_path, obj_names=("person",)):
        self.in_xml_path = in_xml_path
        self.in_jpg_path = in_jpg_path
        if isinstance(obj_names, str):
            # A bare string would otherwise be split into single characters.
            obj_names = (obj_names,)
        self.obj_names = frozenset(obj_names)
        # Paths of the annotation copies made by the most recent getAnn() call.
        self.out_xml_toList = []

    def _has_target(self, root):
        """Return True if any ``<object>`` under *root* names a wanted class."""
        # findtext() yields None for an object without <name>, which simply
        # fails the membership test instead of raising AttributeError.
        return any(
            obj.findtext("name") in self.obj_names
            for obj in root.findall("object")
        )

    def getAnn(self, out_xml_path):
        """Copy every matching annotation file into *out_xml_path*.

        Args:
            out_xml_path: Destination directory; created if it does not exist.

        Returns:
            List of the paths of the copied ``.xml`` files (one entry per
            matching file). The same list is stored in ``self.out_xml_toList``.
        """
        # Without an existing directory shutil.copy() would create a *file*
        # called out_xml_path and overwrite it on every copy.
        os.makedirs(out_xml_path, exist_ok=True)

        copied = []
        for parent, _dirnames, filenames in os.walk(self.in_xml_path):
            for filename in sorted(filenames):
                if not filename.lower().endswith(".xml"):
                    continue
                # Join with the directory currently being walked so files in
                # sub-directories resolve correctly.
                xml_file_path = os.path.join(parent, filename)
                try:
                    root = ET.parse(xml_file_path).getroot()
                except ET.ParseError as err:
                    print("skipping unparsable xml %s: %s" % (xml_file_path, err))
                    continue
                if self._has_target(root):
                    # Copy once per file, however many objects matched.
                    self.copy_xml = shutil.copy(xml_file_path, out_xml_path)
                    copied.append(self.copy_xml)

        self.out_xml_toList = copied
        print("提取xml完成")
        return copied

    def getImg(self, out_jpg_path, out_xml_path=None):
        """Extract matching annotations, then copy their images to *out_jpg_path*.

        Args:
            out_jpg_path: Destination directory for the images; created if it
                does not exist.
            out_xml_path: Destination directory for the annotation files,
                forwarded to :meth:`getAnn`. When omitted, a module-level
                variable named ``out_xml_path`` is used if one exists, which
                keeps older one-argument calls working.

        Returns:
            List of the paths of the copied ``.jpg`` files.

        Raises:
            ValueError: If no annotation output directory can be determined.
        """
        if out_xml_path is None:
            out_xml_path = globals().get("out_xml_path")
        if out_xml_path is None:
            raise ValueError("out_xml_path must be given to getImg()")

        os.makedirs(out_jpg_path, exist_ok=True)

        written = []
        for xml_file_name in self.getAnn(out_xml_path):
            stem = os.path.splitext(os.path.basename(xml_file_name))[0]
            jpg_name = stem + ".jpg"
            pic_file_path = os.path.join(self.in_jpg_path, jpg_name)
            if not os.path.isfile(pic_file_path):
                print("no image found for %s, skipped" % xml_file_name)
                continue
            # A plain file copy keeps the image bytes intact; decoding and
            # re-encoding a JPEG would lose quality.
            written.append(
                shutil.copy(pic_file_path, os.path.join(out_jpg_path, jpg_name))
            )

        print("根据xml找到对应的jpg,并写入指定路径完成")
        return written
if __name__ == '__main__':
    # Destination folders. `out_xml_path` and `extractFile` have to stay
    # module-level names: ExtractFile.getImg looks them up as globals.
    out_xml_path = r"tiqu_voc_xml_out"
    # Raw string, so both trailing backslashes are kept (Windows-style separator).
    out_jpg_path = r"tiqu_voc_jpg_out\\"

    # Source folders with the VOC annotations and the images they describe.
    in_jpg_path = r"tiqu_voc_jpg_input"
    in_xml_path = r"tiqu_voc_xml_input"

    extractFile = ExtractFile(in_xml_path=in_xml_path, in_jpg_path=in_jpg_path)
    extractFile.getImg(out_jpg_path)
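# VOC: extract the XML annotation files and JPG images of the specified object classes.
# (Web-page footer picked up with the code: "latest recommended article published 2023-07-28 09:46:00".)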