# 1. The author received a VOC dataset containing 20 classes from a friend, but only needs the following 6:
classes={"one","two","three","four","five","fist"}
# 2. Once the matching annotations are selected, the corresponding images are picked out as well.
# coding=utf-8
"""选出指定标签的xml文件"""
#"""选出对应标签的图片"""
import os
import os.path
import xml.dom.minidom
import shutil
import sys
# Folder holding the source annotation XML files.
path = "C:\\Users\\AS\\Desktop\\new\\Annotations"
# Destination folder for the XML files copied by selete_xml_file().
newpath="C:\\Users\\AS\\Desktop\\new\\label"
# Folder whose file names selete_image_file() matches images against;
# it is the same location as newpath.
label_path = "C:\\Users\\AS\\Desktop\\new\\label"
# Folder holding the source images.
image_path = "C:\\Users\\AS\\Desktop\\new\\JPEGImages"
# Destination folder for the images copied by selete_image_file().
image_new_path ="C:\\Users\\AS\\Desktop\\new\\image"
files = os.listdir(path) # names of every entry in the annotation folder, listed once at import time
# Label names to keep: an annotation file is selected when one of its <name>
# elements holds one of these values.
classes={"one","two","three","four","five","fist"}
# NOTE(review): not referenced by any code visible in this file.
new =[]
def selete_xml_file(src_dir=None, dst_dir=None, wanted=None):
    """Copy every annotation XML that mentions a wanted label into ``dst_dir``.

    A file is selected when at least one of its ``<name>`` elements contains
    a label from ``wanted``. Each selected file is copied exactly once, no
    matter how many matching elements it holds.

    Args:
        src_dir: Folder with the source XML files. Defaults to the
            module-level ``path``.
        dst_dir: Folder receiving the copies; created when missing.
            Defaults to the module-level ``newpath``.
        wanted: Iterable of label names to keep. Defaults to the
            module-level ``classes``.

    Returns:
        The number of files copied (also printed).

    Raises:
        xml.parsers.expat.ExpatError: If a ``.xml`` file is not well-formed.
    """
    # Fall back to the module-level configuration when called bare.
    src_dir = path if src_dir is None else src_dir
    dst_dir = newpath if dst_dir is None else dst_dir
    wanted = frozenset(classes if wanted is None else wanted)

    os.makedirs(dst_dir, exist_ok=True)
    copied = 0
    for entry in sorted(os.listdir(src_dir)):
        src = os.path.join(src_dir, entry)
        # Test the joined path: a bare file name would be resolved against
        # the current working directory instead of the annotation folder.
        # Entries that are not regular .xml files cannot be parsed; skip them.
        if not os.path.isfile(src) or not entry.lower().endswith('.xml'):
            continue
        if _has_wanted_name(src, wanted):
            shutil.copyfile(src, os.path.join(dst_dir, entry))
            copied += 1
    print(copied)
    return copied


def _has_wanted_name(xml_path, wanted):
    """Return True when any ``<name>`` element of ``xml_path`` holds a wanted label."""
    document = xml.dom.minidom.parse(xml_path)
    for node in document.documentElement.getElementsByTagName('name'):
        text = node.firstChild
        # An empty <name/> has no child node, and a nested element has no
        # ``data``; neither can match, so skip them instead of crashing.
        if text is None or text.nodeType != text.TEXT_NODE:
            continue
        # Surrounding whitespace from pretty-printed XML is ignored.
        if text.data.strip() in wanted:
            return True
    return False
def selete_image_file(label_dir=None, image_dir=None, out_dir=None):
    """Copy each image whose base name matches a file in ``label_dir``.

    Base names are compared without their final extension, so
    ``2007.000027.jpg`` matches ``2007.000027.xml`` but not
    ``2007.000032.xml``.

    Args:
        label_dir: Folder with the selected annotation files. Defaults to
            the module-level ``label_path``.
        image_dir: Folder with the source images. Defaults to the
            module-level ``image_path``.
        out_dir: Folder receiving the copies; created when missing.
            Defaults to the module-level ``image_new_path``.

    Returns:
        The number of images copied (also printed).
    """
    # Fall back to the module-level configuration when called bare.
    label_dir = label_path if label_dir is None else label_dir
    image_dir = image_path if image_dir is None else image_dir
    out_dir = image_new_path if out_dir is None else out_dir

    # splitext strips only the last extension, keeping dotted stems intact;
    # a set makes the per-image membership test constant time.
    stems = {os.path.splitext(entry)[0] for entry in os.listdir(label_dir)}

    os.makedirs(out_dir, exist_ok=True)
    copied = 0
    for entry in sorted(os.listdir(image_dir)):
        src = os.path.join(image_dir, entry)
        # Sub-directories cannot be copied with copyfile; skip them.
        if not os.path.isfile(src):
            continue
        if os.path.splitext(entry)[0] not in stems:
            continue
        dst = os.path.join(out_dir, entry)
        shutil.copyfile(src, dst)
        copied += 1
        print(src)
        print(dst)
    print(copied)
    return copied
# Copy the images whose names match the annotation files found in label_path.
selete_image_file()
# Disabled step: copies the matching annotation XML files into newpath, the
# folder the call above reads from. Enable it to (re)build that folder.
# selete_xml_file()