一、从 COCO 数据集中筛选所需的标签
根据所选类别生成对应的 JSON 文件
import json
# COCO category ids that are kept, mapped to the label written to the output.
className = {1: 'person', 3: "car", 6: "bus", 8: "truck"}

# The ids alone, in the same order as the mapping above.
classNum = list(className)

# Location of the COCO "instances" annotation file that gets filtered.
coco_json = ".\\annotations\\instances_train2017.json"
def writeNum(Num):
    """Append the string form of ``Num`` to ``COCO_train.json``.

    The file is opened in append mode, so existing content is kept and the
    new text is added after it.
    """
    with open("COCO_train.json", "a+") as out_file:
        text = str(Num)
        out_file.write(text)
# Filter the COCO annotations down to the wanted categories and store the
# result as a JSON list in COCO_train.json.
inputfile = []
inner = {}
# Read-only access is enough here; the annotation file is never modified.
with open(coco_json, "r", encoding="utf-8") as f:
    allData = json.load(f)
data = allData["annotations"]
# Debug output only: show one sample record when there is one to show.
if len(data) > 1:
    print(data[1])
print("read ready")
for i in data:
    if i['category_id'] in classNum:
        inner = {
            # Zero-padded to 12 digits.
            "filename": str(i["image_id"]).zfill(12),
            "name": className[i["category_id"]],
            # Stored unchanged from the COCO record.
            "bndbox": i["bbox"],
        }
        # Collect the record; without this every match is discarded.
        inputfile.append(inner)

# writeNum() appends, so clear any output left by an earlier run first;
# otherwise the file would hold two JSON documents and json.load() would
# reject it.
with open("COCO_train.json", "w"):
    pass
writeNum(json.dumps(inputfile))
二、根据筛选出的图片 id，选取图片
import json
import os
import cv2
from multiprocessing import Pool
import shutil
def keep_pic(path, savePath, file):
    """Copy the file named ``file`` from directory ``path`` into ``savePath``.

    Args:
        path: Directory that currently holds the file.
        savePath: Directory that receives the copy.
        file: File name, relative to both directories.
    """
    source, target = (os.path.join(folder, file) for folder in (path, savePath))
    # The original author notes that shutil.move(source, target) can be used
    # instead when speed matters.
    shutil.copy(source, target)
if __name__ == "__main__":
    # Load the filtered annotations; only the file names are needed here.
    with open("COCO_train.json", "r") as f:
        data = json.load(f)
    print("read ready")

    # A set removes duplicates: one image usually carries several annotations.
    nameStr = {str(i["filename"]) + ".jpg" for i in data}

    path = 'E:/darknet/coco/train/'
    savePath = "E:/darknet/coco/VOC2017_/JPEGImages/"
    # Make sure the destination exists before any worker tries to copy into it.
    os.makedirs(savePath, exist_ok=True)

    pool = Pool(processes=8)
    pending = []
    for file in nameStr:
        print("imageName: ", file)
        # Asynchronous submission; the result handle is kept so that a
        # failure inside the worker can be reported afterwards.
        pending.append((file, pool.apply_async(keep_pic, args=(path, savePath, file))))
    # Stop accepting new tasks, then wait for the workers to finish.
    pool.close()
    pool.join()

    # An exception raised in a worker is only re-raised when the result is
    # fetched; without this step failed copies would go unnoticed.
    for file, result in pending:
        try:
            result.get()
        except OSError as err:
            print("copy failed: ", file, err)
三、将 COCO 数据集转换为 VOC 格式
import xml.dom
import xml.dom.minidom
import os
# from PIL import Image
import cv2
import json
import threading
from multiprocessing import Pool
# xml文件规范定义
_IMAGE_PATH = 'E:/darknet/coco/train/'
_INDENT = '' * 4
_NEW_LINE = '\n'
_FOLDER_NODE = 'COCO2017'
_ROOT_NODE = 'annotation'
_DATABASE_NAME = 'LOGODection'
_ANNOTATION = 'COCO2017'
_AUTHOR = 'SyGoing_CSDN'
_SEGMENTED = '0'
_DIFFICULT = '0'
_TRUNCATED = '0'
_POSE = 'Unspecified'
person_num = 0
car_num = 0
bus_num = 0
truck_num = 0
# _IMAGE_COPY_PATH= 'JPEGImages'
_ANNOTATION_SAVE_PATH = 'E:/darknet/coco/VOC2017_/Annotations'
# _IMAGE_CHANNEL= 3
# Helper that wraps element creation.
def createElementNode(doc, tag, attr):
    """Return a new ``tag`` element whose only child is a text node with ``attr``.

    Args:
        doc: DOM document used as the node factory.
        tag: Name of the element to create.
        attr: Text placed inside the element.

    Returns:
        The new element; it is not attached to any parent.
    """
    node = doc.createElement(tag)
    node.appendChild(doc.createTextNode(attr))
    return node
# Helper that creates a text element and attaches it to a parent.
def createChildNode(doc, tag, attr, parent_node):
    """Append a new ``tag`` element containing the text ``attr`` to ``parent_node``."""
    parent_node.appendChild(createElementNode(doc, tag, attr))
# The <object> node needs its own builder because it nests a <bndbox>.
def createObjectNode(doc, attrs):
    """Build the ``<object>`` element for one annotation record.

    Args:
        doc: DOM document used as the node factory.
        attrs: Mapping with a ``'name'`` label and a ``'bndbox'`` sequence
            read as ``[x, y, width, height]``.

    Returns:
        The ``<object>`` element with name, pose, truncated, difficult and
        bndbox children; it is not attached to any parent.
    """
    obj = doc.createElement('object')
    label = attrs['name']
    header_fields = (
        ('name', label),
        ('pose', _POSE),
        ('truncated', _TRUNCATED),
        ('difficult', _DIFFICULT),
    )
    for tag, text in header_fields:
        createChildNode(doc, tag, text, obj)

    box_node = doc.createElement('bndbox')
    box = attrs['bndbox']
    # Convert x/y/width/height into corner coordinates; int() truncates.
    corners = (
        ('xmin', box[0]),
        ('ymin', box[1]),
        ('xmax', box[0] + box[2]),
        ('ymax', box[1] + box[3]),
    )
    for tag, value in corners:
        createChildNode(doc, tag, str(int(value)), box_node)
    obj.appendChild(box_node)
    return obj
# Write the document to an XML file.
def writeXMLFile(doc, filename):
    """Serialise ``doc`` to ``filename`` without the XML declaration line.

    The document is rendered in memory with four-space indentation, the
    first line (the ``<?xml ...?>`` declaration) is dropped, and lines that
    contain only whitespace are skipped.

    Args:
        doc: DOM document to serialise.
        filename: Path of the XML file to create or overwrite.
    """
    import io

    # Render into memory instead of a shared 'tmp.xml' in the working
    # directory: nothing is left behind and parallel calls cannot clash.
    buffer = io.StringIO()
    doc.writexml(buffer, addindent=' ' * 4, newl='\n', encoding='utf-8')
    buffer.seek(0)
    lines = buffer.readlines()

    # Write as UTF-8 explicitly so non-ASCII text does not depend on the
    # platform's default encoding.
    with open(filename, 'w', encoding='utf-8') as fout:
        fout.writelines(line for line in lines[1:] if line.split())
def write(_ANNOTATION_SAVE_PATH, saveName):
    """Create the VOC-style XML annotation file for one image.

    Relies on the module-level names ``img_path``, ``imageName`` and
    ``ann_data`` that the ``__main__`` block sets before calling.

    Args:
        _ANNOTATION_SAVE_PATH: Directory that receives the XML file.
        saveName: Image identifier, either a bare name (``000000000009``)
            or an image file name (``000000000009.jpg``).
    """
    # Strip a recognised image extension so that "x.jpg" yields "x.xml"
    # and a <filename> of "x.jpg" rather than "x.jpg.xml" / "x.jpg.jpg".
    stem, ext = os.path.splitext(saveName)
    if ext.lower() in ('.jpg', '.jpeg', '.png'):
        image_file_name = saveName
    else:
        stem = saveName
        image_file_name = saveName + '.jpg'
    xml_file_name = os.path.join(_ANNOTATION_SAVE_PATH, stem + '.xml')

    image_file = os.path.join(img_path, imageName)
    img = cv2.imread(image_file)
    print(image_file)
    if img is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        print("skip unreadable image: ", image_file)
        return
    height, width, channel = img.shape
    print(height, width, channel)

    my_dom = xml.dom.getDOMImplementation()
    doc = my_dom.createDocument(None, _ROOT_NODE, None)
    # Root node.
    root_node = doc.documentElement
    # <folder> and <filename>.
    createChildNode(doc, 'folder', _FOLDER_NODE, root_node)
    createChildNode(doc, 'filename', image_file_name, root_node)

    # <source> and its children.
    source_node = doc.createElement('source')
    createChildNode(doc, 'database', _DATABASE_NAME, source_node)
    createChildNode(doc, 'annotation', _ANNOTATION, source_node)
    createChildNode(doc, 'image', 'flickr', source_node)
    createChildNode(doc, 'flickrid', 'NULL', source_node)
    root_node.appendChild(source_node)

    # <owner> and its children.
    owner_node = doc.createElement('owner')
    createChildNode(doc, 'flickrid', 'NULL', owner_node)
    createChildNode(doc, 'name', _AUTHOR, owner_node)
    root_node.appendChild(owner_node)

    # <size>.
    size_node = doc.createElement('size')
    createChildNode(doc, 'width', str(width), size_node)
    createChildNode(doc, 'height', str(height), size_node)
    createChildNode(doc, 'depth', str(channel), size_node)
    root_node.appendChild(size_node)

    # <segmented>.
    createChildNode(doc, 'segmented', _SEGMENTED, root_node)

    # One <object> per annotation of this image. The plain id is compared
    # as well as the prefixed form, because the prefixed form never equals
    # a file name like "000000000009.jpg" and so matched nothing.
    # NOTE: this scans every annotation for every image.
    for ann in ann_data:
        ann_name = str(ann["filename"])
        if stem in (ann_name, "COCO_train2017_" + ann_name):
            root_node.appendChild(createObjectNode(doc, ann))

    print(xml_file_name)
    # Write the document to disk.
    writeXMLFile(doc, xml_file_name)
if __name__ == "__main__":
    # Read the list of images to convert.
    img_path = "E:/darknet/coco/VOC2017_/JPEGImages/"
    fileList = os.listdir(img_path)
    # os.listdir returns a list, so comparing it with 0 was never true;
    # test for emptiness instead.
    if not fileList:
        print("no images found in: ", img_path)
        raise SystemExit(-1)

    with open("COCO_train.json", "r") as f:
        ann_data = json.load(f)

    # Directory containing this script ('__file__' in quotes was a literal
    # string, not the script path).
    current_dirpath = os.path.dirname(os.path.abspath(__file__))

    # makedirs also creates missing parent directories and tolerates an
    # already existing target.
    os.makedirs(_ANNOTATION_SAVE_PATH, exist_ok=True)

    for imageName in fileList:
        print("imageName: ", imageName)
        write(_ANNOTATION_SAVE_PATH, imageName)