coco数据集提取car,转化为voc格式
分三步:
提取你需要的类别
python filter.py
移除不需要的图片
python get_image.py
写成voc格式
python get_xml.py
话不多说,直接上代码:
filter.py
import json
# COCO category id -> class name stored in the "name" field of each record.
className = {3: 'car'}
# Category ids whose annotations are kept by the filter loop.
classNum = [3]
def writeNum(Num):
    """Write the serialized annotation list to ``COCO_train.json``.

    The file is truncated on every call: appending (the previous ``"a+"``
    mode) would concatenate two JSON documents when the script is run twice
    and make the file unparseable. The name ``COCO_train.json`` is the one
    get_image.py and get_xml.py open for reading.

    Args:
        Num: The content to store; it is converted with ``str()`` before
            being written.
    """
    with open("COCO_train.json", "w") as f:
        f.write(str(Num))
# Load the COCO instance annotations. The file is only read, so plain "r" is
# used ("r+" would additionally require write permission on the dataset).
with open("instances_train2014.json", "r") as f:
    allData = json.load(f)
data = allData["annotations"]
# Print one sample record as a sanity check, but only when it exists.
if len(data) > 1:
    print(data[1])
print("read ready")

# Keep one small record per annotation whose category is listed in classNum.
inputfile = []
for i in data:
    if i['category_id'] in classNum:
        inner = {
            # COCO 2014 image files carry the image id zero-padded to 12
            # digits, so pad to that width to make the value usable in a
            # file name.
            "filename": str(i["image_id"]).zfill(12),
            "name": className[i["category_id"]],
            "bndbox": i["bbox"],
        }
        inputfile.append(inner)

# Serialize the whole list once and hand it to writeNum() for storage.
inputfile = json.dumps(inputfile)
writeNum(inputfile)
get_image.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2018/4/24 22:46
# @Author : ma xiangjun
# @Site :
# @File : 根据目标图片id筛选图片.py
# @Software: PyCharm
import json
import os
# Build the set of image file names that have at least one kept annotation.
with open("COCO_train.json", "r") as f:
    data = json.load(f)
print("read ready")
# The id is padded to 12 digits so the name has the same shape as the image
# files on disk (COCO_train2014_<12-digit id>.jpg); padding an already padded
# value is a no-op.
nameStr = {
    "COCO_train2014_" + str(i["filename"]).zfill(12) + ".jpg"
    for i in data
}
print(nameStr)
print(len(nameStr))

path = "/home/panlu/ext_work/YOLO/darknet/scripts/coco/images/train2014/"
if not nameStr:
    # With an empty keep-set the loop below would delete every image, which
    # is far more likely a bad input file than what the user wants.
    raise SystemExit("COCO_train.json lists no images; refusing to delete the whole folder")

# Delete every regular file in the image folder that is not in the keep-set.
for entry in os.listdir(path):
    target = os.path.join(path, entry)
    # os.remove() fails on directories, so only regular files are removed.
    if entry not in nameStr and os.path.isfile(target):
        os.remove(target)
get_xml.py
import xml.dom
import xml.dom.minidom
import os
import cv2
import json
# Fixed values written into every generated annotation document.
_AUTHOR = 'maxiangjun'  # text of the <owner>/<name> element
_POSE = 'Unspecified'  # text of each object's <pose> element
# The <segmented>, <difficult> and <truncated> elements are always '0'.
_SEGMENTED = _DIFFICULT = _TRUNCATED = '0'
def createElementNode(doc, tag, attr):
    """Return a new element named ``tag`` that contains the text ``attr``.

    The element is created through ``doc`` but is not attached anywhere;
    the caller decides where to append it.

    Args:
        doc: DOM document used as the node factory.
        tag: Name of the element to create.
        attr: Text placed inside the element as its single child node.

    Returns:
        The newly created element node.
    """
    element_node = doc.createElement(tag)
    element_node.appendChild(doc.createTextNode(attr))
    return element_node
def createChildNode(doc, tag, attr, parent_node):
    """Build a ``tag`` element holding the text ``attr`` and append it to ``parent_node``.

    Args:
        doc: DOM document used as the node factory.
        tag: Name of the element to create.
        attr: Text content of the new element.
        parent_node: Node that receives the new element as its last child.
    """
    parent_node.appendChild(createElementNode(doc, tag, attr))
def createObjectNode(doc, attrs):
    """Build the ``<object>`` element for one annotation record.

    Args:
        doc: DOM document used as the node factory.
        attrs: Mapping with a ``'name'`` entry (text of the ``<name>``
            element) and a ``'bndbox'`` sequence. Items 0 and 1 of the box
            become xmin/ymin; items 2 and 3 are added to them to obtain
            xmax/ymax. Every coordinate is truncated with ``int()``.

    Returns:
        The ``<object>`` element, including its ``<bndbox>`` child.
    """
    obj = doc.createElement('object')
    header_fields = (
        ('name', attrs['name']),
        ('pose', _POSE),
        ('truncated', _TRUNCATED),
        ('difficult', _DIFFICULT),
    )
    for tag, text in header_fields:
        createChildNode(doc, tag, text, obj)

    box = attrs['bndbox']
    corners = (
        ('xmin', box[0]),
        ('ymin', box[1]),
        ('xmax', box[0] + box[2]),
        ('ymax', box[1] + box[3]),
    )
    box_node = doc.createElement('bndbox')
    for tag, value in corners:
        createChildNode(doc, tag, str(int(value)), box_node)
    obj.appendChild(box_node)
    return obj
def writeXMLFile(doc, filename):
    """Pretty-print ``doc`` to ``filename`` without the XML declaration.

    The document is rendered in memory with a four-space indent (the earlier
    ``'' * 4`` evaluated to an empty string, i.e. no indent at all). The
    first output line, which holds the ``<?xml ...?>`` declaration, and any
    blank lines are dropped. No intermediate ``tmp.xml`` is created, and the
    output file is closed even if writing fails.

    Args:
        doc: DOM document to serialize.
        filename: Path of the file to create or overwrite (UTF-8 encoded).
    """
    rendered = doc.toprettyxml(indent=' ' * 4, newl='\n')
    # Line 0 is the XML declaration; everything after it is the payload.
    body_lines = rendered.split('\n')[1:]
    with open(filename, 'w', encoding='utf-8') as fout:
        fout.writelines(line + '\n' for line in body_lines if line.strip())
if __name__ == "__main__":
    # Write one annotation XML per image that has at least one record in
    # COCO_train.json; images without records produce no file.
    img_path = "/home/panlu/ext_work/YOLO/darknet/scripts/coco/images/train2014/"
    fileList = os.listdir(img_path)
    # os.listdir() returns a list, so emptiness is tested directly; comparing
    # the list with 0 is never true.
    if not fileList:
        print("Do not find images in your img_path")
        raise SystemExit(-1)

    with open("COCO_train.json", "r") as f:
        ann_data = json.load(f)

    # Group the records by image base name once, instead of rescanning the
    # whole annotation list for every image. The key uses the same
    # "COCO_train2014_" prefix and 12-digit id as the image file names.
    anns_by_image = {}
    for ann in ann_data:
        image_key = "COCO_train2014_" + str(ann["filename"]).zfill(12)
        anns_by_image.setdefault(image_key, []).append(ann)

    if not os.path.exists('Annotations'):
        os.mkdir('Annotations')

    for imageName in fileList:
        # splitext removes only the extension; str.strip(".jpg") would also
        # eat any leading/trailing '.', 'j', 'p' or 'g' characters.
        saveName = os.path.splitext(imageName)[0]
        print(saveName)
        image_anns = anns_by_image.get(saveName, [])
        if not image_anns:
            # Nothing to write for this image, so skip decoding it.
            continue

        xml_file_name = os.path.join('Annotations', (saveName + '.xml'))
        image_file = os.path.join(img_path, imageName)
        print(image_file)
        img = cv2.imread(image_file)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print("Skip unreadable image: " + image_file)
            continue
        height, width, channel = img.shape
        print(height, width, channel)

        my_dom = xml.dom.getDOMImplementation()
        doc = my_dom.createDocument(None, 'annotation', None)
        root_node = doc.documentElement

        createChildNode(doc, 'folder', 'COCO2014', root_node)
        createChildNode(doc, 'filename', saveName + '.jpg', root_node)

        source_node = doc.createElement('source')
        createChildNode(doc, 'database', 'LOGODection', source_node)
        createChildNode(doc, 'annotation', 'COCO2014', source_node)
        createChildNode(doc, 'image', 'flickr', source_node)
        createChildNode(doc, 'flickrid', 'NULL', source_node)
        root_node.appendChild(source_node)

        owner_node = doc.createElement('owner')
        createChildNode(doc, 'flickrid', 'NULL', owner_node)
        createChildNode(doc, 'name', _AUTHOR, owner_node)
        root_node.appendChild(owner_node)

        size_node = doc.createElement('size')
        createChildNode(doc, 'width', str(width), size_node)
        createChildNode(doc, 'height', str(height), size_node)
        createChildNode(doc, 'depth', str(channel), size_node)
        root_node.appendChild(size_node)

        createChildNode(doc, 'segmented', _SEGMENTED, root_node)

        # One <object> element per record of this image.
        for ann in image_anns:
            root_node.appendChild(createObjectNode(doc, ann))
        writeXMLFile(doc, xml_file_name)
最后得到的目录如下:
下一步转化为kitti格式参考这里:voc2007_extract_car_2_kitti
看到这里发现nvidia 一个demo里提供了coco直接转化为kitti的python代码,OK,如果目的是coco2kitti 前面可以不用看了,下面这个简单:
coco直接提取需要的类别转化KITTI,python代码如下,略作修改:
"""coco2kitti.py: Converts MS COCO annotation files to
Kitti format bounding box label files
__author__ = "ma xiang jun"
"""
import os
from pycocotools.coco import COCO
def coco2kitti(catNms, annFile, labelDir='./labels'):
    """Write one text label file per image that has matching annotations.

    Each output line is the category name (spaces removed), three ``0``
    fields, the box as ``x1 y1 x2 y2`` and eight further ``0`` fields.

    Args:
        catNms: Category names passed to ``getCatIds`` to select the
            annotations that are exported.
        annFile: Path of the COCO instance annotation file.
        labelDir: Existing directory that receives the ``.txt`` files;
            defaults to ``./labels``.
    """
    # initialize COCO api for instance annotations
    coco = COCO(annFile)

    # Index of category id -> category name for all categories in the file.
    cat_idx = {c['id']: c['name'] for c in coco.loadCats(coco.getCatIds())}

    # The selected category ids do not depend on the image, so resolve them
    # once instead of on every loop iteration.
    catIds = coco.getCatIds(catNms=catNms)

    for img in coco.imgs:
        # Get all annotation IDs for the image
        annIds = coco.getAnnIds(imgIds=[img], catIds=catIds)
        # Only images with at least one matching annotation get a label file.
        if not annIds:
            continue

        img_fname = coco.imgs[img]['file_name']
        # splitext drops only the final extension, so names that contain
        # additional dots are not truncated.
        label_name = os.path.splitext(img_fname)[0] + '.txt'
        with open(os.path.join(labelDir, label_name), 'w') as label_file:
            for a in coco.loadAnns(annIds):
                bbox = a['bbox']
                # The box is read as [x, y, w, h]; convert it to the two
                # corners x1, y1, x2, y2.
                corners = [bbox[0], bbox[1], bbox[2] + bbox[0], bbox[3] + bbox[1]]
                # Note: all whitespace will be removed from class names
                catname = cat_idx[a['category_id']].replace(" ", "")
                fields = [catname] + ['0'] * 3 + [str(b) for b in corners] + ['0'] * 8
                label_file.write(' '.join(fields) + '\n')
if __name__ == '__main__':
    # Location of the dataset and the split whose annotations are converted.
    dataDir = '/home/mahnx0/datasets/coco/'
    dataType = 'train2014'
    annFile = '%s/annotations/instances_%s.json' % (dataDir, dataType)

    # Categories to export: label files are produced only for images that
    # contain these classes, and only these classes appear in the files.
    # EXAMPLE:
    # catNms = ['person', 'dog', 'skateboard']
    catNms = ['car', 'bus', 'truck']

    # Convert only when no labels folder exists yet, so that the results of
    # an earlier run are never overwritten.
    if not os.path.isdir('./labels'):
        os.mkdir('./labels')
        coco2kitti(catNms, annFile)
    else:
        print('Labels folder already exists - exiting to prevent badness')
如果发现格式或者数据哪里不对,这里有两个很方便的sh命令
(在当前目录下所有文件内容中查找bus,并替换为car)
sed -i "s/bus/car/g" `grep bus -rl ./`
根据文件名批量查找文件并且移到其它目录
find ./ -type f ! -name "passwd" -exec rm {} \; (删除passwd之外的文件)
#
./表示从当前目录找
-type f,表示只找file,文件类型的,目录和其他字节不要
-exec 把find到的文件名作为参数传递给后面的命令行,代替{}的部分
-exec后跟的命令行,必须用“ \;”结束
经典的移动:man mv
find ./ -type f -name "*.sh" | xargs mv -t /opt/
find ./ -type f -name "*.sh" | xargs -i mv {} /opt/
find ./ -type f -name "*.sh" -exec mv {} /opt/ \; =====> \ 是转义符号,否则 ; 不被shell识别。
mv `find ./ -type f -name "*.sh"` /opt/ 或者 cp $(find ./ -type f -name "*.sh") /opt/