写在前面:
github上的Text-Detection-with-FRCN代码是py-faster-rcnn在场景文字检测上的扩展,基本只是改变了数据集和分类类别。在样例中,将原有的pascal_voc数据集换成了coco_text。如果不想对代码做大的改动,最好的方式就是统一两个数据集的格式。
在github中,提供了format data的方式:
# format the raw image and label into the type of pascal_voc
# follow the code in $Text-Detection-with-FRCN/datasets/script/format_annotation.py
cd $Text-Detection-with-FRCN/datasets/script
./format_annotation.py --dataset coco-text
我们来看一下format_annotation.py的倒数第二行:os.system('./ann2voc2007.sh ' + args.dataset)
'./ann2voc2007.sh'的内容为:matlab -nodisplay -nodesktop -r "ann2voc2007('$1'); quit"
也就是用MATLAB来运行。下载并安装MATLAB要花费很长时间,如果不想安装,怎么办呢?可以用python把这个MATLAB文件改写一遍。
MATLAB代码的作用是生成xml文件。那么相应地,也可以利用python生成xml文件。
原MATLAB代码:
function ann2voc2007(input_dir)
%ANN2VOC2007 Convert a formatted annotation list into PASCAL VOC2007 XML files.
%   ANN2VOC2007(INPUT_DIR) takes the last path component of INPUT_DIR as the
%   dataset name and works inside
%       <folder of this script>/../<dataset name>/formatted_dataset
%   It reads "images.annotations" there. Every line is split on single spaces
%   and is used as
%       <image file> <label> <xmin> <ymin> <xmax> <ymax>
%   One XML document per image is collected in memory (one <object> element
%   per accepted line) and written to the "Annotations" sub-folder at the end.
%
%   Lines are skipped when they have fewer than six fields, when a coordinate
%   is not numeric, when the image file does not exist, or when the box is
%   empty after clamping it to the image.

    curpath = mfilename('fullpath');
    % The next assignments deliberately keep the original's missing
    % semicolons, so the resolved locations are echoed to the console.
    [pathstr,~,~] = fileparts(curpath)
    if input_dir(end) == '/'
        input_dir = input_dir(1:end-1);
    end
    [~,input_dir,~] = fileparts(input_dir);
    input_dir = [pathstr '/../' input_dir '/formatted_dataset']
    imgpath = [input_dir '/JPEGImages/']
    txtpath = [input_dir '/images.annotations']
    xmlpath_new = [input_dir '/Annotations/'];
    foldername = 'VOC2007';

    % Maps "<image>.xml" -> DOM document that is still being filled.
    coco = containers.Map();

    fidin = fopen(txtpath, 'r');
    if fidin == -1
        error('ann2voc2007:openFailed', ...
              'Cannot open annotation file: %s', txtpath);
    end
    % Close the file even if imfinfo or the XML code raises an error.
    closer = onCleanup(@() fclose(fidin));

    cnt = 0;
    while ~feof(fidin)
        tline = fgetl(fidin);
        if ~ischar(tline)
            % fgetl returns -1 when only the end-of-file marker was left.
            break;
        end
        str = regexp(tline, ' ', 'split');
        if numel(str) < 6
            % Blank or truncated line: nothing usable to convert.
            continue;
        end

        % Check for the image BEFORE imfinfo, which errors on a missing file.
        imgfile = [imgpath '/' str{1}];
        if exist(imgfile, 'file') ~= 2
            continue;
        end

        coords = str2double(str(3:6));
        if any(isnan(coords))
            % str2double yields NaN for non-numeric text; max/min would then
            % silently turn the box into the full image, so drop the line.
            continue;
        end

        info = imfinfo(imgfile);
        % Clamp the box to [1, Width] x [1, Height].
        xmin = max(coords(1), 1);
        ymin = max(coords(2), 1);
        xmax = min(coords(3), info.Width);
        ymax = min(coords(4), info.Height);
        % After clamping, xmin/ymin are >= 1 and xmax/ymax cannot exceed the
        % image size, so an empty box is the only remaining rejection case.
        if xmin >= xmax || ymin >= ymax
            continue;
        end
        cnt = cnt + 1

        xmlname = strrep(str{1}, '.jpg', '.xml');
        if isKey(coco, xmlname)
            doc = coco(xmlname);
        else
            doc = newAnnotation(foldername, str{1}, info);
            coco(xmlname) = doc;
        end
        addObject(doc, str{2}, [xmin ymin xmax ymax]);
    end

    % xmlwrite does not create missing folders.
    if ~exist(xmlpath_new, 'dir')
        mkdir(xmlpath_new);
    end
    keyss = keys(coco);
    for i = 1:length(keyss)
        xmlwrite([xmlpath_new '/' keyss{i}], coco(keyss{i}));
    end
end

function doc = newAnnotation(foldername, imgname, info)
% Create the <annotation> document with the per-image header elements.
    doc = com.mathworks.xml.XMLUtils.createDocument('annotation');
    root = doc.getDocumentElement;
    addTextChild(doc, root, 'folder', foldername);
    addTextChild(doc, root, 'filename', imgname);

    source_node = doc.createElement('source');
    root.appendChild(source_node);
    addTextChild(doc, source_node, 'database', 'MS COCO-Text');
    addTextChild(doc, source_node, 'annotation', 'MS COCO-Text 2014');
    addTextChild(doc, source_node, 'image', 'NULL');
    addTextChild(doc, source_node, 'flickrid', 'NULL');

    owner_node = doc.createElement('owner');
    root.appendChild(owner_node);
    addTextChild(doc, owner_node, 'flickrid', 'NULL');
    addTextChild(doc, owner_node, 'name', 'ligen');

    size_node = doc.createElement('size');
    root.appendChild(size_node);
    addTextChild(doc, size_node, 'width', num2str(info.Width));
    addTextChild(doc, size_node, 'height', num2str(info.Height));
    addTextChild(doc, size_node, 'depth', num2str(info.BitDepth / 8));

    addTextChild(doc, root, 'segmented', '0');
end

function addObject(doc, label, box)
% Append one <object> (label plus [xmin ymin xmax ymax] box) to the root.
    object_node = doc.createElement('object');
    root = doc.getDocumentElement;
    root.appendChild(object_node);
    addTextChild(doc, object_node, 'name', label);
    addTextChild(doc, object_node, 'pose', 'Unspecified');
    addTextChild(doc, object_node, 'truncated', '0');
    addTextChild(doc, object_node, 'difficult', '0');

    bndbox_node = doc.createElement('bndbox');
    object_node.appendChild(bndbox_node);
    tags = {'xmin', 'ymin', 'xmax', 'ymax'};
    for k = 1:numel(tags)
        addTextChild(doc, bndbox_node, tags{k}, num2str(box(k)));
    end
end

function node = addTextChild(doc, parent, tag, text)
% Append <tag>text</tag> to PARENT and return the new element.
    node = doc.createElement(tag);
    node.appendChild(doc.createTextNode(text));
    parent.appendChild(node);
end
#coding:utf-8
from PIL import Image
from xml.dom.minidom import Document
import os
def _add_text_child(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    node = doc.createElement(tag)
    node.appendChild(doc.createTextNode(text))
    parent.appendChild(node)
    return node


def _add_object(doc, label, box):
    """Append one ``<object>`` (label plus bounding box) to the document root.

    *box* is ``(xmin, ymin, xmax, ymax)``; each value is written with ``str``.
    """
    object_node = doc.createElement('object')
    doc.documentElement.appendChild(object_node)
    _add_text_child(doc, object_node, 'name', label)
    _add_text_child(doc, object_node, 'pose', 'Unspecified')
    _add_text_child(doc, object_node, 'truncated', '0')
    _add_text_child(doc, object_node, 'difficult', '0')
    bndbox_node = doc.createElement('bndbox')
    object_node.appendChild(bndbox_node)
    for tag, value in zip(('xmin', 'ymin', 'xmax', 'ymax'), box):
        _add_text_child(doc, bndbox_node, tag, str(value))


def _new_annotation(foldername, filename, width, height, depth):
    """Create an ``<annotation>`` document holding the per-image header."""
    doc = Document()
    root = doc.createElement('annotation')
    doc.appendChild(root)
    _add_text_child(doc, root, 'folder', foldername)
    _add_text_child(doc, root, 'filename', filename)

    source_node = doc.createElement('source')
    root.appendChild(source_node)
    _add_text_child(doc, source_node, 'database', 'MS COCO-Text')
    _add_text_child(doc, source_node, 'annotation', 'MS COCO-Text 2014')
    _add_text_child(doc, source_node, 'image', 'NULL')
    _add_text_child(doc, source_node, 'flickrid', 'NULL')

    owner_node = doc.createElement('owner')
    root.appendChild(owner_node)
    _add_text_child(doc, owner_node, 'flickrid', 'NULL')
    _add_text_child(doc, owner_node, 'name', 'ligen')

    size_node = doc.createElement('size')
    root.appendChild(size_node)
    _add_text_child(doc, size_node, 'width', str(width))
    _add_text_child(doc, size_node, 'height', str(height))
    _add_text_child(doc, size_node, 'depth', str(depth))

    _add_text_child(doc, root, 'segmented', '0')
    return doc


def _read_image_info(path):
    """Return ``(width, height, channel_count)`` of the image at *path*.

    The file is opened here and handed to PIL so that the handle is closed
    deterministically; size and mode are available without decoding pixels.
    """
    with open(path, 'rb') as handle:
        image = Image.open(handle)
        width, height = image.size
        return width, height, len(image.getbands())


def main(imgpath='JPEGImages/', txtpath='images.annotations',
         xmlpath_new='Annotations/'):
    """Convert a flat annotation list into PASCAL VOC style XML files.

    Every line of *txtpath* is split on single spaces and used as
    ``<image file> <label> <xmin> <ymin> <xmax> <ymax>``.  Boxes are clamped
    to the image, empty boxes are dropped, and one XML file per image (one
    ``<object>`` per accepted line) is written into *xmlpath_new*.

    Args:
        imgpath: Directory that holds the images named in the list.
        txtpath: Path of the annotation list.
        xmlpath_new: Output directory for the XML files; created if missing.

    Lines that are blank, have fewer than six fields, carry non-numeric
    coordinates, or name an image that does not exist are skipped.
    """
    foldername = 'VOC2007'
    coco = {}        # xml file name -> DOM document still being filled
    image_info = {}  # image file name -> (width, height, depth) or None

    # Read the annotation list. Plain text mode plus an explicit strip of
    # '\r\n' behaves the same on Python 2 and 3 (the 'rU' mode is gone in
    # recent Python 3 releases).
    with open(txtpath) as file_object:
        for line in file_object:
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            strs = line.split(' ')
            if len(strs) < 6:
                print('skip malformed line: ' + line)
                continue
            print(strs[0])

            # Look the image up once per file, and check that it exists
            # BEFORE handing it to PIL, which raises on a missing file.
            if strs[0] not in image_info:
                image_file = os.path.join(imgpath, strs[0])
                if os.path.exists(image_file):
                    image_info[strs[0]] = _read_image_info(image_file)
                else:
                    image_info[strs[0]] = None
            if image_info[strs[0]] is None:
                continue
            width, height, depth = image_info[strs[0]]

            try:
                # Clamp the box to [1, width] x [1, height].
                xmin = max(float(strs[2]), 1)
                ymin = max(float(strs[3]), 1)
                xmax = min(float(strs[4]), width)
                ymax = min(float(strs[5]), height)
            except ValueError:
                print('skip malformed line: ' + line)
                continue
            # After clamping, an empty box is the only way a line can still
            # be invalid; written this way a NaN coordinate is rejected too.
            if not (xmin < xmax and ymin < ymax):
                continue

            # Same base name as the image, with the .xml extension.
            xmlname = strs[0].replace('.jpg', '.xml')
            if xmlname not in coco:
                # depth is the channel count, mirroring BitDepth / 8 in the
                # MATLAB reference instead of a hard-coded 3.
                coco[xmlname] = _new_annotation(
                    foldername, strs[0], width, height, depth)
            _add_object(coco[xmlname], strs[1], (xmin, ymin, xmax, ymax))

    print('begin load xml...')
    if xmlpath_new and not os.path.isdir(xmlpath_new):
        os.makedirs(xmlpath_new)
    for key in coco:
        print(key)
        with open(os.path.join(xmlpath_new, key), 'w') as f:
            f.write(coco[key].toprettyxml(indent='\t'))


if __name__ == "__main__":
    main()
(1)MATLAB创建文档节点及根元素的方式:
Createnode = com.mathworks.xml.XMLUtils.createDocument('annotation');
Root = Createnode.getDocumentElement;
node = Createnode.createElement('folder');
node.appendChild(Createnode.createTextNode(foldername));
Root.appendChild(node);
python创建文档节点及根元素的方式:
Createnode=Document() #创建DOM文档对象
Root=Createnode.createElement('annotation') #创建根元素
Createnode.appendChild(Root)
# folder
folder=Createnode.createElement('folder')
folder.appendChild(Createnode.createTextNode(foldername))
Root.appendChild(folder)
(2)MATLAB获取文档已存在的根元素的方式:
Createnode = coco(xmlname);
object_node = Createnode.createElement('object');
Root = Createnode.getDocumentElement;
Root.appendChild(object_node);
python获取文档已存在的根元素的方式:
Createnode = coco[xmlname]
object_node = Createnode.createElement('object')
Root = Createnode.getElementsByTagName('annotation')[0]
Root.appendChild(object_node)
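1)MATLAB中数组和元胞数组(cell array)的索引从1开始,而python中list的索引从0开始,所以MATLAB里的str{1}对应python里的strs[0]
2)python中str是内置类型的名字(并不是关键字),用它作变量名会覆盖内置的str(),所以python代码里把变量改名为strs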
总结:
其实python转xml并不是什么新东西。写这篇博文的目的,在于纪念这次思考问题的方式。
在实验环境不能满足要求时,例如本次缺少MATLAB,就要考虑时间成本:是为了这部分代码去下载MATLAB,还是重写呢?衡量二者的复杂度和时间成本之后,选择了重写。