github上有一个开源项目:https://github.com/jugg1024/Text-Detection-with-FRCN
基于faster rcnn做文字检测
参考链接:http://blog.csdn.net/u013250416/article/details/78457624
一.跑demo
1)下载好pre_trained model
http://pan.baidu.com/s/1dE2Ori5 Extract Code: phxk
下载好pre_trained model,就可以继续了,这个caffe_model用来做文字检测:vgg16_faster_rcnn_fine_tune_on_coco.caffemodel
然后放到
$Text-Detection-with-FRCN/model/vgg16_faster_rcnn_fine_tune_on_coco.caffemodel
2)run demo
cd $Text-Detection-with-FRCN/
./script/text_detect_demo.sh
Results are in output_img
说明:在text_detect_demo.sh当中,有一个选项:dataset,可以用已经训练好的coco.caffemodel,来测试自己的数据集,只需要修改一下数据集的路径即可
/home/xuy/code/Text-Detection-with-FRCN/py-faster-rcnn/tools/text_detect_demo.py \
--gpu 0 \
--net models/deploy.prototxt \
--model models/vgg16_faster_rcnn_fine_tune_on_coco.caffemodel \
--dataset /home/xuy/桌面/code/python/ocr/dataset/image_1000
二.训练自己的数据集:(以官方给的coco text2014为例)
1)下载coco text数据集:
cd $Text-Detection-with-FRCN/datasets/script
./fetch_dataset.sh coco-text
# the download takes a long time!
# ensure you have both data and label
# for coco-text label is in COCO-text.json, and data is in train2014.zip
这里可以不运行终端命令,查看该文件可知:
if [ "$1" = "coco-text" ]; then
download_file $1 COCO_Text.zip https://s3.amazonaws.com/cocotext/COCO_Text.zip 5cecfc1081b2ae7fdea75e6c9a9dec3b zip
download_file $1 train2014.zip http://msvocds.blob.core.windows.net/coco2014/train2014.zip nocheck zip
我们一共需要下载两个文件,用迅雷下载好之后,直接放到$Text-Detection-with-FRCN/datasets/coco-text里面即可
2)下载已经训练好的pre-train model
cd $Text-Detection-with-FRCN/py-faster-rcnn
./data/scripts/fetch_imagenet_models.sh
同样方法:下载好之后,放到$Text-Detection-with-FRCN/py-faster-rcnn/data/imagenet_models/
3)format the data(you should write your code here)
此时参考上面的链接:
先将下载好的数据解压,然后由于ubuntu下面没有安装matlab(如果平时不用matlab的话没必要安装)
将matlab版本的代码转化为python版本的代码即可。
改写代码如下:
#coding:utf-8
from PIL import Image
from xml.dom.minidom import Document
import os
def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>`` in *doc*, attach it to *parent*, return it."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def _append_object(doc, root, label, box):
    """Append one ``<object>`` entry with its ``<bndbox>`` to *root*.

    *box* is an ``(xmin, ymin, xmax, ymax)`` sequence; every value is written
    as ``str(value)``.
    """
    object_node = doc.createElement('object')
    root.appendChild(object_node)
    _append_text_element(doc, object_node, 'name', label)
    _append_text_element(doc, object_node, 'pose', 'Unspecified')
    _append_text_element(doc, object_node, 'truncated', '0')
    _append_text_element(doc, object_node, 'difficult', '0')
    bndbox_node = doc.createElement('bndbox')
    object_node.appendChild(bndbox_node)
    for tag, value in zip(('xmin', 'ymin', 'xmax', 'ymax'), box):
        _append_text_element(doc, bndbox_node, tag, str(value))


def _new_annotation(foldername, image_name, width, height):
    """Build the per-image header of an annotation document (no objects yet)."""
    doc = Document()
    root = doc.createElement('annotation')
    doc.appendChild(root)

    _append_text_element(doc, root, 'folder', foldername)
    _append_text_element(doc, root, 'filename', image_name)

    source_node = doc.createElement('source')
    root.appendChild(source_node)
    _append_text_element(doc, source_node, 'database', 'MS COCO-Text')
    _append_text_element(doc, source_node, 'annotation', 'MS COCO-Text 2014')
    _append_text_element(doc, source_node, 'image', 'NULL')
    _append_text_element(doc, source_node, 'flickrid', 'NULL')

    owner_node = doc.createElement('owner')
    root.appendChild(owner_node)
    _append_text_element(doc, owner_node, 'flickrid', 'NULL')
    _append_text_element(doc, owner_node, 'name', 'ligen')

    size_node = doc.createElement('size')
    root.appendChild(size_node)
    _append_text_element(doc, size_node, 'width', str(width))
    _append_text_element(doc, size_node, 'height', str(height))
    _append_text_element(doc, size_node, 'depth', '3')

    _append_text_element(doc, root, 'segmented', '0')
    return doc


def main():
    """Convert ``images.annotations`` into one XML annotation file per image.

    Each input line is expected to hold six space-separated fields:
    ``<image file name> <label> <xmin> <ymin> <xmax> <ymax>``.  Boxes are
    clamped to ``[1, width] x [1, height]`` using the size of the matching
    image under ``JPEGImages/``; boxes that are empty after clamping are
    dropped.  All boxes of one image are collected in a single document that
    is written to ``Annotations/<image name with .jpg replaced by .xml>``.

    All paths are relative to the current working directory.  Lines with
    fewer than six fields and lines that reference a missing image are
    skipped.

    Raises:
        IOError/OSError: if ``images.annotations`` cannot be read or an
            output file cannot be written.
        ValueError: if a coordinate field is not a number.
    """
    imgpath = 'JPEGImages/'
    txtpath = 'images.annotations'
    xmlpath_new = 'Annotations/'
    foldername = 'VOC2007'
    coco = {}    # xml file name -> annotation Document
    sizes = {}   # image file name -> (width, height), so each image is read once

    # Collect the annotation records.  Plain text mode is used because the
    # 'U' flag was removed in Python 3.11; stripping '\r\n' below keeps
    # Windows line endings harmless on every version.
    with open(txtpath) as file_object:
        for line in file_object:
            strs = line.rstrip('\r\n').split(' ')
            if len(strs) < 6:
                # Blank or truncated line: nothing to convert.
                continue
            image_name, label = strs[0], strs[1]
            print(image_name)

            # The existence check has to come before the image is opened,
            # otherwise a missing image aborts the whole conversion.
            image_path = imgpath + image_name
            if not os.path.exists(image_path):
                continue

            if image_name not in sizes:
                # Open the file ourselves so the handle is closed right after
                # the size has been read.
                with open(image_path, 'rb') as image_file:
                    sizes[image_name] = Image.open(image_file).size
            width, height = sizes[image_name]

            # Clamp the box to the image.
            xmin = max(float(strs[2]), 1)
            ymin = max(float(strs[3]), 1)
            xmax = min(float(strs[4]), width)
            ymax = min(float(strs[5]), height)
            # After clamping only an empty or inverted box can still be
            # invalid; the lower/upper bound checks are already guaranteed.
            if xmin >= xmax or ymin >= ymax:
                continue

            # Replace the .jpg suffix to get the matching xml file name.
            xmlname = image_name.replace('.jpg', '.xml')
            doc = coco.get(xmlname)
            if doc is None:
                # First box of this image: create the document header.
                doc = _new_annotation(foldername, image_name, width, height)
                coco[xmlname] = doc
            # NOTE(review): values are written as str() of the clamped number,
            # so they can appear as e.g. '431.0' - confirm that the consumers
            # of these files accept that form.
            _append_object(doc, doc.documentElement, label,
                           (xmin, ymin, xmax, ymax))

    print('begin load xml...')
    if coco and not os.path.isdir(xmlpath_new):
        os.makedirs(xmlpath_new)
    for key in coco:
        print(key)
        with open(xmlpath_new + key, 'w') as xml_file:
            xml_file.write(coco[key].toprettyxml(indent='\t'))


if __name__ == "__main__":
    main()
在这里附上结果:*.xml
<?xml version="1.0" ?>
<annotation>
<folder>VOC2007</folder>
<filename>COCO_train2014_000000000036.jpg</filename>
<source>
<database>MS COCO-Text</database>
<annotation>MS COCO-Text 2014</annotation>
<image>NULL</image>
<flickrid>NULL</flickrid>
</source>
<owner>
<flickrid>NULL</flickrid>
<name>ligen</name>
</owner>
<size>
<width>481</width>
<height>640</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>text</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>431</xmin>
<ymin>181</ymin>
<xmax>452</xmax>
<ymax>215</ymax>
</bndbox>
</object>
<object>
<name>text</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>461</xmin>
<ymin>180</ymin>
<xmax>480</xmax>
<ymax>215</ymax>
</bndbox>
</object>
</annotation>
将上面改写的Python脚本保存为$Text-Detection-with-FRCN/datasets/coco-text/formatted_dataset/ann2voc2007.py,并在该目录下运行(脚本使用的是相对路径JPEGImages/、images.annotations和Annotations/)
最后将$Text-Detection-with-FRCN/datasets/coco-text/formatted_dataset/Annotations/下面的tmp文件删除掉
4)create a softlink from the formatted data to the working directory
cd $Text-Detection-with-FRCN/datasets/
ln -s coco-text train-data # $YOUR_DATA
5)training
cd $Text-Detection-with-FRCN/py-faster-rcnn/
./experiments/scripts/faster_rcnn_end2end.sh 0 VGG16 pascal_voc
其中遇到的问题及解决方法:
问题:pb2.text_format.Merge(f.read(), self.solver_param)
AttributeError: 'module' object has no attribute 'text_format'
解决办法:pip install protobuf==2.6.0