我用到的工具有:
1.DrawBox.exe,基于opencv,按住鼠标左键框出目标区域,按下1则保存为1类,2则为2类(我只用到两个类,所以只判断2个键),不按任何键则不作任何操作,空格键表示当前图片处理完毕,开始处理下一张图片,标签信息保存在当前路径下的trainval.txt里。
txt内容格式如下(每行对应一个标注框,各字段以空格分隔):图片文件名 类别 x1 y1 x2 y2,例如 000081.jpg wheel 120 56 340 290。DrawBox 的源码如下:
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include "opencv.hpp"
using namespace cv;
using namespace std;
/**
 * @brief Builds a zero-padded image file name such as "000123.jpg".
 * @param i       numeric index placed in the name
 * @param extend  suffix appended verbatim after the digits
 * @param len     minimum field width; wider numbers are kept in full
 * @return the padded number followed by extend
 */
string getFileName(int i, string extend = ".jpg", int len = 6)
{
    ostringstream padded;
    padded << setfill('0') << setw(len) << i;
    return padded.str() + extend;
}
// Annotation output stream; opened and closed by the drawBoxes() overloads.
fstream f;
// Image exactly as loaded from disk.
cv::Mat org;
// Only referenced by commented-out cropping code in on_mouse().
cv::Mat dst;
// Copy of org carrying the markers drawn on press/release.
cv::Mat img;
// Scratch copy used for the live rectangle while dragging.
cv::Mat tmp;
// First corner of the box being drawn, set on left-button press; (-1,-1) until then.
Point pre_pt(-1, -1);
// Second corner, updated while dragging and on left-button release; (-1,-1) until then.
Point cur_pt(-1, -1);
void on_mouse(int event, int x, int y, int flags, void *ustc)//event鼠标事件代号,x,y鼠标坐标,flags拖拽和键盘操作的代号
{
char temp[16];
if (event == CV_EVENT_LBUTTONDOWN)//左键按下,读取初始坐标,并在图像上该点处划圆
{
org.copyTo(img);//将原始图片复制到img中
sprintf(temp, "(%d,%d)", x, y);
pre_pt = Point(x, y);
putText(img, temp, pre_pt, FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0, 255), 1, 8);//在窗口上显示坐标
circle(img, pre_pt, 2, Scalar(255, 0, 0, 0), CV_FILLED, CV_AA, 0);//划圆
imshow("img", img);
}
else if (event == CV_EVENT_MOUSEMOVE && !(flags & CV_EVENT_FLAG_LBUTTON))//左键没有按下的情况下鼠标移动的处理函数
{
// img.copyTo(tmp);//将img复制到临时图像tmp上,用于显示实时坐标
// sprintf(temp, "(%d,%d)", x, y);
// cur_pt = Point(x, y);
// putText(tmp, temp, cur_pt, FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0, 255));//只是实时显示鼠标移动的坐标
// imshow("img", tmp);
}
else if (event == CV_EVENT_MOUSEMOVE && (flags & CV_EVENT_FLAG_LBUTTON))//左键按下时,鼠标移动,则在图像上划矩形
{
img.copyTo(tmp);
sprintf(temp, "(%d,%d)", x, y);
cur_pt = Point(x, y);
putText(tmp, temp, cur_pt, FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0, 255));
rectangle(tmp, pre_pt, cur_pt, Scalar(0, 255, 0, 0), 1, 8, 0);//在临时图像上实时显示鼠标拖动时形成的矩形
imshow("img", tmp);
}
else if (event == CV_EVENT_LBUTTONUP)//左键松开,将在图像上划矩形
{
org.copyTo(img);
sprintf(temp, "(%d,%d)", x, y);
cur_pt = Point(x, y);
putText(img, temp, cur_pt, FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0, 255));
circle(img, pre_pt, 2, Scalar(255, 0, 0, 0), CV_FILLED, CV_AA, 0);
rectangle(img, pre_pt, cur_pt, Scalar(0, 255, 0, 0), 1, 8, 0);//根据初始点和结束点,将矩形画到img上
imshow("img", img);
img.copyTo(tmp);
//截取矩形包围的图像,并保存到dst中
int width = abs(pre_pt.x - cur_pt.x);
int height = abs(pre_pt.y - cur_pt.y);
if (width == 0 || height == 0)
{
printf("width == 0 || height == 0");
return;
}
//dst = org(Rect(min(cur_pt.x, pre_pt.x), min(cur_pt.y, pre_pt.y), width, height));
//namedWindow("dst");
//imshow("dst", dst);
cout << pre_pt << " " << cur_pt << endl;
}
}
// Not referenced by any code visible in this file.
int fileNum = 2000;
// Running image index: saveImgAsFormatVoc() names each output file from it and
// increments it; drawBoxes(int, ...) uses it as the exclusive upper bound of
// the indices it visits.
int iter = 2151;
/**
* @brief 将图片另存为VOC格式,文件名为6为数,后缀为jpg
* @param[in] input为输入图片路径,output为输出图片路径
* @Return:
*/
void saveImgAsFormatVoc(string input="F:\\徐州\\360\\07\\20170701191815\\",string output="F:\\徐州\\360\\07\\07\\")
{
Directory dir;
vector<string> filenames = dir.GetListFiles(input, "*.tiff", false);
for (auto it = filenames.begin(); it != filenames.end(); ++it)
{
//cout << *it << endl;
Mat src = imread(input + *it);
imwrite(output + getFileName(iter++), src);
}
cout << iter << endl;
}
/**
* @brief 对图片进行标注,框出目标区域,并输入类别
* 图片名未按标准格式命名 000000.jpg 000001.jpg格式
* @param[in] path为输入图片路径,txtPath为保存标注的txt路径
* @Return:
*/
void drawBoxes(string path = "F:\\徐州\\360\\07\\07\\", string txtPath = "trainval.txt")
{
Directory dir;
f.open(txtPath, ios::app);
vector<string> filenames = dir.GetListFiles(path, "*.jpg", false);
cout << filenames.size() << endl;
for (int i = 0; i < filenames.size(); ++i)
{
namedWindow("img");
setMouseCallback("img", on_mouse, 0);
org = imread(path + filenames[i]);
org.copyTo(img);
org.copyTo(tmp);
imshow("img", org);
int res = 0;
string label;
while (res = waitKey())
{
if (res == 49)
{
label = "wheel";
cout << label << endl;
f << filenames[i] << " " << label << " " << pre_pt.x << " " << pre_pt.y << " " << cur_pt.x << " " << cur_pt.y << endl;
}
else if (res == 50)
{
label = "defect";
cout << label << endl;
f << filenames[i] << " " << label << " " << pre_pt.x << " " << pre_pt.y << " " << cur_pt.x << " " << cur_pt.y << endl;
}
if (res == 32)
{
destroyWindow("img");
break;
}
}
}
f.close();
}
/**
* @brief 对图片进行标注,框出目标区域,并输入类别
* 图片名按标准格式命名
* @param[in] start:从特定序号图片开始标注(10000张图片分批次标准,下次标注只需要设置start值接着标注即可)
path为输入图片路径,txtPath为保存标注的txt路径
* @Return:
*/
void drawBoxes(int start = 0, string path = "F:\\徐州\\360\\07\\07\\", string txtPath = "F:\\徐州\\360\\07\\07\\trainval.txt")
{
Directory dir;
f.open(txtPath, ios::app);
for (int i = start; i < iter; ++i)
{
string filename = getFileName(i);
cout << filename << endl;
namedWindow("img");
setMouseCallback("img", on_mouse, 0);
//org = imread(path + filenames[i]);
org = imread(path + filename);
org.copyTo(img);
org.copyTo(tmp);
imshow("img", org);
int res = 0;
string label;
while (res = waitKey())
{
if (res == 49)
{
label = "wheel";
cout << label << endl;
f << filename << " " << label << " " << pre_pt.x << " " << pre_pt.y << " " << cur_pt.x << " " << cur_pt.y << endl;
}
else if (res == 50)
{
label = "defect";
cout << label << endl;
f << filename << " " << label << " " << pre_pt.x << " " << pre_pt.y << " " << cur_pt.x << " " << cur_pt.y << endl;
}
if (res == 32)
{
destroyWindow("img");
break;
}
}
}
f.close();
}
/// Entry point: resumes annotation at image index 81 with drawBoxes' default paths.
int main()
{
    const int resumeFrom = 81;
    drawBoxes(resumeFrom);
    return 0;
}
2.txt2xml.py 将txt文件转换成voc格式的xml文件
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 09 13:31:44 2017
@author: Asus
"""
import xml.etree.cElementTree as et
import cv2
# Append one labelled bounding box to a VOC annotation tree.
def insertObj(root,name0,xmin0,ymin0,xmax0,ymax0):
    """Append an <object> element describing one bounding box to ``root``.

    root   -- the <annotation> element to extend (modified in place)
    name0  -- class label written into <name>
    xmin0, ymin0, xmax0, ymax0 -- box corners; strings or numbers, stored
              as text via str()

    The tail/text whitespace is set by hand so the written file is
    tab-indented. Returns None.
    """
    # The element that currently closes the document must now indent the
    # new sibling instead of closing </annotation>.
    if len(root):
        root[-1].tail='\n\t'
    else:
        root.text='\n\t'

    obj=et.SubElement(root,'object')
    obj.text='\n\t\t'
    obj.tail='\n'

    fixedFields=(
        ('name',name0),
        ('pose','Unspecified'),
        ('truncated','0'),
        ('difficult','0'),
    )
    for tag,text in fixedFields:
        node=et.SubElement(obj,tag)
        node.text=text
        node.tail='\n\t\t'

    bndbox=et.SubElement(obj,'bndbox')
    bndbox.text='\n\t\t\t'
    bndbox.tail='\n\t'

    corners=(('xmin',xmin0),('ymin',ymin0),('xmax',xmax0),('ymax',ymax0))
    for tag,value in corners:
        node=et.SubElement(bndbox,tag)
        # str() lets callers pass ints; ElementTree can only serialise text.
        node.text=str(value)
        node.tail='\n\t\t\t'
    # The last corner closes </bndbox>, one level shallower.
    node.tail='\n\t\t'
# Build the header of a VOC annotation document for one image.
def createXml(filename0,width0,height0,channel0):
    """Create the <annotation> skeleton (everything except the objects).

    filename0 -- image file name written into <filename>
    width0, height0, channel0 -- image size, stored as text via str()

    Returns the root element; text/tail whitespace is set by hand so the
    written file is tab-indented.
    """
    def add(parent,tag,text,tail):
        node=et.SubElement(parent,tag)
        node.text=text
        node.tail=tail
        return node

    top='\n\t'
    inner='\n\t\t'

    root=et.Element('annotation')
    root.text=top
    add(root,'folder','VOC2007',top)
    add(root,'filename',filename0,top)

    source=add(root,'source',inner,top)
    add(source,'database','The VOC2007 Database',inner)
    add(source,'annotation','PASCAL VOC2007',inner)
    add(source,'image','flickr',inner)
    add(source,'flickrid','0',top)

    owner=add(root,'owner',inner,top)
    add(owner,'flickrid','0',inner)
    add(owner,'name','tycho',top)

    size=add(root,'size',inner,top)
    add(size,'width',str(width0),inner)
    add(size,'height',str(height0),inner)
    add(size,'depth',str(channel0),top)

    add(root,'segmented','0','\n')
    return root
# Directory that receives the generated VOC XML files.
xmlpath='Annotation/'
# Directory containing trainval.txt.
txtpath='07/'
# Directory containing the annotated JPEG images.
jpgpath='07/'


def parseLabelLine(line):
    """Split one trainval.txt record into its fields.

    A record is ``<image> <label> <x1> <y1> <x2> <y2>`` separated by
    whitespace. The two corners may arrive in any order (a box can be
    dragged right-to-left or bottom-to-top), so they are sorted.

    Returns (filename, label, xmin, ymin, xmax, ymax) with integer
    coordinates, or None for a blank or malformed line.
    """
    fields=line.split()
    if len(fields)<6:
        return None
    try:
        x1,y1,x2,y2=[int(v) for v in fields[2:6]]
    except ValueError:
        return None
    return (fields[0],fields[1],min(x1,x2),min(y1,y2),max(x1,x2),max(y1,y2))


def convertTxtToXml(txtFile,jpgDir,xmlDir):
    """Write one VOC XML file per image listed in ``txtFile``.

    txtFile -- annotation text file; consecutive lines naming the same
               image are collected into one XML document
    jpgDir  -- prefix prepended to each image name to read its size
    xmlDir  -- prefix prepended to each output name; created if missing

    Blank or malformed lines, zero-area boxes and images that cannot be
    read are reported and skipped.
    """
    import os

    if xmlDir and not os.path.isdir(xmlDir):
        os.makedirs(xmlDir)

    def save(tree,imageName):
        # splitext copes with extensions of any length, unlike slicing off 3 chars.
        et.ElementTree(tree).write(xmlDir+os.path.splitext(imageName)[0]+'.xml')

    root=None
    curFileName=None
    with open(txtFile) as f:
        for lineNo,line in enumerate(f,1):
            record=parseLabelLine(line)
            if record is None:
                if line.strip():
                    print('line %d ignored (malformed): %r'%(lineNo,line))
                continue
            filename,label,xmin,ymin,xmax,ymax=record
            if xmin==xmax or ymin==ymax:
                print('line %d ignored (zero-area box): %r'%(lineNo,line))
                continue
            if filename!=curFileName:
                # A new image starts: flush the previous document first.
                if root is not None:
                    save(root,curFileName)
                curFileName=filename
                img=cv2.imread(jpgDir+filename)
                if img is None:
                    print('cannot read image, its boxes are skipped: %s'%(jpgDir+filename))
                    root=None
                    continue
                depth=img.shape[2] if len(img.shape)==3 else 1
                root=createXml(filename,img.shape[1],img.shape[0],depth)
            if root is None:
                # Further boxes of an image that could not be read.
                continue
            insertObj(root,label,str(xmin),str(ymin),str(xmax),str(ymax))
    if root is not None:
        save(root,curFileName)


if __name__=='__main__':
    convertTxtToXml(txtpath+'trainval.txt',jpgpath,xmlpath)