faster-rcnn:制作自己的数据集

最新推荐文章于 2022-10-09 00:01:23 发布

jstzwjr

最新推荐文章于 2022-10-09 00:01:23 发布

阅读量893

点赞数

分类专栏：目标检测

本文链接：https://blog.csdn.net/qq_17127427/article/details/77001627

版权

目标检测专栏收录该内容

8 篇文章 0 订阅

订阅专栏

我用到的工具有：

1.DrawBox.exe，基于opencv，按住鼠标左键框出目标区域，按下1则保存为1类，2则为2类（我只用到两个类，所以只判断2个键），不按任何键则不作任何操作，空格键表示当前图片处理完毕，开始处理下一张图片，标签信息保存在当前路径下的trainval.txt里。

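txt内容格式如下（每行对应一个标注框，字段之间用空格分隔）：文件名 类别 x1 y1 x2 y2，其中(x1,y1)和(x2,y2)是矩形框两个对角点的像素坐标，例如：000081.jpg wheel 120 45 310 260。DrawBox的源码如下：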

#include "opencv.hpp"
using namespace cv;
#include <iostream>
using namespace std;
#include <fstream>
#include <sstream>
#include <iomanip>

/**
 * @brief  Builds a numbered file name whose numeric part is zero-padded,
 *         e.g. getFileName(81) yields "000081.jpg".
 * @param[in]  i       number to embed in the name
 * @param[in]  extend  suffix appended after the digits (default ".jpg")
 * @param[in]  len     minimum width of the numeric part; shorter numbers are
 *                     padded on the left with '0', longer ones are kept whole
 * @return  the padded number followed by extend
 */
string getFileName(int i, string extend = ".jpg", int len = 6)
{
	ostringstream padded;
	padded << setfill('0') << setw(len) << i;
	return padded.str() + extend;
}

// Annotation text file; opened and closed by the drawBoxes() overloads,
// which append one line per saved box.
fstream f;
// Image buffers shared between on_mouse() and drawBoxes():
//   org - the image as loaded from disk (never drawn on)
//   img - copy of org carrying the press marker / finished rectangle
//   tmp - copy of img carrying the live rectangle while dragging
//   dst - only referenced by commented-out code in on_mouse()
cv::Mat org, dst, img, tmp;


Point pre_pt(-1, -1);   // position where the left mouse button was pressed
Point cur_pt(-1, -1);   // latest drag position; after release, where the button was let go


void on_mouse(int event, int x, int y, int flags, void *ustc)//event鼠标事件代号，x,y鼠标坐标，flags拖拽和键盘操作的代号  
{
	char temp[16];
	if (event == CV_EVENT_LBUTTONDOWN)//左键按下，读取初始坐标，并在图像上该点处划圆  
	{
		org.copyTo(img);//将原始图片复制到img中  
		sprintf(temp, "(%d,%d)", x, y);
		pre_pt = Point(x, y);
		putText(img, temp, pre_pt, FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0, 255), 1, 8);//在窗口上显示坐标  
		circle(img, pre_pt, 2, Scalar(255, 0, 0, 0), CV_FILLED, CV_AA, 0);//划圆  
		imshow("img", img);
	}
	else if (event == CV_EVENT_MOUSEMOVE && !(flags & CV_EVENT_FLAG_LBUTTON))//左键没有按下的情况下鼠标移动的处理函数  
	{
// 		img.copyTo(tmp);//将img复制到临时图像tmp上，用于显示实时坐标  
// 		sprintf(temp, "(%d,%d)", x, y);
// 		cur_pt = Point(x, y);
// 		putText(tmp, temp, cur_pt, FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0, 255));//只是实时显示鼠标移动的坐标  
// 		imshow("img", tmp);
	}
	else if (event == CV_EVENT_MOUSEMOVE && (flags & CV_EVENT_FLAG_LBUTTON))//左键按下时，鼠标移动，则在图像上划矩形  
	{
		img.copyTo(tmp);
		sprintf(temp, "(%d,%d)", x, y);
		cur_pt = Point(x, y);
		putText(tmp, temp, cur_pt, FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0, 255));
		rectangle(tmp, pre_pt, cur_pt, Scalar(0, 255, 0, 0), 1, 8, 0);//在临时图像上实时显示鼠标拖动时形成的矩形  
		imshow("img", tmp);
	}
	else if (event == CV_EVENT_LBUTTONUP)//左键松开，将在图像上划矩形  
	{
		org.copyTo(img);
		sprintf(temp, "(%d,%d)", x, y);
		cur_pt = Point(x, y);
		putText(img, temp, cur_pt, FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0, 255));
		circle(img, pre_pt, 2, Scalar(255, 0, 0, 0), CV_FILLED, CV_AA, 0);
		rectangle(img, pre_pt, cur_pt, Scalar(0, 255, 0, 0), 1, 8, 0);//根据初始点和结束点，将矩形画到img上  
		imshow("img", img);
		img.copyTo(tmp);
		//截取矩形包围的图像，并保存到dst中  
		int width = abs(pre_pt.x - cur_pt.x);
		int height = abs(pre_pt.y - cur_pt.y);
		
		if (width == 0 || height == 0)
		{
			printf("width == 0 || height == 0");
			return;
		}
		//dst = org(Rect(min(cur_pt.x, pre_pt.x), min(cur_pt.y, pre_pt.y), width, height));
		//namedWindow("dst");
		//imshow("dst", dst);

		cout << pre_pt << " " << cur_pt << endl;	
		
	}
}

// NOTE(review): fileNum is not referenced by any code visible in this file - confirm before removing.
int fileNum = 2000;
// Running image index: saveImgAsFormatVoc() names each file it writes after
// this value and increments it; drawBoxes(int, ...) uses it as the exclusive
// upper bound of the image numbers to annotate.
int iter = 2151;

/**
  *  @brief  将图片另存为VOC格式，文件名为6为数，后缀为jpg  
  *  @param[in]  input为输入图片路径，output为输出图片路径
  *  @Return:    
  */
void saveImgAsFormatVoc(string input="F:\\徐州\\360\\07\\20170701191815\\",string output="F:\\徐州\\360\\07\\07\\")
{
	Directory dir;
	vector<string> filenames = dir.GetListFiles(input, "*.tiff", false);
	for (auto it = filenames.begin(); it != filenames.end(); ++it)
	{
		//cout << *it << endl;
		Mat src = imread(input + *it);
		imwrite(output + getFileName(iter++), src);
	}
	cout << iter << endl;
}


/**
  *  @brief  对图片进行标注，框出目标区域，并输入类别
  *          图片名未按标准格式命名 000000.jpg  000001.jpg格式
  *  @param[in]  path为输入图片路径，txtPath为保存标注的txt路径
  *  @Return:    
  */
void drawBoxes(string path = "F:\\徐州\\360\\07\\07\\", string txtPath = "trainval.txt")
{
	Directory dir;
	f.open(txtPath, ios::app);

	vector<string> filenames = dir.GetListFiles(path, "*.jpg", false);
	cout << filenames.size() << endl;
	for (int i = 0; i < filenames.size(); ++i)
	{
		namedWindow("img");
		setMouseCallback("img", on_mouse, 0);
		org = imread(path + filenames[i]);
		org.copyTo(img);
		org.copyTo(tmp);
		imshow("img", org);
		int res = 0;
		string label;
		while (res = waitKey())
		{
			if (res == 49)
			{
				label = "wheel";
				cout << label << endl;
				f << filenames[i] << " " << label << " " << pre_pt.x << " " << pre_pt.y << " " << cur_pt.x << " " << cur_pt.y << endl;
			}
			else if (res == 50)
			{
				label = "defect";
				cout << label << endl;
				f << filenames[i] << " " << label << " " << pre_pt.x << " " << pre_pt.y << " " << cur_pt.x << " " << cur_pt.y << endl;
			}
			if (res == 32)
			{
				destroyWindow("img");
				break;
			}
		}
	}

	f.close();
}

/**
  *  @brief  对图片进行标注，框出目标区域，并输入类别
  *          图片名按标准格式命名
  *  @param[in]  start:从特定序号图片开始标注(10000张图片分批次标准，下次标注只需要设置start值接着标注即可)
              path为输入图片路径，txtPath为保存标注的txt路径
  *  @Return:
*/
void drawBoxes(int start = 0, string path = "F:\\徐州\\360\\07\\07\\", string txtPath = "F:\\徐州\\360\\07\\07\\trainval.txt")
{
	Directory dir;
	f.open(txtPath, ios::app);

	for (int i = start; i < iter; ++i)
	{
		string filename = getFileName(i);
		cout << filename << endl;
		namedWindow("img");
		setMouseCallback("img", on_mouse, 0);
		//org = imread(path + filenames[i]);
		org = imread(path + filename);
		org.copyTo(img);
		org.copyTo(tmp);
		imshow("img", org);
		int res = 0;
		string label;
		while (res = waitKey())
		{
			if (res == 49)
			{
				label = "wheel";
				cout << label << endl;
				f << filename << " " << label << " " << pre_pt.x << " " << pre_pt.y << " " << cur_pt.x << " " << cur_pt.y << endl;
			}
			else if (res == 50)
			{
				label = "defect";
				cout << label << endl;
				f << filename << " " << label << " " << pre_pt.x << " " << pre_pt.y << " " << cur_pt.x << " " << cur_pt.y << endl;
			}
			if (res == 32)
			{
				destroyWindow("img");
				break;
			}
		}
	}

	f.close();
}

/// Entry point: continues the annotation session at image number 81
/// (file "000081.jpg" in the default image directory of drawBoxes(int, ...)).
int main()
{
	const int resumeFrom = 81;
	drawBoxes(resumeFrom);
	return 0;
}

2.txt2xml.py 将txt文件转换成voc格式的xml文件

# -*- coding: utf-8 -*-
"""
Created on Wed Aug 09 13:31:44 2017

@author: Asus
"""

try:
    import xml.etree.cElementTree as et
except ImportError:
    # xml.etree.cElementTree was removed in Python 3.9; ElementTree offers the same API.
    import xml.etree.ElementTree as et
import cv2

# Append one <object> entry (class name plus bounding box) to an annotation tree.
# root is the <annotation> element, name0 the class label; the four bounds are
# stored as given and are expected to be strings already.
def insertObj(root,name0,xmin0,ymin0,xmax0,ymax0):
    """Add an <object> child with name, pose, truncated, difficult and bndbox.

    The text/tail whitespace is set by hand so the written file is
    tab-indented. The element that used to be last under root (the previous
    <object>, or <segmented> for the first object) gets its tail changed so
    the new <object> starts on its own indented line.
    """
    previous=root.findall('object')
    if previous:
        previous[-1].tail='\n\t'
    else:
        root.findall('segmented')[0].tail='\n\t'

    obj=et.SubElement(root,'object')
    obj.text='\n\t\t'
    obj.tail='\n'

    # Simple leaf children, in document order; all share the same indentation.
    for tag,value in (('name',name0),
                      ('pose','Unspecified'),
                      ('truncated','0'),
                      ('difficult','0')):
        leaf=et.SubElement(obj,tag)
        leaf.text=value
        leaf.tail='\n\t\t'

    bndbox=et.SubElement(obj,'bndbox')
    bndbox.text='\n\t\t\t'
    bndbox.tail='\n\t'

    bounds=(('xmin',xmin0),('ymin',ymin0),('xmax',xmax0),('ymax',ymax0))
    lastIndex=len(bounds)-1
    for index,(tag,value) in enumerate(bounds):
        coord=et.SubElement(bndbox,tag)
        coord.text=value
        # The last coordinate steps the indentation back for </bndbox>.
        coord.tail='\n\t\t' if index==lastIndex else '\n\t\t\t'
    
# Build the header of a VOC annotation tree (everything except the <object>
# entries). filename0 is the image file name including its extension, e.g. '.jpg'.
def createXml(filename0,width0,height0,channel0):
    """Return a new <annotation> element holding folder, filename, source,
    owner, size and segmented.

    width0, height0 and channel0 are converted with str(); filename0 is stored
    as given. The text/tail whitespace is set by hand so the written file is
    tab-indented.
    """
    def leaf(parent,tag,text,tail):
        # Create one child element with its text and trailing whitespace.
        node=et.SubElement(parent,tag)
        node.text=text
        node.tail=tail
        return node

    annotationRoot=et.Element('annotation')
    annotationRoot.text='\n\t'

    leaf(annotationRoot,'folder','VOC2007','\n\t')
    leaf(annotationRoot,'filename',filename0,'\n\t')

    # For the container elements, text is only the indentation before the first child.
    source=leaf(annotationRoot,'source','\n\t\t','\n\t')
    leaf(source,'database','The VOC2007 Database','\n\t\t')
    leaf(source,'annotation','PASCAL VOC2007','\n\t\t')
    leaf(source,'image','flickr','\n\t\t')
    leaf(source,'flickrid','0','\n\t')

    owner=leaf(annotationRoot,'owner','\n\t\t','\n\t')
    leaf(owner,'flickrid','0','\n\t\t')
    leaf(owner,'name','tycho','\n\t')

    size=leaf(annotationRoot,'size','\n\t\t','\n\t')
    leaf(size,'width',str(width0),'\n\t\t')
    leaf(size,'height',str(height0),'\n\t\t')
    leaf(size,'depth',str(channel0),'\n\t')

    leaf(annotationRoot,'segmented','0','\n')

    return annotationRoot

# Directory settings, relative to the working directory (keep the trailing '/'):
xmlpath='Annotation/'   # where the generated .xml files are written
txtpath='07/'           # directory holding trainval.txt
jpgpath='07/'           # directory holding the annotated images

# Convert trainval.txt (one box per line: "filename label x1 y1 x2 y2") into one
# VOC-style XML file per image. Lines belonging to the same image are expected
# to be consecutive: an XML file is written whenever the file name changes, and
# once more after the last line.
if __name__=='__main__':
    root=None          # annotation tree of the image currently being assembled
    curFileName=None   # image that root belongs to
    with open(txtpath+'trainval.txt') as f:
        for line in f:
            fields=line.split()
            if not fields:
                continue   # blank line
            if len(fields)!=6:
                print('skipping malformed line: %r' % line)
                continue
            filename=fields[0]
            label=fields[1]
            try:
                x1,y1,x2,y2=[int(v) for v in fields[2:6]]
            except ValueError:
                print('skipping line with non-integer coordinates: %r' % line)
                continue

            if filename!=curFileName:
                # A new image starts: flush the finished one first.
                if root is not None:
                    et.ElementTree(root).write(xmlpath+curFileName[:-3]+'xml')
                curFileName=filename
                # Load the image once per file; only its size is needed.
                img=cv2.imread(jpgpath+filename)
                if img is None:
                    # cv2.imread returns None when the file cannot be read.
                    print('cannot read image, skipping its boxes: '+jpgpath+filename)
                    root=None
                else:
                    root=createXml(filename,img.shape[1],img.shape[0],img.shape[2])

            if root is None:
                continue   # image unreadable: drop its boxes

            # The two recorded corners are in drag order; VOC wants min before max.
            insertObj(root,label,
                      str(min(x1,x2)),str(min(y1,y2)),
                      str(max(x1,x2)),str(max(y1,y2)))

    # Flush the last image (nothing to write when the file had no usable lines).
    if root is not None:
        et.ElementTree(root).write(xmlpath+curFileName[:-3]+'xml')

jstzwjr

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
faster-rcnn:制作自己的数据集

我用到的工具有：1.DrawBox.exe，基于opencv，按住鼠标左键框出目标区域，按下1则保存为1类，2则为2类（我只用到两个类，所以只判断2个键），不按任何键则不作任何操作，空格键表示当前图片处理完毕，开始处理下一张图片，标签信息保存在当前路径下的trainval.txt里。txt内容格式如下：#include "opencv.hpp"using
复制链接

扫一扫