深度学习Caffe实战笔记采集数据并预处理

最新推荐文章于 2022-01-17 11:14:32 发布

与若安好

最新推荐文章于 2022-01-17 11:14:32 发布

阅读量253

点赞数

文章标签： caffe 深度学习

本文链接：https://blog.csdn.net/qq_22329127/article/details/111825316

版权

深度学习Caffe实战笔记采集数据并预处理

采集数据并进行分类

针对自己的项目，将收集到的数据分为5类，并生成清单文件：

使用OpenCV 采集数据；

#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui_c.h>
#include <iostream>

using namespace std;
using namespace cv;
// Directory prefix for the captured images. Per the original note it points at
// the temp folder inside the project directory, at the same level as the .sln
// file. CaptureImage() assigns it and prepends it to every saved file name.
string writePath;

int CaptureImage(int num ,string people) {
	//选择路径
	 writePath = "****";

	}

	VideoCapture capture;
	capture.open(1);  //调取外部摄像头
	//capture.open(0);  //调取系统默认摄像头

	//定义摄像头的宽高
	float width = 320;
	float height = 240;
	capture.set(CAP_PROP_FRAME_WIDTH, width);
	capture.set(CAP_PROP_FRAME_HEIGHT, height);
	string name;
	namedWindow(people, CV_WINDOW_AUTOSIZE);
	int i = 0;
	int j = 30000;
	Mat frame; 

	while (1){
	capture >> frame;  //提前测试摄像头
	imshow(people, frame);//显示当前图片
	if (waitKey(0) == 32){ //按空格退出当前循环
		break;
	}
	}

	waitKey(0);
	while (j--) {
	
		if (j %60!= 0){
			continue;
		}
		capture >> frame;
		
			name = writePath + people+to_string(i) + ".jpg";
			imwrite(name, frame);
			cout << name << endl;
			i++;
	}
	waitKey(0);
return 0;
}

将拍摄的5类数据按照3：1的比例划分为训练集和测试集（默认随机抽取25%的图片移入测试集目录），使用下面的Python脚本完成。

# -*- coding:UTF-8 -*-     #在cmd窗口执行时添加在首行
import os, random, shutil
import sys, getopt
import string

def getDir(argv):
	"""Parse the command-line options of the train/test split script.

	Args:
		argv: Argument list without the program name (e.g. ``sys.argv[1:]``).

	Returns:
		A ``(trainPath, testPath, rate)`` tuple. ``rate`` is the float default
		``0.25`` when ``-r``/``--rate`` is absent, otherwise the raw option
		string; callers convert it with ``float()``.

	Exits with status 2 on an unrecognised option, and with status 0 after
	printing the usage text for ``-h``.
	"""
	# Single usage string so the error path and -h can never drift apart.
	usage = '<cmd> -i <trainPath> -o <testPath> -r <rate>'
	trainPath = 'train_dir'  # training-set directory
	testPath = 'val_dir'     # test-set directory
	rate = 0.25              # default fraction moved to the test set
	try:
		opts, _ = getopt.getopt(argv, "hi:o:r:", ["ipath=", "opath=", "rate="])
	except getopt.GetoptError:
		print(usage)
		sys.exit(2)
	for opt, arg in opts:
		if opt == '-h':
			print(usage)
			sys.exit()
		elif opt in ("-i", "--ipath"):
			trainPath = arg
		elif opt in ("-o", "--opath"):
			testPath = arg
		elif opt in ("-r", "--rate"):
			rate = arg
	return trainPath, testPath, rate

def moveFile(trainDir, testDir, rate):
	"""Move a random share of the entries in ``trainDir`` into ``testDir``.

	Args:
		trainDir: Directory whose entries are sampled.
		testDir: Directory that receives the sampled entries.
		rate: Share of entries to move; converted with ``float()``.

	The number moved is ``int(entry_count * rate)``. Both counts are printed.
	"""
	fraction = float(rate)
	entries = os.listdir(trainDir)
	total = len(entries)
	print("filenumber = ", total)
	quota = int(total * fraction)
	print("picknumber = ", quota)
	# Sample without replacement, then relocate each chosen entry.
	for entry in random.sample(entries, quota):
		source = os.path.join(trainDir, entry)
		target = os.path.join(testDir, entry)
		shutil.move(source, target)

if __name__ == '__main__':
	# Resolve the two directories and the split ratio from the command line.
	train, test, rate = getDir(sys.argv[1:])
	if not os.path.isdir(train):
		print("输入路径不存在：", train)
		sys.exit(1)  # non-zero status so calling scripts can detect the failure

	if not os.path.isdir(test):
		print("输出路径不存在：", test)
		sys.exit(1)

	# Fall back to the default ratio unless -r is a number strictly between
	# 0 and 1. Non-numeric input and NaN are treated as invalid as well.
	try:
		tmp = float(rate)
	except ValueError:
		tmp = 0.0
	if not 0.0 < tmp < 1.0:
		rate = 0.25

	print('输入的文件为：', train)
	print('输出的文件为：', test)
	print('捡出比例为：', rate)

	moveFile(train, test, rate)

为拍摄的5类数据分别生成list_file清单文件（每行为“文件名 标签”），使用下面的Python脚本完成；

import os
def generate(dir,label):
    """Write ``list.txt`` inside ``dir`` with one ``<name> <label>`` line per entry.

    Entries are written in sorted order. Entries whose extension is ``.txt``
    are skipped, which also covers a ``list.txt`` left by an earlier run.

    Args:
        dir: Directory whose entries are listed; ``list.txt`` is created in it.
        label: Class label; converted with ``int()`` before being written.
    """
    files = sorted(os.listdir(dir))
    print('****************')
    print('input :',dir)
    print('start...')
    label_text = str(int(label))
    # os.path.join keeps the path valid on every platform; the with-block
    # closes the file even if a write fails.
    with open(os.path.join(dir, 'list.txt'), 'w') as listText:
        for entry in files:
            # splitext isolates the extension; split() only separates the
            # directory part, so it could never equal '.txt'.
            if os.path.splitext(entry)[1] == '.txt':
                continue
            listText.write(entry + ' ' + label_text + '\n')
    print('down!')
    print('****************')

if __name__ == '__main__':
    # Call generate() once for every list file that has to be produced.
    generate('normal_train',0)

打乱清单文件中各行（样本及其标签）的顺序；

# -*- coding:UTF-8 -*-    #在cmd窗口执行时添加在首行
import random
def ReadFileDatas(path='./train_list.txt'):
    """Read a list file and return its lines without the trailing newline.

    Args:
        path: File whose lines are to be shuffled later; defaults to
            ``./train_list.txt``.

    Returns:
        A list with one string per line, in file order. The number of lines
        read is printed.
    """
    # Read-only mode is enough here, and the with-block closes the file even
    # if reading fails.
    with open(path, 'r') as file:
        FileNamelist = [line.strip('\n') for line in file]
    print('len ( FileNamelist ) = ' ,len(FileNamelist))
    return FileNamelist
 
def WriteDatasToFile(listInfo, path='./train.txt'):
    """Append every entry of ``listInfo`` to the output file, one per line.

    Args:
        listInfo: Strings to write; a newline is added after each one.
        path: Output file; defaults to ``./train.txt``.

    The file is opened in append mode, so running the script again adds to
    the existing content instead of replacing it. Each written line is also
    printed.
    """
    with open(path, mode='a') as file_handle:
        for entry in listInfo:
            str_Result = entry + '\n'
            print(str_Result)
            file_handle.write(str_Result)
 
if __name__ == "__main__":
    # Load the list, shuffle it in place, then hand it to the writer.
    shuffled_entries = ReadFileDatas()
    random.shuffle(shuffled_entries)
    WriteDatasToFile(shuffled_entries)

生成LMDB格式的数据集；

训练集
convert_imageset.exe \
 --resize_height=256 --resize_width=256 \  改变图片大小
 --shuffle --backend="lmdb" \              打乱样本顺序，数据库后端为LMDB
 train_image\\                             源图片所在文件夹
 train_shut.txt                            标签文件（清单文件）
 trainlmdb                                 生成的训练集数据目录
 测试集
convert_imageset.exe \                    
 --resize_height=256 --resize_width=256 \
 --shuffle --backend="lmdb" \
 test_image\\                             测试集图片目录
 test_shut.txt                            测试集标签数据
 testlmdb                                 生成的测试集数据目录
 pause

训练集数据格式转换为LMDB
6. 生成均值文件 ；

compute_image_mean.exe trainlmdb image_mean.binaryproto
pause

生成均值文件

与若安好

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
深度学习Caffe实战笔记采集数据并预处理

深度学习Caffe实战笔记采集数据并预处理采集数据并进行分类针对自己的项目，将收集到的数据分为5类，并生成清单文件：使用OpenCV 采集数据；#include <opencv2/opencv.hpp>#include <opencv2/highgui/highgui_c.h>#include <iostream>using namespace std;using namespace cv;//工程目录下的temp文件夹里，与.sln同级的temp
复制链接

扫一扫