数据整理
- 解析视频
ffmpeg -i 0b21f0579d247c855e05405d3ed805c1#201205251240.flv 1/1_%03d.jpg
ffmpeg解析视频,其中-i后跟视频文件名,之后是解析出的单帧图像的保存路径及命名格式(%03d表示不足三位时补零的帧序号)
def genDir(base='/home/hai/hai/fish_data/FISHCLEF2015/FishCLEF2015-TrainingDataset/Training Dataset/Videos/',
           start=2, count=19):
    """Create ``count`` consecutively numbered folders under ``base``.

    The folders are named ``base + str(n)`` for ``n`` in
    ``start .. start + count - 1``; with the defaults that is 2 through 20.

    Args:
        base: Path prefix the folder number is appended to. It is joined by
            plain string concatenation, so it should end with a path separator.
        start: Number of the first folder.
        count: How many folders to create.

    Raises:
        OSError: If a folder cannot be created (e.g. ``base`` does not exist).
    """
    # NOTE(review): the default path contains 'Training Dataset' (with a
    # space) while the other snippets in this file use 'TrainingDataset' --
    # confirm which spelling matches the real directory.
    for number in range(start, start + count):
        folder = base + str(number)
        # os.mkdir raises when the folder already exists; skipping existing
        # folders lets the function be re-run after a partial run.
        if not os.path.isdir(folder):
            os.mkdir(folder)
批量创建文件夹(编号2~20,共19个),用来存储对应19个视频解析出的训练图像
利用shell文件批量解析视频成单帧图像
#read -p "input path:" FilePath;#输入路径,这里选择自己定义路径
function getAllFiles()
{
    # Split every video file directly under $FilePath into single-frame JPEGs.
    # Frames are written to the current directory as <video index>-<frame>.jpg,
    # where the video index starts at 1 and follows the glob (sorted) order.
    local i=1     # index of the current video, used as the image-name prefix
    local num=1   # increment applied to the index after each video
    local video

    # Glob instead of parsing `ls`: names containing spaces stay intact, and
    # no stray ':' gets appended to the last file name.
    for video in "$FilePath"/*
    do
        # Skip sub-directories and the unexpanded pattern of an empty folder.
        [ -f "$video" ] || continue
        ffmpeg -i "$video" "$i-%03d.jpg"   # frame number is zero-padded to 3 digits
        i=$((i + num))
    done
}
FilePath="/home/hai/hai/fish_data/FISHCLEF2015/FishCLEF2015-TrainingDataset/TrainingDataset/Videos"
# getAllFiles writes its frames into the current directory, so switch to the
# video folder first and abort if it is missing rather than running ffmpeg
# somewhere else.
cd "$FilePath" || exit 1
getAllFiles
然后将不同的视频图像存入相应顺序的文件夹,但是从下面才开始正式有用的,上面是无用功,哈哈哈!!!
# -*- coding=utf-8 -*-
import xml.dom.minidom
import os
import cv2
def file_name(file_dir):
    """Collect the file names found in each directory under ``file_dir``.

    Args:
        file_dir: Root directory to walk.

    Returns:
        A list with one entry per directory visited by ``os.walk``
        (``file_dir`` itself first, then its sub-directories); each entry is
        that directory's list of file names, without any path component.
        A root that does not exist gives an empty list.
    """
    return [files for _root, _dirs, files in os.walk(file_dir)]
def file_name1(file_dir):
    """Return the base names (extension removed) of all ``.flv`` files under ``file_dir``.

    Args:
        file_dir: Root directory to walk; sub-directories are included.

    Returns:
        A list of file names without directory and without the ``.flv``
        suffix. The extension match is case-sensitive. Names appear in the
        order ``os.walk`` reports them; the list is intentionally not sorted.
    """
    stems = []
    for _root, _dirs, names in os.walk(file_dir):
        for name in names:
            # splitext splits only at the last dot: 'a.b.flv' -> ('a.b', '.flv')
            stem, extension = os.path.splitext(name)
            if extension == '.flv':
                stems.append(stem)
    return stems
# 其中os.path.splitext()函数将路径拆分为文件名+扩展名
# Species names that are kept when the ground-truth annotations are exported;
# an annotation is written out only if its fish_species equals one of these.
fish_name = [
    'Dascyllus Reticulatus',
    'Chaetodon Lunulatus',
    'Pempheris Vanicolensis',
    'Dascyllus Aruanus',
    'Plectrogly-Phidodon Dickii',
    'Amphiprion Clarkii',
    'Chaetodon Trifascialis',
    'Acanthurus Nigrofuscus',
    'Chromis Chrysura',
    'Hemigymnus Melapterus',
    'Myripristis Kuntee',
    'Chaetodon Speculum',
    'Abudefduf Vaigiensis',
    'Neoglyphidodon Nigroris',
    'Zebrasoma Scopas',
]
# 其中os.path.splitext()函数将路径拆分为文件名+扩展名
# Export the ground-truth XML annotations of every training video to a single
# text file. Each record is written as
#     <video index>_<frame id>.jpg <species> <x> <y> <x + w> <y + h>
# and is preceded by a newline, so the output file starts with an empty line.
file_dir = '/home/hai/hai/fish_data/FISHCLEF2015/FishCLEF2015-TrainingDataset/TrainingDataset/Videos'
L = file_name1(file_dir)
xml_dir = '/home/hai/hai/fish_data/FISHCLEF2015/FishCLEF2015-TrainingDataset/TrainingDataset/Ground Truth XML'
wanted_species = frozenset(fish_name)  # built once; replaces the 15-way `or` chain

# `with` closes the output file even if one of the XML files fails to parse.
with open('/home/hai/hai/fish_data/fish_train_0901.txt', 'w') as f:
    # Videos are numbered from 1 in the order file_name1() returned them;
    # that number is the prefix of every frame image name of the video.
    for video_index, video_name in enumerate(L, 1):
        # Each video has an annotation file with the same base name.
        dom = xml.dom.minidom.parse(xml_dir + '/' + video_name + '.xml')
        for frame in dom.documentElement.getElementsByTagName('frame'):
            image_name = str(video_index) + '_' + frame.getAttribute('id') + '.jpg'
            for fish in frame.getElementsByTagName('object'):
                h = fish.getAttribute('h')
                w = fish.getAttribute('w')
                x = fish.getAttribute('x')
                y = fish.getAttribute('y')
                fish_species = fish.getAttribute('fish_species')
                # Progress output; single-argument print() calls behave the
                # same on Python 2 and Python 3.
                print(image_name)
                print('%s %s %s %s %s' % (h, w, x, y, fish_species))
                if fish_species in wanted_species:
                    # The box is stored as two corners: (x, y) and (x + w, y + h).
                    f.write('\n%s %s %s %s %s %s' % (
                        image_name, fish_species, x, y,
                        int(x) + int(w), int(y) + int(h)))
通过这段代码解析训练集中ground truth的xml文件,并将结果写入到fish_train_0901.txt文件中,每行的格式为:图像名 鱼类名 x y x+w y+h。
用下面的代码将所有的视频一次提取成单帧图像,存入h_train文件夹中。
import os
import cv2
def file_name(file_dir):