首先需要了解数据集的存储结构。
HMDB51 的原始存储结构为:根路径/类别(文件夹)/视频(.avi 文件)。
提取帧序列后,需要转换为:根路径/类别(文件夹)/视频名(文件夹)/帧序列(.jpg 图片)。
划分数据集后,存储结构为:根路径/训练、验证或测试(文件夹)/类别(文件夹)/视频名(文件夹)/帧序列(.jpg 图片)。
提取帧序列代码:
import os
import numpy as np
from PIL import Image
import cv2
import stat
# Convert the AVI video dataset into a dataset of frame sequences.
# Frame sampling interval.
frameRate = 1
def splitFrames(videoFileName, save_path, frame_interval=None):
    """Decode a video and save its frames as numbered JPEG images.

    Frames are written to ``<save_path>/<class>/<video file name>/``, where
    ``<class>`` is the name of the directory that contains the video and
    ``<video file name>`` is the file name including its extension. Saved
    frames are named ``1.jpg``, ``2.jpg``, ... in decoding order.

    Args:
        videoFileName: Full path of the video, e.g.
            ``.../hmdb51/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0.avi``.
            Forward slashes and backslashes are both accepted as separators.
        save_path: Root output directory (e.g. ``.../HMDB51_frame``); the
            class and video sub-folders are created below it.
        frame_interval: Save every ``frame_interval``-th decoded frame.
            Defaults to the module-level ``frameRate``.

    Raises:
        ValueError: If the sampling interval is smaller than 1.
        IndexError: If ``videoFileName`` has no parent directory component.
    """
    interval = frameRate if frame_interval is None else frame_interval
    if interval < 1:
        raise ValueError("frame interval must be >= 1, got %r" % (interval,))

    # Normalise separators first so Windows-style paths are split correctly.
    parts = videoFileName.replace("\\", "/").split("/")
    class_name = str(parts[-2])
    video_name = str(parts[-1])
    save_dir = os.path.join(save_path, class_name, video_name)

    # Open the video file.
    cap = cv2.VideoCapture(videoFileName)
    try:
        if not cap.isOpened():
            # Skip unreadable videos instead of leaving an empty frame folder
            # behind and reporting success.
            print("cannot open video: " + videoFileName)
            return
        os.makedirs(save_dir, exist_ok=True)

        # Starting the counter at `interval` makes the first decoded frame
        # the first saved one, numbered 1.
        num = interval
        while True:
            # `success` is False once no more frames can be read; `data` is
            # the current frame. read() also advances to the next frame.
            success, data = cap.read()
            if not success:
                break
            if num % interval == 0:
                # Reverse the channel axis (OpenCV decodes frames as BGR)
                # before handing the array to PIL.
                rgb = np.array(data)[:, :, ::-1]
                frame_name = str(num // interval) + ".jpg"
                Image.fromarray(rgb).save(os.path.join(save_dir, frame_name))
            num = num + 1
    finally:
        # Release the capture handle even if decoding or saving failed.
        cap.release()
    print("保存成功")
# Source directory: one sub-folder per action class, each holding the videos.
file_path = r"E:\Master's period\Gesture dataset\HMDB51\hmdb51"
# Grant the owner read/write/execute permission on the source directory.
os.chmod(file_path, stat.S_IRWXU)
# Destination root for the extracted frame sequences.
save_path = r"E:\Master's period\Gesture dataset\HMDB51_frame"
# Names of the entries directly under the source directory (the classes).
file_dirs = os.listdir(file_path)
# Source directory of every class, e.g. <file_path>/brush_hair/
origion_paths = []
# Unused; kept only so the module-level name still exists.
save_paths = []
for path in file_dirs:
    # The last two path components are joined with "/" because splitFrames
    # derives the class and video names from them.
    class_dir = file_path + "/" + path + "/"
    # Skip stray files (archives, readme, ...) that would make listdir fail.
    if os.path.isdir(class_dir):
        origion_paths.append(class_dir)

for origion_path in origion_paths:
    # Every regular file inside a class folder is treated as a video.
    for video_name in os.listdir(origion_path):
        video_path = os.path.join(origion_path, video_name)
        if os.path.isfile(video_path):
            splitFrames(video_path, save_path)
print("all datas has been moved successfully!")
划分数据集代码:
import os
import random
import shutil
# Split the frame sequences into training, validation and test sets (7:1:2).
# NOTE(review): the output folder is spelled "HDMB51_dataset" (not "HMDB51");
# it is reproduced unchanged here -- confirm the spelling is intentional.
_GESTURE_ROOT = "E:/Master's period/Gesture dataset/"
_SPLIT_ROOT = _GESTURE_ROOT + "HDMB51_dataset/"
fileDir = _GESTURE_ROOT + "HMDB51_frame/"
trainDir = _SPLIT_ROOT + "train/"
valDir = _SPLIT_ROOT + "val/"
testDir = _SPLIT_ROOT + "test/"
def _move_entries(src_dir, dst_dir, names):
    """Move each entry in *names* from *src_dir* into *dst_dir*."""
    if not names:
        return
    # Create the class folder up front so shutil.move can do a plain rename
    # instead of failing (files) or falling back to copy+delete (folders).
    os.makedirs(dst_dir, exist_ok=True)
    for name in names:
        shutil.move(os.path.join(src_dir, name), os.path.join(dst_dir, name))


def copyFile(fileDir, trainDir, valDir, testDir,
             train_ratio=0.7, val_ratio=0.1, seed=None):
    """Split every class folder under *fileDir* into train/val/test sets.

    For each class folder, its entries (one per video) are randomly divided
    and MOVED (not copied, despite the function name) to
    ``<trainDir><class>/``, ``<valDir><class>/`` and ``<testDir><class>/``.
    The train and val counts are ``int(n * ratio)``; everything left over
    goes to the test set, so the class folder ends up empty.

    Args:
        fileDir: Source root containing one folder per class. Like the other
            directory arguments it must end with a path separator, because
            the class name is appended by plain string concatenation.
        trainDir: Destination root of the training set.
        valDir: Destination root of the validation set.
        testDir: Destination root of the test set.
        train_ratio: Fraction of each class moved to the training set.
        val_ratio: Fraction of each class moved to the validation set.
        seed: Optional seed for a reproducible split. When ``None`` the
            global ``random`` state is used.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more
            than 1.
    """
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1:
        raise ValueError(
            "train_ratio and val_ratio must be >= 0 and sum to at most 1")
    rng = random if seed is None else random.Random(seed)

    # Sorted listings make a seeded split independent of directory order.
    for class_name in sorted(os.listdir(fileDir)):
        class_dir = fileDir + class_name + "/"
        # Ignore stray files next to the class folders.
        if not os.path.isdir(class_dir):
            continue

        entries = sorted(os.listdir(class_dir))
        total = len(entries)
        train_count = int(total * train_ratio)
        val_count = int(total * val_ratio)
        # One shuffle followed by slicing replaces repeated sampling and
        # re-listing of the shrinking folder.
        rng.shuffle(entries)
        val_end = train_count + val_count

        _move_entries(class_dir, trainDir + class_name + "/",
                      entries[:train_count])
        print("train移动完成")
        _move_entries(class_dir, valDir + class_name + "/",
                      entries[train_count:val_end])
        print("val移动完成")
        # The test set takes every remaining entry so nothing is left behind.
        _move_entries(class_dir, testDir + class_name + "/",
                      entries[val_end:])
        print("test移动完成")
# Run the split only when this file is executed as a script.
if __name__ == "__main__":
    copyFile(fileDir=fileDir, trainDir=trainDir, valDir=valDir, testDir=testDir)