项目地址:https://github.com/hx173149/C3D-tensorflow
详细讲解参考:https://zhuanlan.zhihu.com/p/35458649
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions for downloading and reading MNIST data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
import PIL.Image as Image
import random
import numpy as np
import cv2
import time
def get_frames_data(filename, num_frames_per_clip=16):
''' Given a directory containing extracted frames, return a video clip of
(num_frames_per_clip) consecutive frames as a list of np arrays '''
ret_arr = []
s_index = 0
for parent, dirnames, filenames in os.walk(filename):#os.walk请参考http://www.cnblogs.com/herbert/archive/2013/01/07/2848892.html
if(len(filenames)<num_frames_per_clip):
return [], s_index
filenames = sorted(filenames)
s_index = random.randint(0, len(filenames) - num_frames_per_clip)
for i in range(s_index, s_index + num_frames_per_clip):
image_name = str(filename) + '/' + str(filenames[i])#获得绝对路径
img = Image.open(image_name)#PIL读取图片,获得filename视频文件夹下的随机16个连续图片
img_data = np.array(img)#创建数组
ret_arr.append(img_data)
return ret_arr, s_index#返回包括帧数据的列表
def read_clip_and_label(filename, batch_size, start_pos=-1, num_frames_per_clip=16, crop_size=112, shuffle=False):
lines = open(filename,'r')
read_dirnames = []
data = []
label = []
batch_index = 0
next_batch_start = -1
lines = list(lines)#将每行变成一个元素
np_mean = np.load('crop_mean.npy').reshape([num_frames_per_clip, crop_size, crop_size, 3])
# Forcing shuffle, if start_pos is not specified
if start_pos < 0:
shuffle = True
if shuffle:
video_indices = range(len(lines))#视频文件夹索引,每个视频文件夹里面有一个视频(16张图片组成)
random.seed(time.time())#生成0-1的随机数
random.shuffle(video_indices)#乱序
else:
# Process videos sequentially
video_indices = range(start_pos, len(lines))
for index in video_indices:
if(batch_index>=batch_size):
next_batch_start = index
break#如果大于一个batch_size,则跳出
line = lines[index].strip('\n').split()#读取list文件中的每条路径及后面的label,并以空格分成两个元素
dirname = line[0]
tmp_label = line[1]
if not shuffle:
print("Loading a video clip from {}...".format(dirname))
tmp_data, _ = get_frames_data(dirname, num_frames_per_clip)#输出是16的图片的内容数据
img_datas = [];
if(len(tmp_data)!=0):
for j in xrange(len(tmp_data)):
img = Image.fromarray(tmp_data[j].astype(np.uint8))#将数组数据转化成图片数据
if(img.width>img.height):
scale = float(crop_size)/float(img.height)
img = np.array(cv2.resize(np.array(img),(int(img.width * scale + 1), crop_size))).astype(np.float32)#resize输入参数是(宽,高)
else:
scale = float(crop_size)/float(img.width)
img = np.array(cv2.resize(np.array(img),(crop_size, int(img.height * scale + 1)))).astype(np.float32)#按短边缩放到crop_size
crop_x = int((img.shape[0] - crop_size)/2)#shape[0]是行,有多少行,也即是数组的高
crop_y = int((img.shape[1] - crop_size)/2)
img = img[crop_x:crop_x+crop_size, crop_y:crop_y+crop_size,:] - np_mean[j]
img_datas.append(img)
data.append(img_datas)
label.append(int(tmp_label))
batch_index = batch_index + 1
read_dirnames.append(dirname)
# pad (duplicate) data/label if less than batch_size
valid_len = len(data)#只能得到一个batch_size的数据或者小于一个batch_size的数据
pad_len = batch_size - valid_len
if pad_len:
for i in range(pad_len):
data.append(img_datas)
label.append(int(tmp_label))#如果不足一个batch_size的数据,则用最后一个视频数据补全
np_arr_data = np.array(data).astype(np.float32)
np_arr_label = np.array(label).astype(np.int64)
return np_arr_data, np_arr_label, next_batch_start, read_dirnames, valid_len
主要:各种数组的转换,以及大小的变化,shape,resize的意义