C3D 工程中input_data.py文件解析

最新推荐文章于 2022-12-30 20:07:14 发布

冯爽朗

最新推荐文章于 2022-12-30 20:07:14 发布

阅读量829

点赞数

分类专栏：深度学习

本文链接：https://blog.csdn.net/mieleizhi0522/article/details/80268181

版权

深度学习专栏收录该内容

47 篇文章

订阅专栏

本文介绍了一种用于视频理解的C3D模型实现方法，包括如何从视频中抽取帧并将其转换为可用于模型训练的数据集。此外还详细介绍了如何使用TensorFlow进行数据预处理，如读取视频帧、裁剪、归一化等步骤。

摘要由CSDN通过智能技术生成

项目地址：https://github.com/hx173149/C3D-tensorflow

详细讲解参考：https://zhuanlan.zhihu.com/p/35458649

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Functions for downloading and reading MNIST data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf
import PIL.Image as Image
import random
import numpy as np
import cv2
import time

def get_frames_data(filename, num_frames_per_clip=16):
  ''' Given a directory containing extracted frames, return a video clip of
  (num_frames_per_clip) consecutive frames as a list of np arrays '''
  ret_arr = []
  s_index = 0
  for parent, dirnames, filenames in os.walk(filename):#os.walk请参考http://www.cnblogs.com/herbert/archive/2013/01/07/2848892.html
    if(len(filenames)<num_frames_per_clip):
      return [], s_index
    filenames = sorted(filenames)
    s_index = random.randint(0, len(filenames) - num_frames_per_clip)
    for i in range(s_index, s_index + num_frames_per_clip):
      image_name = str(filename) + '/' + str(filenames[i])#获得绝对路径
      img = Image.open(image_name)#PIL读取图片，获得filename视频文件夹下的随机16个连续图片
      img_data = np.array(img)#创建数组
      ret_arr.append(img_data)
  return ret_arr, s_index#返回包括帧数据的列表

def read_clip_and_label(filename, batch_size, start_pos=-1, num_frames_per_clip=16, crop_size=112, shuffle=False):
  lines = open(filename,'r')
  read_dirnames = []
  data = []
  label = []
  batch_index = 0
  next_batch_start = -1
  lines = list(lines)#将每行变成一个元素
  np_mean = np.load('crop_mean.npy').reshape([num_frames_per_clip, crop_size, crop_size, 3])
  # Forcing shuffle, if start_pos is not specified
  if start_pos < 0:
    shuffle = True
  if shuffle:
    video_indices = range(len(lines))#视频文件夹索引，每个视频文件夹里面有一个视频（16张图片组成）
    random.seed(time.time())#生成0-1的随机数
    random.shuffle(video_indices)#乱序
  else:
    # Process videos sequentially
    video_indices = range(start_pos, len(lines))
  for index in video_indices:
    if(batch_index>=batch_size):
      next_batch_start = index
      break#如果大于一个batch_size，则跳出
    line = lines[index].strip('\n').split()#读取list文件中的每条路径及后面的label,并以空格分成两个元素
    dirname = line[0]
    tmp_label = line[1]
    if not shuffle:
      print("Loading a video clip from {}...".format(dirname))
    tmp_data, _ = get_frames_data(dirname, num_frames_per_clip)#输出是16的图片的内容数据
    img_datas = [];
    if(len(tmp_data)!=0):
      for j in xrange(len(tmp_data)):
        img = Image.fromarray(tmp_data[j].astype(np.uint8))#将数组数据转化成图片数据
        if(img.width>img.height):
          scale = float(crop_size)/float(img.height)
          img = np.array(cv2.resize(np.array(img),(int(img.width * scale + 1), crop_size))).astype(np.float32)#resize输入参数是（宽，高）
        else:
          scale = float(crop_size)/float(img.width)
          img = np.array(cv2.resize(np.array(img),(crop_size, int(img.height * scale + 1)))).astype(np.float32)#按短边缩放到crop_size
        crop_x = int((img.shape[0] - crop_size)/2)#shape[0]是行，有多少行，也即是数组的高
        crop_y = int((img.shape[1] - crop_size)/2)
        img = img[crop_x:crop_x+crop_size, crop_y:crop_y+crop_size,:] - np_mean[j]
        img_datas.append(img)
      data.append(img_datas)
      label.append(int(tmp_label))
      batch_index = batch_index + 1
      read_dirnames.append(dirname)

  # pad (duplicate) data/label if less than batch_size
  valid_len = len(data)#只能得到一个batch_size的数据或者小于一个batch_size的数据
  pad_len = batch_size - valid_len
  if pad_len:
    for i in range(pad_len):
      data.append(img_datas)
      label.append(int(tmp_label))#如果不足一个batch_size的数据，则用最后一个视频数据补全

  np_arr_data = np.array(data).astype(np.float32)
  np_arr_label = np.array(label).astype(np.int64)

  return np_arr_data, np_arr_label, next_batch_start, read_dirnames, valid_len

主要：各种数组的转换，以及大小的变化，shape,resize的意义