tensorflow2 写网络结构,贼优雅

原创: Lebhoryi@rt-thread.com
时间: 2020/06/10
coding: google-research/kws_streaming/models/cnn.py

  • step1. 用argparse设定超参,包括卷积核等

    # sub parser for model settings
    subparsers = parser.add_subparsers(dest='model_name', help='NN model name')
    
    # CNN model settings
    parser_cnn = subparsers.add_parser('cnn')
    cnn.model_parameters(parser_cnn)
    
  • step2. build cnn

    model = cnn.model(flags)
    
    # 用zip 来解析超参,这一步简直是神来之笔,省了好多调用
     for filters, kernel_size, activation, dilation_rate, strides in zip(
      parse(flags.cnn_filters), parse(flags.cnn_kernel_size),
      parse(flags.cnn_act), parse(flags.cnn_dilation_rate),
      parse(flags.cnn_strides))
    

    parse 函数:

    def parse(text):
        """Convert a hyperparameter string into a sequence of Python values.

        Empty (falsy) text yields an empty list.  Any other text is evaluated
        with ``ast.literal_eval``: a tuple result is returned unchanged, and
        every other result is wrapped in a one-element list.
        """
        if not text:
            return []

        # ast.literal_eval rebuilds the Python literal spelled by the string
        # (e.g. "64,64" -> (64, 64)); it is worth taking the time to learn.
        value = ast.literal_eval(text)
        return value if isinstance(value, tuple) else [value]
    

    接下来就是搭建网络了,用for循环,搭建7个卷积层,
    忽略掉Stream()就是普通的卷积函数,
    然后跟上一些其他的层:

    • Flatten()
    • Dropout()
    • 两个FC,也是for循环
    • 最后一层输出层
  • step3. 查看网络结构

    logging.info(model.summary())
    
# coding=utf-8
# Copyright 2020 The Google Research Authors.

# cnn.py
"""CNN model with Mel spectrum."""
from kws_streaming.layers import modes
from kws_streaming.layers import speech_features
from kws_streaming.layers.compat import tf
from kws_streaming.layers.stream import Stream
from kws_streaming.models.utils import parse


def model_parameters(parser_nn):
  """Register the Convolutional Neural Network (CNN) model flags on a parser.

  Layer-wise settings are given as comma separated strings; only
  ``--dropout1`` is parsed as a float.

  Args:
    parser_nn: parser object exposing ``add_argument``; modified in place.
  """
  # (flag, type, default, help), listed in registration order.
  options = (
      ('--cnn_filters', str, '64,64,64,64,128,64,128',
       'Number of output filters in the convolution layers'),
      ('--cnn_kernel_size', str, '(3,3),(5,3),(5,3),(5,3),(5,2),(5,1),(10,1)',
       'Heights and widths of the 2D convolution window'),
      ('--cnn_act', str, "'relu','relu','relu','relu','relu','relu','relu'",
       'Activation function in the convolution layers'),
      ('--cnn_dilation_rate', str, '(1,1),(1,1),(2,1),(1,1),(2,1),(1,1),(2,1)',
       'Dilation rate to use for dilated convolutions'),
      ('--cnn_strides', str, '(1,1),(1,1),(1,1),(1,1),(1,1),(1,1),(1,1)',
       'Strides of the convolution layers along the height and width'),
      ('--dropout1', float, 0.5,
       'Percentage of data dropped'),
      ('--units2', str, '128,256',
       'Number of units in the last set of hidden layers'),
      ('--act2', str, "'linear','relu'",
       'Activation function of the last set of hidden layers'),
  )
  for flag, value_type, default_value, help_text in options:
    parser_nn.add_argument(
        flag, type=value_type, default=default_value, help=help_text)


def model(flags):
  """Build the Keras CNN model described by `flags`.

  It is based on paper:
  Convolutional Neural Networks for Small-footprint Keyword Spotting
  http://www.isca-speech.org/archive/interspeech_2015/papers/i15_1478.pdf

  Layer order: optional SpeechFeatures (only when flags.preprocess is 'raw'),
  a trailing axis added with expand_dims, a stack of Stream-wrapped Conv2D
  layers, a Stream-wrapped Flatten, Dropout, a stack of Dense layers and a
  final Dense layer with flags.label_count units and no activation argument.

  Args:
    flags: data/model parameters

  Returns:
    Keras model for training
  """
  # NOTE(review): the input shape is requested for
  # STREAM_INTERNAL_STATE_INFERENCE although this builder is documented as
  # returning a training model - confirm this is the intended mode.
  input_shape = modes.get_input_data_shape(
      flags, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  input_audio = tf.keras.layers.Input(
      shape=input_shape, batch_size=flags.batch_size)

  if flags.preprocess == 'raw':
    # Self contained model: the user only needs to feed raw audio.
    feature_params = speech_features.SpeechFeatures.get_params(flags)
    net = speech_features.SpeechFeatures(feature_params)(input_audio)
  else:
    net = input_audio

  net = tf.keras.backend.expand_dims(net)

  # One tuple of settings per convolution layer. zip stops at the shortest
  # list, so surplus entries in any longer flag are silently ignored.
  conv_settings = zip(
      parse(flags.cnn_filters),
      parse(flags.cnn_kernel_size),
      parse(flags.cnn_act),
      parse(flags.cnn_dilation_rate),
      parse(flags.cnn_strides))
  for filters, kernel_size, activation, dilation_rate, strides in conv_settings:
    conv = tf.keras.layers.Conv2D(
        filters=filters,
        kernel_size=kernel_size,
        activation=activation,
        dilation_rate=dilation_rate,
        strides=strides)
    net = Stream(cell=conv)(net)

  flatten = tf.keras.layers.Flatten()
  net = Stream(cell=flatten)(net)
  net = tf.keras.layers.Dropout(rate=flags.dropout1)(net)

  # Hidden fully connected layers, one per (units, activation) pair.
  dense_settings = zip(parse(flags.units2), parse(flags.act2))
  for units, activation in dense_settings:
    net = tf.keras.layers.Dense(units=units, activation=activation)(net)

  # Output layer: one unit per label.
  logits = tf.keras.layers.Dense(units=flags.label_count)(net)
  return tf.keras.Model(input_audio, logits)

总结:
用(for + zip)的方式可以省去好多重复代码,网络结构搭建得赏心悦目;再配上用argparse设定的网络参数,这是我见过的最优雅的CNN构建代码,没有之一


# 附上之前的CNN 构建源码 `2 Conv2d + linear + 2 FC`
def create_cnn_model2(fingerprint_input, model_settings, model_size_info,
                       is_training):
  """Builds a model with 2 convolution layers followed by a linear layer and
      a hidden fully-connected layer.

  The graph is built with TF1-style APIs (tf.placeholder, tf.layers,
  tf.contrib, tf.get_variable). Layer order:
  conv1 -> batch norm -> ReLU -> dropout -> conv2 -> batch norm -> ReLU ->
  dropout -> flatten -> linear layer -> FC -> batch norm -> ReLU -> dropout ->
  output layer. The dropout steps exist only when is_training is truthy.

  NOTE(review): the body calls math.ceil; confirm that `math` (and a TF1
  compatible `tf`) is imported in the module that defines this function.

  Args:
    fingerprint_input: tensor reshaped here to
      [-1, spectrogram_length, dct_coefficient_count, 1].
    model_settings: mapping read for the keys 'dct_coefficient_count',
      'spectrogram_length' and 'label_count'.
    model_size_info: defines the first and second convolution parameters in
      {number of conv features, conv filter height, width, stride in y,x dir.}
      (indices 0-4 and 5-9), followed by linear layer size (index 10) and
      fully-connected layer size (index 11).
    is_training: Python truth value. When truthy, a 'dropout_prob'
      placeholder is created and dropout is applied; the value is also passed
      as `training` to every batch normalization layer.

  Returns:
    (final_fc, dropout_prob) when is_training is truthy, otherwise final_fc
    alone. final_fc is the matmul-plus-bias output of the last layer; no
    softmax is applied in this function.
  """
  if is_training:
    # NOTE(review): this value is passed as the second positional argument of
    # tf.nn.dropout, which TF1 documents as keep_prob - confirm that callers
    # feed a keep probability despite the name.
    dropout_prob = tf.placeholder(tf.float32, name='dropout_prob')
  input_frequency_size = model_settings['dct_coefficient_count']
  input_time_size = model_settings['spectrogram_length']
  # Add a trailing single-channel axis so the input can be fed to conv2d.
  fingerprint_4d = tf.reshape(fingerprint_input,
                              [-1, input_time_size, input_frequency_size, 1])

  # Unpack the first convolution's parameters.
  first_filter_count = model_size_info[0]
  first_filter_height = model_size_info[1]   #time axis
  first_filter_width = model_size_info[2]    #frequency axis
  first_filter_stride_y = model_size_info[3] #time axis
  first_filter_stride_x = model_size_info[4] #frequency_axis

  # Unpack the second convolution's parameters.
  second_filter_count = model_size_info[5]
  second_filter_height = model_size_info[6]   #time axis
  second_filter_width = model_size_info[7]    #frequency axis
  second_filter_stride_y = model_size_info[8] #time axis
  second_filter_stride_x = model_size_info[9] #frequency_axis

  linear_layer_size = model_size_info[10]
  fc_size = model_size_info[11]

  # first conv
  first_weights = tf.Variable(
      tf.truncated_normal(
          [first_filter_height, first_filter_width, 1, first_filter_count],
          stddev=0.01))
  first_bias = tf.Variable(tf.zeros([first_filter_count]))
  first_conv = tf.nn.conv2d(fingerprint_4d, first_weights, [
      1, first_filter_stride_y, first_filter_stride_x, 1
  ], 'VALID') + first_bias

  first_conv = tf.layers.batch_normalization(first_conv, training=is_training,
                 name='bn1')

  first_relu = tf.nn.relu(first_conv)
  if is_training:
    first_dropout = tf.nn.dropout(first_relu, dropout_prob)
  else:
    first_dropout = first_relu
  # Spatial size after the 'VALID' convolution:
  # ceil((input - filter + 1) / stride) along each axis.
  first_conv_output_width = math.ceil(
      (input_frequency_size - first_filter_width + 1) /
      first_filter_stride_x)
  first_conv_output_height = math.ceil(
      (input_time_size - first_filter_height + 1) /
      first_filter_stride_y)

  # second conv
  second_weights = tf.Variable(
      tf.truncated_normal(
          [second_filter_height, second_filter_width, first_filter_count,
             second_filter_count],
          stddev=0.01))
  second_bias = tf.Variable(tf.zeros([second_filter_count]))
  second_conv = tf.nn.conv2d(first_dropout, second_weights, [
      1, second_filter_stride_y, second_filter_stride_x, 1
  ], 'VALID') + second_bias

  ##### update 2020/05/21 #######
  second_conv = tf.layers.batch_normalization(second_conv, training=is_training,
                  name='bn2')

  second_relu = tf.nn.relu(second_conv)
  if is_training:
    second_dropout = tf.nn.dropout(second_relu, dropout_prob)
  else:
    second_dropout = second_relu
  # Same 'VALID' output-size formula, applied to the first conv's output size.
  second_conv_output_width = math.ceil(
      (first_conv_output_width - second_filter_width + 1) /
      second_filter_stride_x)
  second_conv_output_height = math.ceil(
      (first_conv_output_height - second_filter_height + 1) /
      second_filter_stride_y)
  # Number of values per example once the second conv output is flattened.
  second_conv_element_count = int(
      second_conv_output_width*second_conv_output_height*second_filter_count)
  flattened_second_conv = tf.reshape(second_dropout,
                                    [-1, second_conv_element_count])

  # linear layer
  # Unlike the other layers, these variables are created with tf.get_variable
  # under the fixed names 'W' and 'b'; no activation follows the matmul.
  W = tf.get_variable('W', shape=[second_conv_element_count, linear_layer_size],
        initializer=tf.contrib.layers.xavier_initializer())
  b = tf.get_variable('b', shape=[linear_layer_size])
  flow = tf.matmul(flattened_second_conv, W) + b

  # first fc
  first_fc_output_channels = fc_size
  first_fc_weights = tf.Variable(
      tf.truncated_normal(
          [linear_layer_size, first_fc_output_channels], stddev=0.01))
  first_fc_bias = tf.Variable(tf.zeros([first_fc_output_channels]))
  first_fc = tf.matmul(flow, first_fc_weights) + first_fc_bias

  first_fc = tf.layers.batch_normalization(first_fc, training=is_training,
               name='bn3')


  first_fc = tf.nn.relu(first_fc)
  if is_training:
    final_fc_input = tf.nn.dropout(first_fc, dropout_prob)
  else:
    final_fc_input = first_fc
  # Output layer: one value per label, no activation applied here.
  label_count = model_settings['label_count']
  final_fc_weights = tf.Variable(
      tf.truncated_normal(
          [first_fc_output_channels, label_count], stddev=0.01))
  final_fc_bias = tf.Variable(tf.zeros([label_count]))
  final_fc = tf.matmul(final_fc_input, final_fc_weights) + final_fc_bias
  # The placeholder is returned only in training mode so the caller can feed it.
  if is_training:
    return final_fc, dropout_prob
  else:
    return final_fc
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值