WaveNet 代码解析 —— model.py
文章目录
简介
本项目是一个基于 WaveNet 生成神经网络体系结构的语音合成项目,它是使用 TensorFlow 实现的(项目地址)。
WaveNet 神经网络体系结构能直接生成原始音频波形,在文本到语音和一般音频生成方面显示了出色的结果(详情请参阅 WaveNet 的详细介绍)。
由于 WaveNet 项目较大,代码较多。为了方便学习与整理,将按照工程文件的结构依次介绍。
本文将介绍项目中的 model.py 文件:WaveNet 模型脚本。
代码解析
函数解析
create_variable(name, shape)
下面这段代码的主要任务是:用Xavier初始化指定名称和形状的卷积过滤器变量
def create_variable(name, shape):
    '''Create a convolution filter variable with the given name and shape,
    initialized with the Xavier scheme.'''
    # The Xavier initializer is designed to keep the scale of the
    # gradients roughly the same in all layers.
    xavier = tf.contrib.layers.xavier_initializer_conv2d()
    return tf.Variable(xavier(shape=shape), name=name)
create_embedding_table(name, shape)
下面这段代码的主要任务是:根据传入 shape 的维度特征,初始化权值
def create_embedding_table(name, shape):
    '''Create an embedding table; square tables start as a one-hot encoding.'''
    rows, cols = shape[0], shape[1]
    # A square table is initialized to the identity matrix, which is
    # exactly a one-hot encoding of each category.
    if rows == cols:
        # np.identity() returns a square matrix with ones on the diagonal.
        return tf.Variable(np.identity(n=rows, dtype=np.float32), name=name)
    # Otherwise fall back to Xavier-initialized weights.
    return create_variable(name, shape)
create_bias_variable(name, shape)
下面这段代码的主要任务是:使用指定的名称和形状创建一个偏差变量,并将其初始化为零
def create_bias_variable(name, shape):
    '''Create a bias variable with the given name and shape, initialized to zero.'''
    initializer = tf.constant_initializer(value=0.0, dtype=tf.float32)
    # BUG FIX: `name` was previously passed positionally, where it was
    # consumed by tf.Variable's second parameter (`trainable`) instead of
    # naming the variable.  Pass it as the `name` keyword argument so the
    # variable actually receives the intended name.
    return tf.Variable(initializer(shape=shape), name=name)
WaveNetModel 类解析
WaveNetModel 类成员变量解析
以下变量主要作为 WaveNetModel 类的成员变量
batch_size # 每批提供的音频文件数量
dilations # 每层膨胀系数的列表
filter_width # 膨胀后包含在每个卷积中的样品
residual_channels # 获得残差需要学习的过滤器数量
dilation_channels # 获得膨胀的卷积需要学习的过滤器数量
quantization_channels # 用于音频量化和相应的单热编码的振幅值数量,默认为256(8-bit)
use_biases # 卷积中添加偏置层标志位,默认为False
skip_channels # 有助于量化 softmax 输出需要学习的过滤器数量
scalar_input # 使用量化波形直接作为网络输入,而不是一次性编码,标志位。默认值为假
initial_filter_width # 应用于标量输入的卷积的初始滤波器的宽度,只有当 scalar_input=True 才启用
histograms # 日志中存储直方图标志位,默认值为假
global_condition_channels # 全局条件向量的通道数(嵌入大小),None表示没有全局条件
#如果不是None,那么这意味着global_condition张量指定一个整数,选择N个全局条件类别中的哪个,其中N = global_condition_cardinality。
#如果为None,则将global_condition张量视为一个向量,它必须具有global_condition_channels维度。
global_condition_cardinality # 要嵌入全局条件嵌入的互斥类别的数目。
receptive_field # 感受野大小
variables # WaveNet 模型网络所有变量
init_ops # 初始化操作
push_ops # 入队操作
WaveNetModel 类成员函数解析
__ init__
下面这段代码的主要任务是:初始化WaveNet模型
def __init__(self, batch_size, dilations, filter_width,
             residual_channels, dilation_channels,
             skip_channels, quantization_channels=2**8,
             use_biases=False, scalar_input=False,
             initial_filter_width=32,
             histograms=False,
             global_condition_channels=None,
             global_condition_cardinality=None):
    '''Initializes the WaveNet model.

    Stores every hyper-parameter on the instance (see the member-variable
    overview above for the meaning of each one), computes the receptive
    field, and creates all network variables.
    '''
    # Store the hyper-parameters as-is; they are read throughout the
    # network-building methods below.
    self.batch_size = batch_size
    self.dilations = dilations
    self.filter_width = filter_width
    self.residual_channels = residual_channels
    self.dilation_channels = dilation_channels
    self.quantization_channels = quantization_channels
    self.use_biases = use_biases
    self.skip_channels = skip_channels
    self.scalar_input = scalar_input
    self.initial_filter_width = initial_filter_width
    self.histograms = histograms
    self.global_condition_channels = global_condition_channels
    self.global_condition_cardinality = global_condition_cardinality
    # Compute the receptive field size from the filter widths and the
    # dilation schedule (static method, no instance state needed).
    self.receptive_field = WaveNetModel.calculate_receptive_field(
        self.filter_width, self.dilations, self.scalar_input,
        self.initial_filter_width)
    # Create all variables of the WaveNet network up front so they can
    # be shared between the loss and generation graphs.
    self.variables = self._create_variables()
calculate_receptive_field
下面这段代码的主要任务是:根据滤波器宽度、各层膨胀系数与输入方式计算网络的感受野大小
@staticmethod  # no instance state is needed for this computation
def calculate_receptive_field(filter_width, dilations, scalar_input,
                              initial_filter_width):
    '''Return the receptive field size (in samples) of the network.'''
    # Each dilated layer widens the field by (filter_width - 1) * dilation.
    field = (filter_width - 1) * sum(dilations) + 1
    # The initial causal convolution adds its own contribution; its width
    # depends on whether the network input is a raw scalar waveform.
    if scalar_input:
        return field + initial_filter_width - 1
    return field + filter_width - 1
_create_variables
下面这段代码的主要任务是:创建网络使用的所有变量。
def _create_variables(self):
    '''This function creates all variables used by the network.

    This allows us to share them between multiple calls to the loss
    function and generation function.
    '''
    var = dict()
    with tf.variable_scope('wavenet'):
        # Global conditioning supplied as integer category ids needs an
        # embedding lookup table (one row per category).
        if self.global_condition_cardinality is not None:
            with tf.variable_scope('embeddings'):
                layer = dict()
                layer['gc_embedding'] = create_embedding_table(
                    'gc_embedding',
                    [self.global_condition_cardinality,
                     self.global_condition_channels])
                var['embeddings'] = layer

        # Initial causal convolution mapping the input to
        # residual_channels feature maps.
        with tf.variable_scope('causal_layer'):
            layer = dict()
            if self.scalar_input:
                # Raw scalar waveform: a single input channel and a
                # dedicated (usually wider) initial filter.
                initial_channels = 1
                initial_filter_width = self.initial_filter_width
            else:
                # One-hot input: one channel per quantization level.
                initial_channels = self.quantization_channels
                initial_filter_width = self.filter_width
            layer['filter'] = create_variable(
                'filter',
                [initial_filter_width,
                 initial_channels,
                 self.residual_channels])
            var['causal_layer'] = layer

        var['dilated_stack'] = list()
        with tf.variable_scope('dilated_stack'):
            for i, dilation in enumerate(self.dilations):
                # Each dilated layer gets its own variable scope.
                with tf.variable_scope('layer{}'.format(i)):
                    current = dict()
                    # Filter and gate of the gated activation unit, plus
                    # the 1x1 convolutions that produce the residual
                    # ('dense') and skip contributions.
                    current['filter'] = create_variable(
                        'filter',
                        [self.filter_width,
                         self.residual_channels,
                         self.dilation_channels])
                    current['gate'] = create_variable(
                        'gate',
                        [self.filter_width,
                         self.residual_channels,
                         self.dilation_channels])
                    current['dense'] = create_variable(
                        'dense',
                        [1,
                         self.dilation_channels,
                         self.residual_channels])
                    current['skip'] = create_variable(
                        'skip',
                        [1,
                         self.dilation_channels,
                         self.skip_channels])

                    # 1x1 projections of the global condition onto the
                    # filter and gate activations.
                    if self.global_condition_channels is not None:
                        current['gc_gateweights'] = create_variable(
                            'gc_gate',
                            [1, self.global_condition_channels,
                             self.dilation_channels])
                        current['gc_filtweights'] = create_variable(
                            'gc_filter',
                            [1, self.global_condition_channels,
                             self.dilation_channels])

                    # Optional biases for the filter, gate, residual and
                    # skip units.
                    if self.use_biases:
                        current['filter_bias'] = create_bias_variable(
                            'filter_bias',
                            [self.dilation_channels])
                        current['gate_bias'] = create_bias_variable(
                            'gate_bias',
                            [self.dilation_channels])
                        current['dense_bias'] = create_bias_variable(
                            'dense_bias',
                            [self.residual_channels])
                        # BUG FIX: this variable name was misspelled
                        # 'slip_bias'.  NOTE(review): fixing it renames
                        # the variable in new checkpoints; checkpoints
                        # saved under the old misspelled name need a
                        # rename on restore.
                        current['skip_bias'] = create_bias_variable(
                            'skip_bias',
                            [self.skip_channels])

                    var['dilated_stack'].append(current)

        # Post-processing: two 1x1 convolutions applied to the summed
        # skip connections ((+) -> ReLU -> 1x1 -> ReLU -> 1x1).
        with tf.variable_scope('postprocessing'):
            current = dict()
            current['postprocess1'] = create_variable(
                'postprocess1',
                [1, self.skip_channels, self.skip_channels])
            current['postprocess2'] = create_variable(
                'postprocess2',
                [1, self.skip_channels, self.quantization_channels])
            if self.use_biases:
                current['postprocess1_bias'] = create_bias_variable(
                    'postprocess1_bias',
                    [self.skip_channels])
                current['postprocess2_bias'] = create_bias_variable(
                    'postprocess2_bias',
                    [self.quantization_channels])
            var['postprocessing'] = current
    return var
_create_causal_layer
下面这段代码的主要任务是:创建一个因果卷积层
def _create_causal_layer(self, input_batch):
    '''Creates a single causal convolution layer.

    The layer can change the number of channels.
    '''
    with tf.name_scope('causal_layer'):
        # Apply the (undilated) causal convolution from ops.py using the
        # filter variable created in _create_variables().
        filt = self.variables['causal_layer']['filter']
        return causal_conv(input_batch, filt, 1)
_create_dilation_layer
下面这段代码的主要任务是:产生一个因果膨胀的卷积层
本函数部分结构如下图所示
def _create_dilation_layer(self, input_batch, layer_index, dilation,
                           global_condition_batch, output_width):
    '''Creates a single causal dilated convolution layer.

    Computes the gated activation tanh(filter) * sigmoid(gate) over
    dilated causal convolutions, then produces two 1x1-convolution
    outputs: a skip contribution (cropped to output_width) and a
    residual ('dense') output added back onto the layer input.
    Returns the tuple (skip_contribution, input + dense_output).
    '''
    # Look up this layer's variables in the dilated stack.
    variables = self.variables['dilated_stack'][layer_index]
    weights_filter = variables['filter']
    weights_gate = variables['gate']
    # Dilated causal convolutions for the filter and gate paths.
    conv_filter = causal_conv(input_batch, weights_filter, dilation)
    conv_gate = causal_conv(input_batch, weights_gate, dilation)
    # Global conditioning: project the condition with a 1x1 conv and add
    # it onto both the filter and gate activations.
    if global_condition_batch is not None:
        weights_gc_filter = variables['gc_filtweights']
        conv_filter = conv_filter + tf.nn.conv1d(global_condition_batch,
                                                 weights_gc_filter, stride=1,
                                                 padding="SAME", name="gc_filter")
        weights_gc_gate = variables['gc_gateweights']
        conv_gate = conv_gate + tf.nn.conv1d(global_condition_batch,
                                             weights_gc_gate, stride=1,
                                             padding="SAME", name="gc_gate")
    # Optional biases on the filter and gate paths.
    if self.use_biases:
        filter_bias = variables['filter_bias']
        gate_bias = variables['gate_bias']
        conv_filter = tf.add(conv_filter, filter_bias)
        conv_gate = tf.add(conv_gate, gate_bias)
    # Gated activation unit.
    out = tf.tanh(conv_filter) * tf.sigmoid(conv_gate)
    # 1x1 convolution producing the residual ('dense') output.
    weights_dense = variables['dense']
    transformed = tf.nn.conv1d(
        out, weights_dense, stride=1, padding="SAME", name="dense")
    # Skip path: crop the activation to output_width (keep the last
    # output_width time steps) before the 1x1 skip convolution.
    skip_cut = tf.shape(out)[1] - output_width
    out_skip = tf.slice(out, [0, skip_cut, 0], [-1, -1, -1])
    weights_skip = variables['skip']
    skip_contribution = tf.nn.conv1d(
        out_skip, weights_skip, stride=1, padding="SAME", name="skip")
    # Optional biases on the dense and skip outputs.
    if self.use_biases:
        dense_bias = variables['dense_bias']
        skip_bias = variables['skip_bias']
        transformed = transformed + dense_bias
        skip_contribution = skip_contribution + skip_bias
    # Optionally log weight (and bias) histograms for this layer.
    if self.histograms:
        layer = 'layer{}'.format(layer_index)
        tf.histogram_summary(layer + '_filter', weights_filter)
        tf.histogram_summary(layer + '_gate', weights_gate)
        tf.histogram_summary(layer + '_dense', weights_dense)
        tf.histogram_summary(layer + '_skip', weights_skip)
        if self.use_biases:
            tf.histogram_summary(layer + '_biases_filter', filter_bias)
            tf.histogram_summary(layer + '_biases_gate', gate_bias)
            tf.histogram_summary(layer + '_biases_dense', dense_bias)
            tf.histogram_summary(layer + '_biases_skip', skip_bias)
    # Crop the input to the same length as the dense output so the
    # residual addition lines up in time.
    input_cut = tf.shape(input_batch)[1] - tf.shape(transformed)[1]
    input_batch = tf.slice(input_batch, [0, input_cut, 0], [-1, -1, -1])
    # Return the skip contribution and the residual-connected output.
    return skip_contribution, input_batch + transformed
_generator_conv
下面这段代码的主要任务是:对单个卷积处理步骤执行卷积
def _generator_conv(self, input_batch, state_batch, weights):
    '''Perform convolution for a single convolutional processing step.'''
    # With a width-2 filter the convolution collapses into two matrix
    # multiplies: the first filter tap applied to the queued past sample
    # and the second tap applied to the current sample.
    w_past = weights[0, :, :]
    w_curr = weights[1, :, :]
    return tf.matmul(state_batch, w_past) + tf.matmul(input_batch, w_curr)
_generator_causal_layer
下面这段代码的主要任务是:得到因果层输出
def _generator_causal_layer(self, input_batch, state_batch):
    '''Single-step (incremental) version of the initial causal layer.'''
    with tf.name_scope('causal_layer'):
        # Reuse the causal-layer filter and apply the single-step
        # convolution to the current input and the queued state.
        filt = self.variables['causal_layer']['filter']
        return self._generator_conv(input_batch, state_batch, filt)
_generator_dilation_layer
下面这段代码的主要任务是:创建一个膨胀卷积层
def _generator_dilation_layer(self, input_batch, state_batch, layer_index,
                              dilation, global_condition_batch):
    '''Single-step (incremental) version of a dilated convolution layer.

    Mirrors _create_dilation_layer but operates on one sample at a time,
    replacing each convolution by matrix multiplies against the current
    input and the queued past state.  Returns the tuple
    (skip_contribution, input + dense_output).
    '''
    # Look up this layer's variables in the dilated stack.
    variables = self.variables['dilated_stack'][layer_index]
    weights_filter = variables['filter']
    weights_gate = variables['gate']
    # Single-step convolutions for the filter and gate paths.
    output_filter = self._generator_conv(
        input_batch, state_batch, weights_filter)
    output_gate = self._generator_conv(
        input_batch, state_batch, weights_gate)
    # Global conditioning: add the projected condition onto both paths.
    if global_condition_batch is not None:
        # Flatten the condition to a single row for the matmuls below.
        global_condition_batch = tf.reshape(global_condition_batch,
                                            shape=(1, -1))
        # Only the first (and only) tap of the 1x1 gc filter is used.
        weights_gc_filter = variables['gc_filtweights']
        weights_gc_filter = weights_gc_filter[0, :, :]
        output_filter += tf.matmul(global_condition_batch,
                                   weights_gc_filter)
        weights_gc_gate = variables['gc_gateweights']
        weights_gc_gate = weights_gc_gate[0, :, :]
        output_gate += tf.matmul(global_condition_batch,
                                 weights_gc_gate)
    # Optional biases on the filter and gate paths.
    if self.use_biases:
        output_filter = output_filter + variables['filter_bias']
        output_gate = output_gate + variables['gate_bias']
    # Gated activation unit.
    out = tf.tanh(output_filter) * tf.sigmoid(output_gate)
    # Residual ('dense') output: 1x1 conv becomes a single matmul.
    weights_dense = variables['dense']
    transformed = tf.matmul(out, weights_dense[0, :, :])
    if self.use_biases:
        transformed = transformed + variables['dense_bias']
    # Skip contribution: likewise a single matmul.
    weights_skip = variables['skip']
    skip_contribution = tf.matmul(out, weights_skip[0, :, :])
    if self.use_biases:
        skip_contribution = skip_contribution + variables['skip_bias']
    # Return the skip contribution and the residual-connected output.
    return skip_contribution, input_batch + transformed
_create_network
下面这段代码的主要任务是:构建 WaveNet 网络模型
def _create_network(self, input_batch, global_condition_batch):
    '''Constructs the WaveNet network.

    Builds the initial causal layer, the stack of dilated layers, and the
    post-processing block, returning the raw (pre-softmax) output.
    '''
    outputs = []
    current_layer = input_batch
    # Pre-process the input with a regular (undilated) causal convolution.
    current_layer = self._create_causal_layer(current_layer)
    # Number of valid output time steps after accounting for the
    # receptive field.
    output_width = tf.shape(input_batch)[1] - self.receptive_field + 1
    # Add all defined dilation layers.
    with tf.name_scope('dilated_stack'):
        for layer_index, dilation in enumerate(self.dilations):
            with tf.name_scope('layer{}'.format(layer_index)):
                # Each layer yields a skip contribution and the residual
                # output that feeds the next layer.
                output, current_layer = self._create_dilation_layer(
                    current_layer, layer_index, dilation,
                    global_condition_batch, output_width)
                outputs.append(output)
    with tf.name_scope('postprocessing'):
        # Perform (+) -> ReLU -> 1x1 conv -> ReLU -> 1x1 conv to
        # post-process the output.
        w1 = self.variables['postprocessing']['postprocess1']
        w2 = self.variables['postprocessing']['postprocess2']
        if self.use_biases:
            b1 = self.variables['postprocessing']['postprocess1_bias']
            b2 = self.variables['postprocessing']['postprocess2_bias']
        # Optionally log the post-processing weight histograms.
        if self.histograms:
            tf.histogram_summary('postprocess1_weights', w1)
            tf.histogram_summary('postprocess2_weights', w2)
            if self.use_biases:
                tf.histogram_summary('postprocess1_biases', b1)
                tf.histogram_summary('postprocess2_biases', b2)
        # Sum the skip contributions from every layer.
        total = sum(outputs)
        transformed1 = tf.nn.relu(total)
        # First post-processing 1x1 convolution.
        conv1 = tf.nn.conv1d(transformed1, w1, stride=1, padding="SAME")
        if self.use_biases:
            conv1 = tf.add(conv1, b1)
        transformed2 = tf.nn.relu(conv1)
        # Second post-processing 1x1 convolution.
        conv2 = tf.nn.conv1d(transformed2, w2, stride=1, padding="SAME")
        if self.use_biases:
            conv2 = tf.add(conv2, b2)
    return conv2
_create_generator
下面这段代码的主要任务是:构建一个高效的增量生成器
def _create_generator(self, input_batch, global_condition_batch):
    '''Construct an efficient incremental generator.

    Builds the single-step generation graph: each layer keeps its recent
    activations in a FIFO queue so one new sample can be processed
    without recomputing the whole receptive field.  Stores the queue
    init/push ops on self.init_ops / self.push_ops and returns the raw
    (pre-softmax) output for the current step.
    '''
    init_ops = []
    push_ops = []
    outputs = []
    current_layer = input_batch
    # Queue holding the previous input sample for the causal layer.
    q = tf.FIFOQueue(
        1,
        dtypes=tf.float32,
        shapes=(self.batch_size, self.quantization_channels))
    # Seed the queue with zeros.
    init = q.enqueue_many(
        tf.zeros((1, self.batch_size, self.quantization_channels)))
    # Dequeue the stored past sample as the current state...
    current_state = q.dequeue()
    # ...and push the current input so it becomes the next step's state.
    push = q.enqueue([current_layer])
    init_ops.append(init)
    push_ops.append(push)
    # Single-step causal layer.
    current_layer = self._generator_causal_layer(
        current_layer, current_state)
    # Add all defined dilation layers.
    with tf.name_scope('dilated_stack'):
        for layer_index, dilation in enumerate(self.dilations):
            with tf.name_scope('layer{}'.format(layer_index)):
                # Each layer needs a queue of length `dilation` so the
                # sample from `dilation` steps ago is available.
                q = tf.FIFOQueue(
                    dilation,
                    dtypes=tf.float32,
                    shapes=(self.batch_size, self.residual_channels))
                init = q.enqueue_many(
                    tf.zeros((dilation, self.batch_size,
                              self.residual_channels)))
                current_state = q.dequeue()
                push = q.enqueue([current_layer])
                init_ops.append(init)
                push_ops.append(push)
                # Single-step dilated layer.
                output, current_layer = self._generator_dilation_layer(
                    current_layer, current_state, layer_index, dilation,
                    global_condition_batch)
                outputs.append(output)
    # Expose the queue operations to the caller.
    self.init_ops = init_ops
    self.push_ops = push_ops
    with tf.name_scope('postprocessing'):
        variables = self.variables['postprocessing']
        # Perform (+) -> ReLU -> 1x1 conv -> ReLU -> 1x1 conv to
        # post-process the output (matmul form of the 1x1 convs).
        w1 = variables['postprocess1']
        w2 = variables['postprocess2']
        if self.use_biases:
            b1 = variables['postprocess1_bias']
            b2 = variables['postprocess2_bias']
        # Sum the skip contributions from every layer.
        total = sum(outputs)
        transformed1 = tf.nn.relu(total)
        # First post-processing step, with optional bias.
        conv1 = tf.matmul(transformed1, w1[0, :, :])
        if self.use_biases:
            conv1 = conv1 + b1
        transformed2 = tf.nn.relu(conv1)
        # Second post-processing step, with optional bias.
        conv2 = tf.matmul(transformed2, w2[0, :, :])
        if self.use_biases:
            conv2 = conv2 + b2
    return conv2
_one_hot
下面这段代码的主要任务是:使用 one-hot 对波形振幅进行编码
def _one_hot(self, input_batch):
    '''One-hot encode the waveform amplitudes.'''
    with tf.name_scope('one_hot_encode'):
        # One channel per quantization level.
        one_hot = tf.one_hot(
            input_batch,
            depth=self.quantization_channels,
            dtype=tf.float32)
        # Force the (batch, time, channels) layout.
        return tf.reshape(
            one_hot, [self.batch_size, -1, self.quantization_channels])
_embed_gc
下面这段代码的主要任务是:返回对全局条件的嵌入
def _embed_gc(self, global_condition):
    '''Return the embedding for the given global condition.

    If global_condition_cardinality is set, the condition is an integer
    category id and is looked up in the embedding table; otherwise the
    condition (if any) is already an embedding vector and is used
    directly after a dimension check.
    '''
    embedding = None
    if self.global_condition_cardinality is not None:
        # Only lookup the embedding when the global condition is
        # presented as an integer over mutually-exclusive categories.
        table = self.variables['embeddings']['gc_embedding']
        embedding = tf.nn.embedding_lookup(table, global_condition)
    elif global_condition is not None:
        # The condition is already an embedding; its last dimension must
        # equal global_condition_channels.
        rank = len(global_condition.get_shape())
        last_dim = global_condition.get_shape()[rank - 1]
        if last_dim == self.global_condition_channels:
            embedding = global_condition
        else:
            raise ValueError('Shape of global_condition {} does not'
                             ' match global_condition_channels {}.'.
                             format(global_condition.get_shape(),
                                    self.global_condition_channels))
    if embedding is not None:
        # Normalize to (batch, 1, channels).
        embedding = tf.reshape(
            embedding,
            [self.batch_size, 1, self.global_condition_channels])
    return embedding
predict_proba
下面这段代码的主要任务是:根据输入波形中的所有样本计算下一个样本的概率分布
def predict_proba(self, waveform, global_condition=None, name='wavenet'):
    '''Computes the probability distribution of the next sample based on
    all samples in the input waveform.'''
    with tf.name_scope(name):
        # Encode the input according to its form: raw scalars are cast
        # and reshaped, quantized input is one-hot encoded.
        if self.scalar_input:
            network_input = tf.reshape(
                tf.cast(waveform, tf.float32), [-1, 1])
        else:
            network_input = self._one_hot(waveform)
        # Embed the global condition (if any) and run the full network.
        gc_embedding = self._embed_gc(global_condition)
        raw_output = self._create_network(network_input, gc_embedding)
        logits = tf.reshape(raw_output, [-1, self.quantization_channels])
        # Cast to float64 to avoid a bug in TensorFlow's softmax.
        proba = tf.cast(
            tf.nn.softmax(tf.cast(logits, tf.float64)), tf.float32)
        # Keep only the distribution for the last time step.
        last = tf.slice(
            proba,
            [tf.shape(proba)[0] - 1, 0],
            [1, self.quantization_channels])
        return tf.reshape(last, [-1])
predict_proba_incremental
下面这段代码的主要任务是:基于单个样本和之前通过的所有样本,逐步计算下一个样本的概率分布
def predict_proba_incremental(self, waveform, global_condition=None,
                              name='wavenet'):
    '''Incrementally computes the probability distribution of the next
    sample from a single sample and all previously passed samples.'''
    # Fast generation only supports width-2 filters and one-hot input.
    if self.filter_width > 2:
        raise NotImplementedError("Incremental generation does not "
                                  "support filter_width > 2.")
    if self.scalar_input:
        raise NotImplementedError("Incremental generation does not "
                                  "support scalar input yet.")
    with tf.name_scope(name):
        # One-hot encode the sample and flatten to (time, channels).
        encoded = tf.reshape(
            tf.one_hot(waveform, self.quantization_channels),
            [-1, self.quantization_channels])
        # Embed the global condition (if any) and run the incremental
        # generator graph.
        gc_embedding = self._embed_gc(global_condition)
        raw_output = self._create_generator(encoded, gc_embedding)
        logits = tf.reshape(raw_output, [-1, self.quantization_channels])
        # Cast to float64 for the softmax, then back to float32.
        proba = tf.cast(
            tf.nn.softmax(tf.cast(logits, tf.float64)), tf.float32)
        # Keep only the distribution for the last time step.
        last = tf.slice(
            proba,
            [tf.shape(proba)[0] - 1, 0],
            [1, self.quantization_channels])
        return tf.reshape(last, [-1])
loss
下面这段代码的主要任务是:创建 WaveNet 网络并返回自动编码损失
def loss(self, input_batch,
         global_condition_batch=None,
         l2_regularization_strength=None,
         name='wavenet'):
    '''Creates a WaveNet network and returns the autoencoding loss.

    The loss is the mean softmax cross-entropy between the network's
    prediction and the next (mu-law encoded) sample, optionally with an
    L2 penalty over all non-bias trainable variables.
    '''
    with tf.name_scope(name):
        # Mu-law encode and quantize the input audio.
        encoded_input = mu_law_encode(input_batch,
                                      self.quantization_channels)
        # Embed the global condition (if any).
        gc_embedding = self._embed_gc(global_condition_batch)
        # One-hot encode the quantized input (also used as the target).
        encoded = self._one_hot(encoded_input)
        # Network input depends on the input mode: raw scalars or the
        # one-hot encoding.
        if self.scalar_input:
            network_input = tf.reshape(
                tf.cast(input_batch, tf.float32),
                [self.batch_size, -1, 1])
        else:
            network_input = encoded
        # Cut off the last sample of the network input to preserve
        # causality (the network must not see the sample it predicts).
        network_input_width = tf.shape(network_input)[1] - 1
        network_input = tf.slice(network_input, [0, 0, 0],
                                 [-1, network_input_width, -1])
        # Build the WaveNet network.
        raw_output = self._create_network(network_input, gc_embedding)
        with tf.name_scope('loss'):
            # Cut off the samples corresponding to the receptive field
            # for the first predicted sample.
            target_output = tf.slice(
                tf.reshape(
                    encoded,
                    [self.batch_size, -1, self.quantization_channels]),
                [0, self.receptive_field, 0],
                [-1, -1, -1])
            target_output = tf.reshape(target_output,
                                       [-1, self.quantization_channels])
            prediction = tf.reshape(raw_output,
                                    [-1, self.quantization_channels])
            # Softmax cross-entropy between predictions and targets.
            loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=prediction,
                labels=target_output)
            # Mean loss over all predicted samples.
            reduced_loss = tf.reduce_mean(loss)
            # Log the mean loss.
            tf.summary.scalar('loss', reduced_loss)
            if l2_regularization_strength is None:
                return reduced_loss
            else:
                # L2 regularization over all trainable parameters,
                # excluding bias variables (matched by name).
                l2_loss = tf.add_n([tf.nn.l2_loss(v)
                                    for v in tf.trainable_variables()
                                    if not('bias' in v.name)])
                # Add the regularization term to the loss.
                total_loss = (reduced_loss +
                              l2_regularization_strength * l2_loss)
                # Log both loss components.
                tf.summary.scalar('l2_loss', l2_loss)
                tf.summary.scalar('total_loss', total_loss)
                return total_loss
本文还在持续更新中!
欢迎各位大佬交流讨论!