介绍
tensorflow的seq2seq API的使用主要包含下面几步:
1、设置helper
训练(train)和推理(infer)时使用的helper不同,分别如下:
# The helper decides how the next decoder input is produced: it can feed the
# embedding of the ground-truth word, or sample y(t-1) from the previous
# step's output logits.
train_helper = tf.contrib.seq2seq.TrainingHelper(output_embed, output_lengths)
# Alternative for training: scheduled sampling (0.3 is passed as the
# sampling-probability argument).
# train_helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
#     output_embed, output_lengths, embeddings, 0.3
# )
# Inference helper: looks up the embedding of the previous step's output
# y(t-1) in the embedding matrix and uses it as the next input.
pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
    embeddings,
    # tf.to_int32 is deprecated; tf.cast(..., tf.int32) is its equivalent.
    start_tokens=tf.cast(start_tokens, tf.int32),
    end_token=1)
2、将第一步得到的helper、每个step使用的解码cell(比如GRUCell)以及可选的输出层(output_layer)作为参数传入BasicDecoder。BasicDecoder最主要的功能在其step函数里,用来计算一个解码步骤的输出。
# `helper` is whichever helper was built in step 1 (train_helper for
# training, pred_helper for inference).
decoder = tf.contrib.seq2seq.BasicDecoder(
    cell=out_cell,
    helper=helper,
    # Decoding starts from the state returned by the cell's zero_state().
    initial_state=out_cell.zero_state(
        batch_size=batch_size, dtype=tf.float32),
)
3、将BasicDecoder作为参数输入到dynamic_decode,得到输出
outputs = tf.contrib.seq2seq.dynamic_decode(
decoder=decoder, output_time_major=False,
impute_finished=True, maximum_iterations=output_max_length
)
return outputs[0]
所以如果要客制化主要涉及到两个类的重写
- tf.contrib.seq2seq.TrainingHelper
- tf.contrib.seq2seq.BasicDecoder
源码解析
1、TrainingHelper
class TrainingHelper(Helper):
    """A helper for use during training. Only reads inputs.

    Returned sample_ids are the argmax of the RNN output logits.
    """

    def __init__(self, inputs, sequence_length, time_major=False, name=None):
        """Initializer.

        Args:
          inputs: A (structure of) input tensors.
          sequence_length: An int32 vector tensor.
          time_major: Python bool. Whether the tensors in `inputs` are time
            major. If `False` (default), they are assumed to be batch major.
          name: Name scope for any created operations.

        Raises:
          ValueError: if `sequence_length` is not a 1D tensor.
        """
        with ops.name_scope(name, "TrainingHelper", [inputs, sequence_length]):
            inputs = ops.convert_to_tensor(inputs, name="inputs")
            # The `inputs` property exposes the tensor as passed in (before
            # any transpose below).
            self._inputs = inputs
            # Batch-major inputs are transposed to time-major
            # ([time, batch, ...]) so they can be split per time step.
            if not time_major:
                inputs = nest.map_structure(_transpose_batch_time, inputs)

            # `_input_tas` holds one TensorArray per input tensor; element i
            # is fetched with `_input_tas.read(i)` (see `initialize` and
            # `next_inputs`). For embedded inputs each element is presumably
            # of shape [batch, embedding] -- this depends on `_unstack_ta`,
            # which is defined elsewhere.
            self._input_tas = nest.map_structure(_unstack_ta, inputs)

            # `sequence_length` must be a vector (rank-1 tensor).
            self._sequence_length = ops.convert_to_tensor(
                sequence_length, name="sequence_length")
            if self._sequence_length.get_shape().ndims != 1:
                raise ValueError(
                    "Expected sequence_length to be a vector, but received shape: %s" %
                    self._sequence_length.get_shape())

            # Zeros shaped like the first time step; fed as the next input
            # once every sequence in the batch has finished.
            self._zero_inputs = nest.map_structure(
                lambda inp: array_ops.zeros_like(inp[0, :]), inputs)

            # The batch size is the number of entries in `sequence_length`.
            self._batch_size = array_ops.size(sequence_length)

    @property
    def inputs(self):
        return self._inputs

    @property
    def sequence_length(self):
        return self._sequence_length

    @property
    def batch_size(self):
        return self._batch_size

    @property
    def sample_ids_shape(self):
        return tensor_shape.TensorShape([])

    @property
    def sample_ids_dtype(self):
        return dtypes.int32

    def initialize(self, name=None):
        """Return `(finished, next_inputs)` for the first decoding step."""
        with ops.name_scope(name, "TrainingHelperInitialize"):
            # `finished` has one entry per batch element; an entry is True
            # only for a zero-length sequence (so typically all False here).
            finished = math_ops.equal(0, self._sequence_length)
            # `all_finished` is a scalar boolean.
            all_finished = math_ops.reduce_all(finished)
            # At the beginning, element 0 of each TensorArray is the next
            # input, unless the whole batch is already finished.
            next_inputs = control_flow_ops.cond(
                all_finished, lambda: self._zero_inputs,
                lambda: nest.map_structure(lambda inp: inp.read(0), self._input_tas))
            return (finished, next_inputs)

    def sample(self, time, outputs, name=None, **unused_kwargs):
        """Return the argmax of `outputs` over the last axis, cast to int32."""
        with ops.name_scope(name, "TrainingHelperSample", [time, outputs]):
            sample_ids = math_ops.cast(
                math_ops.argmax(outputs, axis=-1), dtypes.int32)
            return sample_ids

    def next_inputs(self, time, outputs, state, name=None, **unused_kwargs):
        """next_inputs_fn for TrainingHelper."""
        with ops.name_scope(name, "TrainingHelperNextInputs",
                            [time, outputs, state]):
            next_time = time + 1
            # Per batch element: has this sequence reached its end?
            finished = (next_time >= self._sequence_length)
            # `all_finished` becomes True once every sequence in the batch
            # has reached its end.
            all_finished = math_ops.reduce_all(finished)

            def read_from_ta(inp):
                return inp.read(next_time)

            # Read the inputs for the next time step; fall back to zeros
            # when nothing is left to read. The original annotation gives
            # the shape of `next_inputs` as [batch, embedding_size].
            next_inputs = control_flow_ops.cond(
                all_finished, lambda: self._zero_inputs,
                lambda: nest.map_structure(read_from_ta, self._input_tas))
            # `state` is passed through unchanged.
            return (finished, next_inputs, state)
2、BasicDecoder 的源码实现
class BasicDecoder(decoder.Decoder):
    """Basic sampling decoder."""

    def __init__(self, cell, helper, initial_state, output_layer=None):
        """Initialize BasicDecoder.

        Args:
          cell: An `RNNCell` instance.
          helper: A `Helper` instance.
          initial_state: A (possibly nested tuple of...) tensors and
            TensorArrays. The initial state of the RNNCell, e.g. of shape
            [batch, hidden_state_size].
          output_layer: (Optional) An instance of `tf.layers.Layer`, i.e.,
            `tf.layers.Dense`. Optional layer to apply to the RNN output
            prior to storing the result or sampling.

        Raises:
          TypeError: if `cell`, `helper` or `output_layer` have an incorrect
            type.
        """
        rnn_cell_impl.assert_like_rnncell("cell", cell)
        if not isinstance(helper, helper_py.Helper):
            raise TypeError("helper must be a Helper, received: %s" % type(helper))
        if (output_layer is not None
                and not isinstance(output_layer, layers_base.Layer)):
            raise TypeError(
                "output_layer must be a Layer, received: %s" % type(output_layer))
        self._cell = cell
        self._helper = helper
        self._initial_state = initial_state
        self._output_layer = output_layer

    @property
    def batch_size(self):
        return self._helper.batch_size

    def _rnn_output_size(self):
        """Return the per-example output shape (batch dimension excluded)."""
        size = self._cell.output_size
        if self._output_layer is None:
            return size
        else:
            # To use layer's compute_output_shape, we need to convert the
            # RNNCell's output_size entries into shapes with an unknown
            # batch size. We then pass this through the layer's
            # compute_output_shape and read off all but the first (batch)
            # dimensions to get the output size of the rnn with the layer
            # applied to the top.
            output_shape_with_unknown_batch = nest.map_structure(
                lambda s: tensor_shape.TensorShape([None]).concatenate(s),
                size)
            layer_output_shape = self._output_layer.compute_output_shape(
                output_shape_with_unknown_batch)
            return nest.map_structure(lambda s: s[1:], layer_output_shape)

    @property
    def output_size(self):
        # Return the cell output and the id
        return BasicDecoderOutput(
            rnn_output=self._rnn_output_size(),
            sample_id=self._helper.sample_ids_shape)

    @property
    def output_dtype(self):
        # Assume the dtype of the cell is the output_size structure
        # containing the input_state's first component's dtype.
        # Return that structure and the sample_ids_dtype from the helper.
        dtype = nest.flatten(self._initial_state)[0].dtype
        return BasicDecoderOutput(
            nest.map_structure(lambda _: dtype, self._rnn_output_size()),
            self._helper.sample_ids_dtype)

    def initialize(self, name=None):
        """Initialize the decoder.

        Args:
          name: Name scope for any created operations. (Not used by this
            implementation; it is not forwarded to the helper.)

        Returns:
          `(finished, first_inputs, initial_state)`.
        """
        # The helper supplies (finished, first_inputs); the decoder appends
        # its own initial state.
        return self._helper.initialize() + (self._initial_state,)

    def step(self, time, inputs, state, name=None):
        """Perform a decoding step.

        Args:
          time: scalar `int32` tensor.
          inputs: A (structure of) input tensors.
          state: A (structure of) state tensors and TensorArrays.
          name: Name scope for any created operations.

        Returns:
          `(outputs, next_state, next_inputs, finished)`.
        """
        with ops.name_scope(name, "BasicDecoderStep", (time, inputs, state)):
            # Advance the RNN cell by one step.
            cell_outputs, cell_state = self._cell(inputs, state)
            # Apply the optional output layer before sampling.
            if self._output_layer is not None:
                cell_outputs = self._output_layer(cell_outputs)
            # The helper picks the sample ids from this step's outputs ...
            sample_ids = self._helper.sample(
                time=time, outputs=cell_outputs, state=cell_state)
            # ... and decides the next inputs and which sequences finished.
            (finished, next_inputs, next_state) = self._helper.next_inputs(
                time=time,
                outputs=cell_outputs,
                state=cell_state,
                sample_ids=sample_ids)
        outputs = BasicDecoderOutput(cell_outputs, sample_ids)
        return (outputs, next_state, next_inputs, finished)