我使用 tensorflow 制作了一个 seq2seq 模型,遇到了一个问题,当我在 tf.contrib.seq2seq.dynamic_decode 中使用 MultiRNNCell 时,我的程序会抛出错误。
问题发生在这里:
defw_rnn=tf.nn.rnn_cell.MultiRNNCell([
tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer)
for _ in range(self.FLAGS.rnn_layer_size)])
training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_inputs,
sequence_length=self.decoder_targets_length,
time_major=False)
training_decoder = \
tf.contrib.seq2seq.BasicDecoder(
defw_rnn, training_helper,
encoder_final_state,
output_layer)
training_decoder_output, _, training_decoder_output_length = \
tf.contrib.seq2seq.dynamic_decode(
training_decoder,
impute_finished=True,
maximum_iterations=self.FLAGS.max_len)
当我运行此代码时,控制台显示此错误消息:
C:\Users\TopView\AppData\Local\Programs\Python\Python36\python.exe E:/PycharmProject/cikm_transport/CIKM/CIKM/translate_model/train.py WARNING:tensorflow:From C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn.py:417: calling reverse_sequence (from tensorflow.python.ops.array_ops) with seq_dim is deprecated and will be removed in a future version. Instructions for updating: seq_dim is deprecated, use seq_axis instead WARNING:tensorflow:From C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\util\deprecation.py:432: calling reverse_sequence (from tensorflow.python.ops.array_ops) with batch_dim is deprecated and will be removed in a future version. Instructions for updating: batch_dim is deprecated, use batch_axis instead encoder_final_state shpe LSTMStateTuple(c=<tf.Tensor 'encoder/bidirectional_rnn/fw/fw/while/Exit_5:0' shape=(?, 24) dtype=float32>, h=<tf.Tensor 'encoder/bidirectional_rnn/fw/fw/while/Exit_6:0' shape=(?, 24) dtype=float32>) decoder_inputs shape before embedded (128, 10) decoder inputs shape after embedded (128, 10, 5) Traceback (most recent call last): File "E:/PycharmProject/cikm_transport/CIKM/CIKM/translate_model/train.py", line 14, in <module> len(embedding_matrix['embedding'][0])) File "E:\PycharmProject\cikm_transport\CIKM\CIKM\translate_model\model.py", line 109, in __init__ maximum_iterations=self.FLAGS.max_len) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py", line 323, in dynamic_decode swap_memory=swap_memory) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3209, in while_loop result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2941, in BuildLoop pred, body, original_loop_vars, loop_vars, shape_invariants) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2878, in _BuildLoop body_result = body(*packed_vars_for_body) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3179, in <lambda> body = lambda i, lv: (i + 1, orig_body(*lv)) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py", line 266, in body decoder_finished) = decoder.step(time, inputs, state) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\basic_decoder.py", line 137, in step cell_outputs, cell_state = self._cell(inputs, state) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 232, in __call__ return super(RNNCell, self).__call__(inputs, state) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\base.py", line 329, in __call__ outputs = super(Layer, self).__call__(inputs, *args, **kwargs) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 703, in __call__ outputs = self.call(inputs, *args, **kwargs) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1325, in call cur_inp, new_state = cell(cur_inp, cur_state) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 339, in __call__ *args, **kwargs) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\base.py", line 329, in __call__ outputs = super(Layer, self).__call__(inputs, *args, **kwargs) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 703, in __call__ outputs = self.call(inputs, *args, **kwargs) File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 846, in call (c_prev, m_prev) = state File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 436, in __iter__ "Tensor objects are not iterable when eager execution is not " TypeError: Tensor objects are not iterable when eager execution is not enabled. To iterate over this tensor use tf.map_fn.
Process finished with exit code 1
但是,当我更改 的实例defw_rnn
,使其成为像 LSTMCell 这样的单个 RNN 实例时,错误消失了:
defw_rnn=tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer)
并且代码运行良好。但是,我发现互联网上大多数关于 seq2seq 模型的代码都使用 MultiRNNCell 并且他们也使用 tensorflow,所以我真的很困惑我的程序有什么问题。
这是整个代码:
import tensorflow as tf
import numpy as np
class Seq2SeqModel(object):
def bw_fw_rnn(self):
with tf.name_scope("forward_rnn"):
fw = tf.nn.rnn_cell.MultiRNNCell([
tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer) for _ in
range(self.FLAGS.rnn_layer_size)])
fw = tf.nn.rnn_cell.DropoutWrapper(fw, output_keep_prob=self.FLAGS.keep_prob)
with tf.name_scope("backward_rnn"):
bw = tf.nn.rnn_cell.MultiRNNCell([
tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer) for _ in
range(self.FLAGS.rnn_layer_size)])
bw = tf.nn.rnn_cell.DropoutWrapper(bw, output_keep_prob=self.FLAGS.keep_prob)
return (fw, bw)
def decode_inputs_preprocess(self, data, id_matrix):
ending=tf.strided_slice(data,[0,0],[self.batch_size,-1],[1,1])
decoder_input=tf.concat([tf.fill([self.batch_size,1],id_matrix.index('<go>')),ending],1)
return decoder_input
def __init__(self, FLAGS, english_id_matrix, spanish_id_matrix, english_vocab_size,spanish_vocab_size, embedding_size):
self.FLAGS = FLAGS
self.english_vocab_size = english_vocab_size
self.embedding_size = embedding_size
self.encoder_input = tf.placeholder(shape=[None, self.FLAGS.max_len], dtype=tf.int32, name='encoder_inputs')
self.decoder_targets = tf.placeholder(shape=[None, self.FLAGS.max_len], dtype=tf.int32, name='decoder_targets')
self.encoder_input_sequence_length = tf.placeholder(shape=[None], dtype=tf.int32, name='encoder_inputs_length')
self.decoder_targets_length = tf.placeholder(shape=[None], dtype=tf.int32, name='decoder_targets_length')
self.batch_size = self.FLAGS.batch_size
with tf.name_scope('embedding_look_up'):
spanish_embeddings = tf.Variable(
tf.random_uniform([english_vocab_size,
embedding_size], -1.0, 1.0),
dtype=tf.float32)
english_embeddings = tf.Variable(
tf.random_uniform([english_vocab_size,
embedding_size], -1.0, 1.0),
dtype=tf.float32)
self.spanish_embeddings_inputs = tf.placeholder(
dtype=tf.float32, shape=[english_vocab_size, embedding_size],
name='spanish_embeddings_inputs')
self.english_embeddings_inputs = tf.placeholder(
dtype=tf.float32, shape=[english_vocab_size, embedding_size],
name='spanish_embeddings_inputs')
self.spanish_embeddings_inputs_op = spanish_embeddings.assign(self.spanish_embeddings_inputs)
self.english_embeddings_inputs_op = english_embeddings.assign(self.english_embeddings_inputs)
encoder_inputs = tf.nn.embedding_lookup(spanish_embeddings, self.encoder_input)
with tf.name_scope('encoder'):
enfw_rnn, enbw_rnn = self.bw_fw_rnn()
encoder_outputs, encoder_final_state = \
tf.nn.bidirectional_dynamic_rnn(enfw_rnn, enbw_rnn, encoder_inputs
, sequence_length=self.encoder_input_sequence_length, dtype=tf.float32)
print("encoder_final_state shpe")
# final_state_c=tf.concat([encoder_final_state[0][-1].c,encoder_final_state[1][-1].c],1)
# final_state_h=tf.concat([encoder_final_state[0][-1].h,encoder_final_state[1][-1].h],1)
# encoder_final_state=tf.contrib.rnn.LSTMStateTuple(c=final_state_c,
# h=final_state_h)
encoder_final_state=encoder_final_state[0][-1]
print(encoder_final_state)
with tf.name_scope('dense_layer'):
output_layer = tf.layers.Dense(english_vocab_size,
kernel_initializer=tf.truncated_normal_initializer(
mean=0.0, stddev=0.1
))
# training decoder
with tf.name_scope('decoder'), tf.variable_scope('decode'):
decoder_inputs=self.decode_inputs_preprocess(self.decoder_targets,english_id_matrix)
print('decoder_inputs shape before embedded')
print(decoder_inputs.shape)
decoder_inputs = tf.nn.embedding_lookup(english_embeddings,decoder_inputs)
print('decoder inputs shape after embedded')
print(decoder_inputs.shape)
defw_rnn=tf.nn.rnn_cell.MultiRNNCell([
tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer)
for _ in range(self.FLAGS.rnn_layer_size)])
training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_inputs,
sequence_length=self.decoder_targets_length,
time_major=False)
training_decoder = \
tf.contrib.seq2seq.BasicDecoder(
defw_rnn, training_helper,
encoder_final_state,
output_layer)
training_decoder_output, _, training_decoder_output_length = \
tf.contrib.seq2seq.dynamic_decode(
training_decoder,
impute_finished=True,
maximum_iterations=self.FLAGS.max_len)
training_logits = tf.identity(training_decoder_output.rnn_output, 'logits')
print("training logits shape")
print(training_logits.shape)
# predicting decoder
with tf.variable_scope('decode', reuse=True):
start_tokens = tf.tile(tf.constant([english_id_matrix.index('<go>')], dtype=tf.int32),
[self.batch_size], name='start_tokens')
predicting_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(english_embeddings,
start_tokens,
english_id_matrix.index('<eos>'))
predicting_decoder = tf.contrib.seq2seq.BasicDecoder(defw_rnn,
predicting_helper,
encoder_final_state,
output_layer)
predicting_decoder_output, _, predicting_decoder_output_length =\
tf.contrib.seq2seq.dynamic_decode(
predicting_decoder,
impute_finished=True,
maximum_iterations=self.FLAGS.max_len)
self.predicting_logits = tf.identity(predicting_decoder_output.sample_id, name='predictions')
print("predicting logits shape")
print(self.predicting_logits.shape)
masks = tf.sequence_mask(self.decoder_targets_length, self.FLAGS.max_len, dtype=tf.float32, name='masks')
with tf.variable_scope('optimization'), tf.name_scope('optimization'):
# Loss
self.cost = tf.contrib.seq2seq.sequence_loss(training_logits, self.decoder_targets, masks)
# Optimizer
optimizer = tf.train.AdamOptimizer(self.FLAGS.alpha)
# Gradient Clipping
gradients = optimizer.compute_gradients(self.cost)
capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if grad is not None]
self.train_op = optimizer.apply_gradients(capped_gradients)