I implemented the following LSTM class in TensorFlow, where the unrolling is inspired by the dynamic_rnn() implementation in TensorFlow:
import tensorflow as tf
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import tensor_array_ops

class LSTM():
    def __init__(self, dim_x, dim_h, batch_size):
        self.batch_size = batch_size
        self.dim_x = dim_x
        self.dim_h = dim_h
        # normal() and zeros() are small helpers defined elsewhere in my code
        # (they create the weight tf.Variables and the zero initial states)
        self.W_x_h = normal([dim_x, 4*dim_h])
        self.W_h_h = normal([dim_h, 4*dim_h])
        self.b_h = zeros([4*dim_h])
        self.h_0 = zeros([batch_size, dim_h])
        self.c_0 = zeros([batch_size, dim_h])

    def lstmStep(self, x_t, h_t_minus, c_t_minus):
        # one LSTM step: all four gate pre-activations in a single matmul
        lstm_mat = tf.matmul(x_t, self.W_x_h) + tf.matmul(h_t_minus, self.W_h_h) \
            + self.b_h
        i_lin, f_lin, o_lin, g_lin = tf.split(1, 4, lstm_mat)
        i_t = tf.sigmoid(i_lin); f_t = tf.sigmoid(f_lin)
        o_t = tf.sigmoid(o_lin); g_t = tf.tanh(g_lin)
        c_t = c_t_minus * f_t + i_t * g_t
        h_t = o_t * tf.tanh(c_t)
        return h_t, c_t

    def lstmUnroll(self, in_batch):
        # in_batch has shape [seq_len, batch_size, dim_x]; seq_len is dynamic
        seq_len = array_ops.shape(in_batch)[0]
        in_batch_ta = tensor_array_ops.TensorArray(dtype=in_batch.dtype, size=seq_len)
        in_batch_ta = in_batch_ta.unpack(in_batch)
        h_arr = tensor_array_ops.TensorArray(dtype=in_batch.dtype, size=seq_len)
        time = array_ops.constant(0, dtype=tf.int32)
        inputs_got_shape = in_batch.get_shape().with_rank(3)
        (const_time_steps, const_batch_size, const_depth) = inputs_got_shape.as_list()

        def compute(time, h_t, c_t, h_arr_t):
            x_t = in_batch_ta.read(time)
            h_t, c_t = self.lstmStep(x_t, h_t, c_t)
            h_arr_t = h_arr_t.write(time, h_t)
            return [time+1, h_t, c_t, h_arr_t]

        (_1, _2, _3, h_arr) = control_flow_ops.While(
            cond=lambda time, _1, _2, _3: time < seq_len,
            body=compute,
            loop_vars=(time, self.h_0, self.c_0, h_arr),
            parallel_iterations=32)

        output = h_arr.pack()    # [seq_len, batch_size, dim_h]
        return output
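Roughly, the surrounding graph looks like the sketch below. The placeholder shapes, the targets, the mean-squared-error cost and the concrete batch size are simplified stand-ins, not my actual code; only the variable 'temp' and the minimize() call correspond to what my script (textToImage.py) really does:

    # simplified sketch of the surrounding graph (not my real cost function)
    dim_x, dim_h, batch_size = 1000, 128, 32    # 1000 and 4*128 = 512 match the error below

    lstm = LSTM(dim_x, dim_h, batch_size)

    # sequence_length varies per batch, so the first dimension is left unknown
    in_batch = tf.placeholder(tf.float32, shape=[None, batch_size, dim_x])
    targets = tf.placeholder(tf.float32, shape=[None, batch_size, dim_h])

    output = lstm.lstmUnroll(in_batch)                     # [seq_len, batch_size, dim_h]
    temp = tf.reduce_mean(tf.square(output - targets))     # stand-in cost
    opt = tf.train.AdamOptimizer().minimize(temp)          # <-- this line triggers the error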
I defined a graph using this LSTM together with some cost function, along the lines of the sketch above. The graph compiles correctly, and I can run a forward pass with an 'in_batch' of size [sequence_length, batch_size, input_dim], where 'sequence_length' may vary from batch to batch. However, when I use the optimizer (Adam) on the cost function, I get the following error message:
Traceback (most recent call last):
File "textToImage.py", line 351, in <module>
opt = tf.train.AdamOptimizer().minimize(temp)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 192, in minimize
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 297, in apply_gradients
update_ops.append(self._apply_dense(grad, var))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/adam.py", line 129, in _apply_dense
self._epsilon_t, grad, use_locking=self._use_locking).op
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/gen_training_ops.py", line 81, in apply_adam
use_locking=use_locking, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/op_def_library.py", line 655, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2042, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1528, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/training_ops.py", line 72, in _ApplyAdamShape
grad_shape = op.inputs[9].get_shape().merge_with(v_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/tensor_shape.py", line 541, in merge_with
self.assert_same_rank(other)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/tensor_shape.py", line 584, in assert_same_rank
"Shapes %s and %s must have the same rank" % (self, other))
ValueError: Shapes () and (1000, 512) must have the same rank
Here 1000 is 'dim_x' and 512 is 4*'dim_h', so the error concerns 'W_x_h'. I have already tried calling '.set_shape()' on 'x_t', 'h_t', 'c_t' and 'output' inside 'lstmUnroll()', but it still fails.
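Concretely, one variant of that attempt looked roughly like this (a sketch; the exact set_shape() calls I tried may have differed slightly):

    # inside lstmUnroll(), with set_shape() added after each read/step
    def compute(time, h_t, c_t, h_arr_t):
        x_t = in_batch_ta.read(time)
        x_t.set_shape([self.batch_size, self.dim_x])    # pin the per-step input shape
        h_t, c_t = self.lstmStep(x_t, h_t, c_t)
        h_t.set_shape([self.batch_size, self.dim_h])
        c_t.set_shape([self.batch_size, self.dim_h])
        h_arr_t = h_arr_t.write(time, h_t)
        return [time+1, h_t, c_t, h_arr_t]

    # and after the loop:
    output = h_arr.pack()
    output.set_shape([None, self.batch_size, self.dim_h])

The optimizer still fails with the same rank error.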
Any ideas on how to make this work with the optimizer?