tensorflow - Tensorflow RNN：每个时代的困惑度保持不变

Question

我正在使用 Tensorflow 训练基于 RNN 的语言模型。该模型与 TF 教程部分中的 PTB 模型示例非常相似。但是，当我尝试根据自己的数据训练模型时，模型的困惑度并没有下降；它在多个时期保持不变。谁能让我知道我可能做错了什么。

我有一种感觉，我没有正确处理目标，但我的目标代码的要点是：

def batcher(batch_size,unroll_steps,data,pad):
    print(len(data))
    batches = len(data) / batch_size
    inp = []
    target = []
    for i in range(batches):
            #print(len(data[i*batch_size:(i+1)*batch_size]))
            x = data[i*batch_size:(i+1)*batch_size]
            y =  [ line[1:]+[pad] for line in x ]
            yield (x,y)

也就是说，我只是将数据移动 1 并将其用作句子中下一个单词的目标。

训练脚本和模型（类）如下所示

训练脚本（摘录）：

def train(session, model, folder,batch_size,unroll_steps,epoch):

    word_to_id, id_to_word, train, val = build_inputs(folder,unroll_steps)
    pad = word_to_id['<pad>']
    costs = 0
    iters = 0
    train_size = len(train)
    batch_size = model.batch_size
    batches = train_size / batch_size
    state = session.run(model._initial_state)
    print("Running epoch %d" % epoch)
    for i in range(batches):
            fetches = [model.cost, model._final_state, model.logits]
            feed_dict = {}
            x = train[i*batch_size:(i+1)*batch_size]
            y = [ line[1:] +[pad] for line in x ]
            feed_dict[model.input] = x
            feed_dict[model.targets] = y
            feed_dict[model._initial_state] = state
            #print("Cell-state complete - Running")
            cost, state, logits = session.run(fetches, feed_dict)
            #print("Single Run complete")
            costs += cost
            iters += model.unroll_steps
    print("\tEpoch %d: Perplexity is %f" % (epoch, np.exp(costs/iters)))

    return np.exp(costs/iters)

模型：

import tensorflow as tf

class LM(object):

    def __init__(self, train, max_gradient, batch_size, unroll_steps, vocab, size, layers, learning_rate, init, prob):
            self.batch_size = batch_size
            self.max_gradient = max_gradient
            self.layers = layers
            self.learning_rate = learning_rate
            self.unroll_steps = unroll_steps
            self.init = init
            #with tf. name_scope("Paramters"):

            with tf.device('/gpu:0'), tf.name_scope("Input"):
                    self.input = tf.placeholder(tf.int64, shape=[batch_size, unroll_steps], name="input")
                    self.targets = tf.placeholder(tf.int64, shape=[batch_size, unroll_steps], name="targets")
                    #self.init = tf.placeholder(tf.float32, shape=[], name="init")

            with tf.device('/gpu:0'), tf.name_scope("Embedding"):
                    embedding = tf.Variable(tf.random_uniform([vocab, size], -self.init, self.init), dtype=tf.float32, name="embedding")
                    embedded_input = tf.nn.embedding_lookup(embedding, self.input, name="embedded_input")

            with tf.device('/gpu:0'), tf.name_scope("RNN"), tf.variable_scope(tf.get_variable_scope(), reuse = False) as scope:
                    lstm_cell = tf.contrib.rnn.BasicLSTMCell(size, forget_bias=0.0, state_is_tuple=True)
                    if train and prob < 1.0:
                            lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=prob)
                    cell = tf.contrib.rnn.MultiRNNCell([lstm_cell for _ in range(layers)], state_is_tuple=True)

                    self._initial_state = cell.zero_state(batch_size, tf.float32)
                    outputs = []
                    state = self._initial_state
                    for step in range(unroll_steps):
                            if step > 0: tf.get_variable_scope().reuse_variables()
                            (cell_output, state) = cell(embedded_input[:, step, :], state)
                            outputs.append(cell_output)

            with tf.device('/gpu:0'), tf.name_scope("Cost"), tf.variable_scope(tf.get_variable_scope(), reuse = False) as scope:
                    output = tf.reshape(tf.concat(outputs,1), [-1,size])
                    softmax_w = tf.get_variable("softmax_w", [size, vocab], dtype=tf.float32)
                    softmax_b = tf.get_variable("softmax_b", [vocab], dtype=tf.float32)
                    logits = tf.matmul(output, softmax_w) + softmax_b
                    losses = []
                    for logit, target in zip([logits], [tf.reshape(self.targets,[-1])]):
                            target = tf.reshape(target, [-1])
                            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit,labels=target)
                            losses.append(loss)
                    self.cost = tf.reduce_sum(losses) / batch_size
                    self._final_state = state
                    self.logits = logits
                    scope.reuse_variables()

            if not train:
                    return

            with tf.device('/gpu:0'), tf.name_scope("Train"), tf.variable_scope(tf.get_variable_scope(), reuse=False):
                    train_variables = tf.trainable_variables()
                    gradients, _ = tf.clip_by_global_norm(tf.gradients(self.cost, train_variables),self.max_gradient)
                    optimizer = tf.train.AdamOptimizer(self.learning_rate)
                    self.training = optimizer.apply_gradients(zip(gradients, train_variables))
                    tf.get_variable_scope().reuse_variables()

tensorflow - Tensorflow RNN：每个时代的困惑度保持不变

0 回答 0

Related

Reference