tensorflow - tf.layers.batch_normalization 在 sess.run() (1.5.0-dev20171031) 期间冻结

Question

计算图构建阶段顺利通过，但程序在第一个 epoch 的第一个小批量（mini-batch）的 sess.run() 期间冻结（不读取硬盘，内存占用不变化，没有任何输出……）。如果我删除该层，或将其替换为 tf.contrib.layers.layer_norm，程序就可以正常运行。

我传递给 tf.layers.batch_normalization 的张量（x）的形状为 [#batches, 200]。我使用了大部分默认值，但关闭了 center 和 scale。

# Batch-normalize x along its last axis. Most arguments simply repeat the
# library defaults; center (beta offset) and scale (gamma factor) are turned
# off. x and Flg_training are defined outside this snippet.
x_BN = tf.layers.batch_normalization(
    x,
    axis=-1, 
    momentum=0.99,
    epsilon=1e-10, # default: 0.001
    center=False, # default: True
    scale=False, # default: True
    beta_initializer=tf.zeros_initializer(),
    gamma_initializer=tf.ones_initializer(),
    moving_mean_initializer=tf.zeros_initializer(),
    moving_variance_initializer=tf.ones_initializer(),
    beta_regularizer=None,
    gamma_regularizer=None,
    beta_constraint=None,
    gamma_constraint=None,
    training=Flg_training, # default: False
    trainable=True,
    name=None,
    reuse=None,
    renorm=False,
    renorm_clipping=None,
    renorm_momentum=0.99,
    fused=False,
    virtual_batch_size=None,
    adjustment=None
    )

我使用的 tensorflow 版本是 tf-nightly-gpu（1.5.0-dev20171031 或 1.5.0-dev20171023）。有没有人遇到过类似的问题？

更新

当 tf.layers.batch_normalization 的输入来自 tf.nn.bidirectional_dynamic_rnn 时，就会出现这种情况。请查看下面用于重现此问题的简化代码：

import tensorflow as tf
import numpy as np

# Hyper-parameters for the learning-rate schedule and the stacked LSTM.
starter_learning_rate = 0.001  # initial rate handed to exponential_decay
decay_steps = 100  # decay interval handed to exponential_decay
decay_rate = 0.96  # decay factor handed to exponential_decay
num_RNN_layers = 3  # number of LSTM cells stacked in the MultiRNNCell
LSTM_CELL_SIZE = 100  # num_units of every LSTMCell
keep_prob = 0.95  # output_keep_prob of every DropoutWrapper

with tf.name_scope('Inputs'):
    # Placeholders that are fed on every sess.run call of the training loop.
    x = tf.placeholder(dtype=tf.float32, shape=[None, 200])  # input batch
    y = tf.placeholder(dtype=tf.float32, shape=[None, 200])  # regression targets compared with y_pred
    length = tf.placeholder(dtype=tf.int32, shape=[None])  # per-example sequence_length for the RNN
    Flg_training = tf.placeholder(dtype=tf.bool, shape=[])  # scalar flag passed as batch-norm `training`
    
    # Append a trailing axis of size 1 so x can be used as the RNN `inputs`.
    x_1 = tf.expand_dims(x, -1)

# Build a stacked, dropout-wrapped LSTM, run it bidirectionally over x_1 and
# concatenate the last layer's final hidden states of both directions.
with tf.name_scope('BiLSTM'):
    # One DropoutWrapper(LSTMCell) per layer.
    dropcells = []
    for iiLyr in list(range(num_RNN_layers)):
        cell_iiLyr = tf.nn.rnn_cell.LSTMCell(num_units=LSTM_CELL_SIZE, state_is_tuple=True)
        dropcells.append(tf.nn.rnn_cell.DropoutWrapper(cell=cell_iiLyr, output_keep_prob=keep_prob))  # only output_keep_prob is set; no other DropoutWrapper argument is passed
    
    MultiLyr_cell = tf.nn.rnn_cell.MultiRNNCell(cells=dropcells, state_is_tuple=True)

    # NOTE(review): the same MultiLyr_cell object is passed as both cell_fw and
    # cell_bw -- confirm whether the two directions are meant to use a single
    # cell instance or two separately constructed ones.
    outputs, states  = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=MultiLyr_cell, 
        cell_bw=MultiLyr_cell, 
        dtype=tf.float32,
        sequence_length=length, # per-example lengths placeholder
        inputs=x_1, # x with the extra trailing axis
        scope = "BiLSTM"
        )

    # The per-time-step `outputs` are not used below; only the final states are.
    states_fw, states_bw = states
    
    # Take the last stacked layer's state and split it into its (c, h) pair.
    c_fw_lstLyr, h_fw_lstLyr = states_fw[-1]
    c_bw_lstLyr, h_bw_lstLyr = states_bw[-1]
    
    # Join forward and backward hidden states along axis 1.
    states_concat1 = tf.concat([h_fw_lstLyr, h_bw_lstLyr], axis = 1, name = 'states_concat')

# Batch-normalize the concatenated final hidden states. This is the layer the
# question is about: center and scale are disabled, and the training/inference
# switch is the Flg_training placeholder.
with tf.name_scope("cs_BN1"):
    x_BN = tf.layers.batch_normalization(
        states_concat1,
        axis=-1, # axis that should be normalized (typically the features axis, in this case the concatenated states or hidden vectors)
        momentum=0.99,
        epsilon=1e-10, # default: 0.001
        center=False, # default: True
        scale=False, # default: True
        beta_initializer=tf.zeros_initializer(),
        gamma_initializer=tf.ones_initializer(),
        moving_mean_initializer=tf.zeros_initializer(),
        moving_variance_initializer=tf.ones_initializer(),
        beta_regularizer=None,
        gamma_regularizer=None,
        beta_constraint=None,
        gamma_constraint=None,
        training=Flg_training, # default: False
        trainable=True,
        name="test_BN", # default: None
        reuse=None,
        renorm=False,
        renorm_clipping=None,
        renorm_momentum=0.99,
        fused=False,
        virtual_batch_size=None,
        adjustment=None
        )

# Affine regression head on top of the normalized states, plus its MSE loss.
with tf.name_scope("Regression"):
    # Shape-[1] parameters, initialised to a = 1.0 and b = 0.0.
    a = tf.get_variable("a", shape=[1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
    b = tf.get_variable("b", shape=[1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
    
with tf.name_scope("Prediction"):
    # y_pred = x_BN * a + b
    y_pred = tf.multiply(x_BN, a) + b

with tf.name_scope('Loss'):
    # Reduction.NONE is requested here; the result is then averaged into
    # the scalar mean_loss, which is the value printed by the training loop.
    losses = tf.losses.mean_squared_error(y, y_pred, reduction=tf.losses.Reduction.NONE)
    mean_loss = tf.reduce_mean(losses)

# Optimizer setup: Adam with a staircase exponentially-decayed learning rate.
with tf.name_scope('Training'):
    # Step counter passed to minimize(); created with trainable=False.
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(
        starter_learning_rate,
        global_step,
        decay_steps,
        decay_rate,
        staircase=True,
    )

    # Everything registered under UPDATE_OPS is made a control dependency of
    # the training op, so those ops run together with each training step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        # Minimize the scalar mean_loss -- the same value the training loop
        # reports -- rather than the unreduced `losses` tensor, so the
        # optimized objective and the logged objective agree.
        train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
            mean_loss, global_step=global_step
        )

    
#x_mean = tf.reduce_mean(x_BN, axis=0)

# Run the reproduction: open a session, write the graph to an event file,
# then train on random data whose targets follow y = 1.5 * x + 3.0.
sess = tf.InteractiveSession()
train_writer = tf.summary.FileWriter("G:\\Surface_Ozone\\Temp\\", sess.graph)
try:
    sess.run(tf.global_variables_initializer())

    for ii in range(2000):
        # Fresh random mini-batch: 20 examples of 200 values, all full length.
        x_in = np.random.rand(20, 200)
        y_in = x_in * 1.5 + 3.0
        length_in = np.full([20], 200, dtype=np.int32)

        _, mean_loss_val, a_val, b_val = sess.run(
            [train_step, mean_loss, a, b],
            feed_dict={
                x: x_in,
                Flg_training: True,
                y: y_in,
                length: length_in,
            },
        )

        # Log each of the first 50 steps, afterwards only every 100th step.
        if ii < 50 or ii % 100 == 0:
            print("step {}: {} | a: {} | b: {}".format(ii, mean_loss_val, a_val, b_val))
finally:
    # Close the writer so the graph event is flushed to disk, and release the
    # session, even when a training step raises.
    train_writer.close()
    sess.close()

print("Normal End.")

tensorflow - tf.layers.batch_normalization 在 sess.run() (1.5.0-dev20171031) 期间冻结

更新

0 回答 0

Related

Reference