我试图从https://arxiv.org/pdf/1706.02515实现新的 SELU 激活功能。有关更多信息,这里是我的代码:
import tensorflow as tf
import numpy as np
from PIL import Image
import os
from keras.activations import elu
batch_size = 32
def weight_variable(kernal_shape):
weights = tf.get_variable(name='weights', shape=kernal_shape, dtype=tf.float32, trainable=True,
initializer=tf.truncated_normal_initializer(stddev=0.02))
return weights
def bias_variable(shape):
initial = tf.constant(0.0, shape=shape)
return tf.Variable(initial)
def selu(x):
alpha = 1.6732632423543772848170429916717
scale = 1.0507009873554804934193349852946
#return scale * tf.where(x >= 0.0, x, alpha * tf.exp(x) - alpha)
return scale * elu(x, alpha)
def conv_layer(x, w_shape, b_shape, padding='SAME'):
W = weight_variable(w_shape)
tf.summary.histogram("weights", W)
b = bias_variable(b_shape)
tf.summary.histogram("biases", b)
# Note that I used a stride of 2 on purpose in order not to use max pool layer.
activations = selu(tf.nn.conv2d(x, W, strides=[1, 2, 2, 1], padding=padding) + b)
tf.summary.histogram(activations.name, activations)
W1 = tf.shape(x)[1]
W2 = tf.shape(activations)[1]
F = w_shape[0]
P = tf.divide(tf.add(tf.subtract(tf.multiply(tf.subtract(W2, 1), 2), W1), F), 2)
return activations, P
def deconv_layer(x, w_shape, b_shape, padding="SAME"):
W = weight_variable(w_shape)
tf.summary.histogram("weights", W)
b = bias_variable(b_shape)
tf.summary.histogram('biases', b)
x_shape = tf.shape(x)
out_shape = tf.stack([x_shape[0], x_shape[1] * 2, x_shape[2] * 2, w_shape[2]])
# Note that I have used a stride of 2 since I used a stride of 2 in conv layer.
transposed_activations = tf.nn.conv2d_transpose(x, W, out_shape, [1, 2, 2, 1], padding=padding) + b
tf.summary.histogram(transposed_activations.name, transposed_activations)
return transposed_activations
tfrecords_filename_seq = ["P16_db.tfrecords"]
filename_queue = tf.train.string_input_producer(tfrecords_filename_seq, num_epochs=None, shuffle=False, name='queue')
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
# Defaults are not specified since both keys are required.
features={
'height': tf.FixedLenFeature([], tf.int64),
'width': tf.FixedLenFeature([], tf.int64),
'image_raw': tf.FixedLenFeature([], tf.string),
'annotation_raw': tf.FixedLenFeature([], tf.string)
})
# This is how we create one example, that is, extract one example from the database.
image = tf.decode_raw(features['image_raw'], tf.uint8)
# The height and the weights are used to
height = tf.cast(features['height'], tf.int32)
width = tf.cast(features['width'], tf.int32)
# The image is reshaped since when stored as a binary format, it is flattened. Therefore, we need the
# height and the weight to restore the original image back.
image = tf.reshape(image, [height, width, 3])
image = tf.cast([image], tf.float32)
with tf.variable_scope('conv1'):
conv1, P1 = conv_layer(image, [3, 3, 3, 32], [32]) # image size: [56, 56]
with tf.variable_scope('conv2'):
conv2, P2 = conv_layer(conv1, [3, 3, 32, 64], [64]) # image size: [28, 28]
with tf.variable_scope('conv3'):
conv3, P3 = conv_layer(conv2, [3, 3, 64, 128], [128]) # image size: [14, 14]
with tf.variable_scope('conv4'):
conv4, P4 = conv_layer(conv3, [3, 3, 128, 256], [256]) # image size: [7, 7]
conv4_reshaped = tf.reshape(conv4, [-1, 7 * 7 * 256], name='conv4_reshaped')
w_c = tf.Variable(tf.truncated_normal([7 * 7 * 256, 100], stddev=0.1), name='weight_fc')
b_c = tf.Variable(tf.constant(0.1, shape=[100]), name='biases_fc')
tf.summary.histogram('weights_c', w_c)
tf.summary.histogram('biases_c', b_c)
with tf.variable_scope('z'):
z = selu(tf.nn.bias_add(tf.matmul(conv4_reshaped, w_c), b_c))
tf.summary.histogram('features_z', z)
w_dc = tf.Variable(tf.truncated_normal([100, 7 * 7 * 256], stddev=0.1), name='weights_dc')
b_dc = tf.Variable(tf.constant(0.1, shape=[7 * 7 * 256]), name='biases_dc')
tf.summary.histogram('weights_dc', w_dc)
tf.summary.histogram('biases_dc', b_dc)
with tf.variable_scope('deconv4'):
deconv4 = selu(tf.nn.bias_add(tf.matmul(z, w_dc), b_dc))
deconv4_reshaped = tf.reshape(deconv4, [-1, 7, 7, 256], name='deconv4_reshaped')
with tf.variable_scope('deconv3'):
deconv3 = deconv_layer(deconv4_reshaped, [3, 3, 128, 256], [128])
with tf.variable_scope('deconv2'):
deconv2 = deconv_layer(deconv3, [3, 3, 64, 128], [64])
with tf.variable_scope('deconv1'):
deconv1 = deconv_layer(deconv2, [3, 3, 32, 64], [32])
with tf.variable_scope('deconv_image'):
deconv_image = deconv_layer(deconv1, [3, 3, 3, 32], [3])
with tf.name_scope('loss'):
loss = tf.reduce_mean(tf.abs(deconv_image - image))
tf.summary.scalar('loss', loss)
with tf.name_scope('optimizer'):
optimizer = tf.train.AdamOptimizer(0.0001).minimize(loss)
init_op = tf.group(tf.local_variables_initializer(),
tf.global_variables_initializer())
saver = tf.train.Saver()
model_path = 'C:/Users/iayou005/Documents/tensorboard_logs/Graph_model/ckpt'
# Here is the session...
with tf.Session() as sess:
train_writer = tf.summary.FileWriter('C:/Users/iayou005/Documents/tensorboard_logs/New_Runs/DeconvNet', sess.graph)
merged = tf.summary.merge_all()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
step = 0
sess.run(init_op)
# Note that the last name "Graph_model" is the name of the saved checkpoints file => the ckpt is saved
# under tensorboard_logs.
ckpt = tf.train.get_checkpoint_state(
os.path.dirname('C:/Users/iayou005/Documents/tensorboard_logs/Graph_model/ckpt'))
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
print('checkpoints are saved!!!')
else:
print('No stored checkpoints')
while step < 100000:
if step % 1000 == 0:
img = sess.run([deconv_image])
img2 = Image.fromarray(np.uint8(img[0][0]))
img2.save('Reconstructed' + str(step) + '.png', 'png')
__, loss_s, summary = sess.run([optimizer, loss, merged])
if step % 100 == 0:
train_writer.add_summary(summary, step)
print(loss_s)
step += 1
save_path = saver.save(sess, model_path)
coord.request_stop()
coord.join(threads)
train_writer.close()
因此,在不使用 Keras 的情况下,我一直得到一个 NAN:
InvalidArgumentError (see above for traceback): Nan in summary histogram for: conv1/weights_1
[[Node: conv1/weights_1 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](conv1/weights_1/tag, conv1/weights/read/_61)]]
我想知道获得 NAN 的原因。
任何帮助深表感谢!!