我在 TensorFlow 中实现了一个自回归时间卷积网络(TCN)。但是,当我在 TemporalBlock 中加入一个概率层之后,使用全批次训练时模型就不再学习了;使用小批量训练时,训练损失有所下降、训练准确度也有所提高,但测试集上的准确度始终没有变化。
引发这个问题的是下面这行代码:
x = tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=t, scale=.1))(x)
代码如下:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow_probability as tfp
# Read only column index 1 of the CSV, ignoring the last three lines of the
# file (skipfooter requires the Python parsing engine), and work in float32.
dataframe = pd.read_csv(
    'international-airline-passengers.csv',
    usecols=[1],
    engine='python',
    skipfooter=3,
)
dataset = dataframe.values.astype('float32')
def norm(x):
    """Min-max scale *x* so that its minimum maps to 0 and its maximum to 1.

    Args:
        x: array-like of numbers (converted with ``np.asarray``).

    Returns:
        Array of the same shape as *x*. When every element of *x* is equal
        the range is zero; an all-zero array is returned in that case instead
        of the NaNs a 0/0 division would produce.

    Raises:
        ValueError: if *x* is empty (raised by ``np.min``).
    """
    x = np.asarray(x)
    lowest = np.min(x)
    span = np.max(x) - lowest
    # A zero span means x - lowest is already all zeros; dividing by 1 keeps
    # the same dtype promotion as the regular path without producing NaN.
    divisor = span if span != 0 else 1
    return (x - lowest) / divisor
# Window length (number of past values fed to the model) and NumPy RNG seed.
look_back = 3
np.random.seed(7)

# Scale the series to [0, 1].
# NOTE(review): the scaler is fitted on the full series before the split
# below, so the test portion's min/max influence the training inputs.
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)

# Chronological 80/20 split: the first rows train, the remainder test.
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train = dataset[:train_size, :]
test = dataset[train_size:, :]
print(len(train), len(test))
def create_dataset(dataset, look_back=look_back):
    """Turn column 0 of *dataset* into sliding windows and next-step targets.

    Args:
        dataset: 2-D array; only column 0 is read.
        look_back: number of consecutive rows in each input window.

    Returns:
        Tuple ``(windows, targets)`` of NumPy arrays. ``windows[i]`` holds
        rows ``i .. i + look_back - 1`` and ``targets[i]`` is the value at
        row ``i + look_back``.
    """
    n_windows = len(dataset) - look_back
    windows = [dataset[start:start + look_back, 0] for start in range(n_windows)]
    targets = [dataset[start + look_back, 0] for start in range(n_windows)]
    return np.array(windows), np.array(targets)
# Build (window, next value) pairs for both splits.
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

# Targets become column vectors with one value per sample.
trainY = trainY.reshape(-1, 1)
testY = testY.reshape(-1, 1)

# Aliases used by the training loop.
X0, Y0 = trainX, trainY

# Shorthand for the TensorFlow Probability distributions module.
tfd = tfp.distributions
class TemporalConvNet(tf.layers.Layer):
    """Stack of ``TemporalBlock`` layers whose dilation doubles at each level.

    Args:
        num_channels: sequence with the number of output channels of each
            level; its length sets the number of levels.
        kernel_size: kernel size passed to every block.
        dropout: dropout rate passed to every block.
        trainable, name, dtype, activity_regularizer, **kwargs: forwarded
            unchanged to ``tf.layers.Layer``.
    """

    def __init__(self, num_channels, kernel_size=2, dropout=0.2,
                 trainable=True, name=None, dtype=None,
                 activity_regularizer=None, **kwargs):
        super(TemporalConvNet, self).__init__(
            trainable=trainable, dtype=dtype,
            activity_regularizer=activity_regularizer,
            name=name, **kwargs
        )
        # Level i is named "tblock_i" and uses a dilation rate of 2 ** i.
        self.layers = [
            TemporalBlock(
                level_channels, kernel_size, strides=1,
                dilation_rate=2 ** level, dropout=dropout,
                name="tblock_{}".format(level),
            )
            for level, level_channels in enumerate(num_channels)
        ]

    def call(self, inputs, training=True):
        """Feed *inputs* through every block in order and return the result."""
        outputs = inputs
        for block in self.layers:
            outputs = block(outputs, training=training)
        return outputs
# --- Optimisation / logging ---
learning_rate = 0.001
display_step = 10   # metrics are evaluated and printed every this many steps

# --- Model dimensions ---
num_input = 10
num_hidden = 20     # channels of each temporal block
num_classes = 1     # width of the target and of the final dense output
levels = 6          # number of stacked temporal blocks
kernel_size = 8     # kernel size passed to the temporal conv net
dropout = 0.1       # dropout rate passed to the temporal conv net
class CausalConv1D(tf.layers.Conv1D):
    """``tf.layers.Conv1D`` that pads only the left side of axis 1.

    The parent layer is always configured with ``padding='valid'`` and
    ``data_format='channels_last'``; ``call`` prepends
    ``(kernel_size - 1) * dilation_rate`` zeros along axis 1 before running
    the convolution. All other constructor arguments are forwarded unchanged
    to ``tf.layers.Conv1D``.
    """

    def __init__(self, filters,
                 kernel_size,
                 strides=1,
                 dilation_rate=1,
                 activation=None,
                 use_bias=True,
                 kernel_initializer=None,
                 bias_initializer=tf.zeros_initializer(),
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 trainable=True,
                 name=None,
                 **kwargs):
        super(CausalConv1D, self).__init__(
            # Fixed by this subclass.
            padding='valid',
            data_format='channels_last',
            # Geometry of the convolution.
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            dilation_rate=dilation_rate,
            # Activation and bias.
            activation=activation,
            use_bias=use_bias,
            # Initializers, regularizers and constraints.
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            trainable=trainable,
            name=name, **kwargs
        )

    def call(self, inputs):
        """Left-pad axis 1 of *inputs*, then apply the parent convolution."""
        left_pad = (self.kernel_size[0] - 1) * self.dilation_rate[0]
        # Rows are (before, after) padding per axis; only the "before" slot
        # of axis 1 is non-zero after scaling by left_pad.
        pad_spec = tf.constant([(0, 0,), (1, 0), (0, 0)]) * left_pad
        padded = tf.pad(inputs, pad_spec)
        return super(CausalConv1D, self).call(padded)
class TemporalBlock(tf.layers.Layer):
    """Residual block: two causal convolutions plus a Normal noise layer.

    Each convolution is followed by layer normalisation and dropout. The
    result is wrapped in a ``Normal(loc=., scale=0.1)`` distribution layer
    and added to the (optionally projected) block input before a final ReLU.

    Args:
        n_outputs: number of filters of both convolutions.
        kernel_size: kernel size of both convolutions.
        strides: stride of both convolutions.
        dilation_rate: dilation rate of both convolutions.
        dropout: dropout rate of both dropout layers.
        trainable, name, dtype, activity_regularizer, **kwargs: forwarded
            unchanged to ``tf.layers.Layer``.
    """

    def __init__(self, n_outputs, kernel_size, strides, dilation_rate, dropout=0.2,
                 trainable=True, name=None, dtype=None,
                 activity_regularizer=None, **kwargs):
        super(TemporalBlock, self).__init__(
            trainable=trainable, dtype=dtype,
            activity_regularizer=activity_regularizer,
            name=name, **kwargs
        )
        self.dropout = dropout
        self.n_outputs = n_outputs
        self.conv1 = CausalConv1D(
            n_outputs, kernel_size, strides=strides,
            dilation_rate=dilation_rate, activation=tf.nn.relu,
            name="conv1")
        self.conv2 = CausalConv1D(
            n_outputs, kernel_size, strides=strides,
            dilation_rate=dilation_rate, activation=tf.nn.relu,
            name="conv2")
        # Replaced in build() by a Dense projection when the input channel
        # count differs from n_outputs.
        self.down_sample = None

    def build(self, input_shape):
        """Create the dropout layers and, if needed, the residual projection."""

        def _channel_dropout():
            # Noise shape (1, 1, n_outputs): one dropout decision per channel.
            return tf.layers.Dropout(
                self.dropout,
                [tf.constant(1), tf.constant(1), tf.constant(self.n_outputs)])

        self.dropout1 = _channel_dropout()
        self.dropout2 = _channel_dropout()
        # Axis 2 is the channel axis.
        if input_shape[2] != self.n_outputs:
            self.down_sample = tf.layers.Dense(self.n_outputs, activation=None)

    def call(self, inputs, training=True):
        """Apply the block to *inputs*; *training* toggles the dropout layers."""
        hidden = inputs
        stages = ((self.conv1, self.dropout1), (self.conv2, self.dropout2))
        for conv, drop in stages:
            hidden = conv(hidden)
            hidden = tf.contrib.layers.layer_norm(hidden)
            hidden = drop(hidden, training=training)
        # A fresh distribution layer is created on every call and is applied
        # regardless of `training`.
        # NOTE(review): adding it to a tensor below presumably yields a
        # sample (TFP's default tensor conversion) - confirm.
        noisy = tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=t, scale=.1))(hidden)
        residual = inputs
        if self.down_sample is not None:
            residual = self.down_sample(inputs)
        return tf.nn.relu(noisy + residual)
# Build the training graph: placeholders, TCN + dense head, loss, optimizer.
tf.reset_default_graph()
graph = tf.Graph()
with graph.as_default():
    tf.set_random_seed(2)
    # Inputs are windows of `look_back` steps with one feature each; targets
    # have `num_classes` values per sample.
    X = tf.placeholder("float", [None, look_back, 1])
    Y = tf.placeholder("float", [None, num_classes])
    is_training = tf.placeholder("bool")
    # Only the last time step of the TCN output feeds the dense head.
    logits = tf.layers.dense(
        TemporalConvNet([num_hidden] * levels, kernel_size, dropout)(
            X, training=is_training)[:, -1, :],
        num_classes, activation=None,
        kernel_initializer=tf.glorot_uniform_initializer()
    )
    # Keep the regression output linear. A ReLU here has zero gradient
    # whenever every logit is negative, which pins the prediction at 0 and
    # freezes the loss (the constant-loss plateau seen during training).
    prediction = logits
    # mean_squared_error already reduces to a scalar with its default
    # reduction, so no extra reduce_mean is needed.
    loss_op = tf.losses.mean_squared_error(labels=Y, predictions=prediction)
    # "Accuracy" here is defined as 1 - RMSE.
    accuracy = 1 - tf.sqrt(loss_op)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)
    saver = tf.train.Saver()
    # np.prod replaces the np.product alias, which newer NumPy removed.
    print("All parameters:", np.sum([np.prod([xi.value for xi in x.get_shape()]) for x in tf.global_variables()]))
    print("Trainable parameters:", np.sum([np.prod([xi.value for xi in x.get_shape()]) for x in tf.trainable_variables()]))
def next_batch(num, data, labels):
    """Return up to *num* randomly chosen samples and their labels.

    Indices ``0 .. len(data) - 1`` are shuffled with NumPy's global random
    state and the first *num* are kept, so samples are drawn without
    replacement and the same positions are used for *data* and *labels*.

    Args:
        num: maximum number of samples to return.
        data: indexable collection of samples.
        labels: indexable collection of labels, indexed like *data*.

    Returns:
        Tuple ``(batch_data, batch_labels)`` of float32 NumPy arrays.
    """
    order = np.arange(0, len(data))
    np.random.shuffle(order)
    picked = order[:num]

    def _gather(source):
        # Collect the chosen items one by one, then cast the stack to float32.
        return np.asarray([source[i] for i in picked]).astype(np.float32)

    return _gather(data), _gather(labels)
# Graph summary output for TensorBoard.
log_dir = "/home/rubens/Documents/Dados/"
tb_writer = tf.summary.FileWriter(log_dir, graph)

# Session configuration: no incremental GPU allocation, capped at 70% of
# the GPU memory.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(
        allow_growth=False,
        per_process_gpu_memory_fraction=0.7,
    )
)

# A checkpoint is written only when the test "accuracy" exceeds this value.
best_val_acc = 0.7
training_epochs = 6000

# Full-batch training: one batch holds every training window.
batch_size = len(X0)

# Add a trailing feature axis: (samples, look_back) -> (samples, look_back, 1).
X0 = X0.reshape((-1, look_back, 1))
testX = testX.reshape((-1, look_back, 1))
# Train the model, reporting metrics periodically and checkpointing whenever
# the test "accuracy" improves. (Nesting reconstructed from the flattened
# source and the logged output.)
with tf.Session(graph=graph, config=config) as sess:
    init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init)
    for step in range(1, training_epochs+1):
        batch_x, batch_y = next_batch(batch_size, X0, Y0)
        train_feed = {X: batch_x, Y: batch_y, is_training: True}
        sess.run(train_op, feed_dict=train_feed)

        # Report on the first step and then every `display_step` steps.
        if not (step % display_step == 0 or step == 1):
            continue

        # Metrics on the current batch, with dropout disabled.
        eval_feed = {X: batch_x, Y: batch_y, is_training: False}
        loss, acc = sess.run([loss_op, accuracy], feed_dict=eval_feed)

        test_data = testX
        test_label = testY
        val_acc = sess.run(accuracy, feed_dict={X: test_data, Y: test_label, is_training: False})
        print("Step {}, Minibatch Loss= {:.4f}, Training Accuracy= {:.4f}, Test Accuracy= {:.4f}".format(
            step, loss, acc, val_acc))
        print(acc)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            save_path = saver.save(sess, "/home/rubens/Documents/Dados/model.ckpt")
            print("Model saved in path: %s" % save_path)

    # Final predictions on the test windows, taken after the last step.
    pred00 = sess.run([prediction],feed_dict={X: test_data, is_training: False})
使用全批次训练时的输出示例:
All parameters: 108425.0
Trainable parameters: 36141
Step 1, Minibatch Loss= 93.8851, Training Accuracy= -8.6894, Test Accuracy= -7.7621
-8.689434
Step 10, Minibatch Loss= 0.1591, Training Accuracy= 0.6011, Test Accuracy= 0.3290
0.6011038
Step 20, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 30, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 40, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 50, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 60, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 70, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 80, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 90, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 100, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 110, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 120, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 130, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 140, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 150, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 160, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 170, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 180, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 190, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
Step 200, Minibatch Loss= 0.1023, Training Accuracy= 0.6801, Test Accuracy= 0.3290
0.6800898
使用小批量训练时的输出示例:
Step 1, Minibatch Loss= 97.8395, Training Accuracy= -8.8914, Test Accuracy= -7.7148
-8.891384
Step 10, Minibatch Loss= 0.0639, Training Accuracy= 0.7473, Test Accuracy= 0.3290
0.747253
Step 20, Minibatch Loss= 0.0798, Training Accuracy= 0.7175, Test Accuracy= 0.3290
0.71748877
Step 30, Minibatch Loss= 0.1120, Training Accuracy= 0.6653, Test Accuracy= 0.3290
0.66534567
Step 40, Minibatch Loss= 0.0831, Training Accuracy= 0.7117, Test Accuracy= 0.3290
0.7116946
Step 50, Minibatch Loss= 0.1119, Training Accuracy= 0.6654, Test Accuracy= 0.3290
0.66541755
Step 60, Minibatch Loss= 0.0758, Training Accuracy= 0.7246, Test Accuracy= 0.3290
0.72463006
Step 70, Minibatch Loss= 0.1035, Training Accuracy= 0.6783, Test Accuracy= 0.3290
0.67830944
Step 80, Minibatch Loss= 0.1674, Training Accuracy= 0.5908, Test Accuracy= 0.3290
0.59082925
Step 90, Minibatch Loss= 0.0709, Training Accuracy= 0.7337, Test Accuracy= 0.3290
0.7337192
Step 100, Minibatch Loss= 0.1566, Training Accuracy= 0.6043, Test Accuracy= 0.3290
0.60427284
Step 110, Minibatch Loss= 0.0794, Training Accuracy= 0.7182, Test Accuracy= 0.3290
0.7182363
Step 120, Minibatch Loss= 0.1337, Training Accuracy= 0.6343, Test Accuracy= 0.3290
0.6343092
Step 130, Minibatch Loss= 0.0848, Training Accuracy= 0.7088, Test Accuracy= 0.3290
0.7087995
Step 140, Minibatch Loss= 0.0781, Training Accuracy= 0.7205, Test Accuracy= 0.3290
0.7205193
Step 150, Minibatch Loss= 0.1320, Training Accuracy= 0.6366, Test Accuracy= 0.3290
0.63664067
Step 160, Minibatch Loss= 0.1360, Training Accuracy= 0.6313, Test Accuracy= 0.3290
0.63125527
Step 170, Minibatch Loss= 0.0663, Training Accuracy= 0.7424, Test Accuracy= 0.3290
0.74244356
Step 180, Minibatch Loss= 0.1445, Training Accuracy= 0.6199, Test Accuracy= 0.3290
0.6198952
Step 190, Minibatch Loss= 0.1157, Training Accuracy= 0.6598, Test Accuracy= 0.3290
0.65980613
Step 200, Minibatch Loss= 0.0960, Training Accuracy= 0.6902, Test Accuracy= 0.3290
0.6902418
我尝试过添加归一化、降低学习率(因为训练似乎停在了梯度的平台区),以及改变批量大小、激活函数和隐藏层的设置,但都没有效果。
关于如何解决这个问题的任何想法?
数据可在此处获得
我正在使用 TensorFlow 1.14