tensorflow - Keras训练部分模型问题（关于GAN模型）

Question

我在使用 keras 实现 GAN 模型时遇到了一个奇怪的问题。

使用 GAN，我们需要先构建 G 和 D，然后添加一个新的序列模型 (GAN)，然后依次添加 (G)、add(D)。

当我这样做时，Keras 似乎反向传播回 G（通过 GAN 模型）D.train_on_batch，我得到了一个InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float.

如果我删除GAN model（最后堆叠的 G 然后 D 顺序模型），它会d_loss正确计算。

我的环境是：

Ubuntu 16.04
喀拉拉邦 1.2.2
张量流GPU 1.0.0
keras 配置：{ "backend": "tensorflow", "image_dim_ordering": "tf", "epsilon": 1e-07, "floatx": "float32" }

我知道很多人已经成功地用 keras 实现了 GAN，所以我想知道我哪里错了。

import numpy as np
import keras.layers as kl
import keras.models as km
import keras.optimizers as ko
from keras.datasets import mnist

batch_size = 16
lr = 0.0001

def noise_gen(batch_size, z_dim):
    noise = np.zeros((batch_size, z_dim), dtype=np.float32)
    for i in range(batch_size):
        noise[i, :] = np.random.uniform(-1, 1, z_dim)
    return noise

# --------------------Generator Model--------------------

model = km.Sequential()

model.add(kl.Dense(input_dim=100, output_dim=1024))
model.add(kl.Activation('relu'))

model.add(kl.Dense(7*7*128))
model.add(kl.BatchNormalization())
model.add(kl.Activation('relu'))
model.add(kl.Reshape((7, 7, 128), input_shape=(7*7*128,)))

model.add(kl.Deconvolution2D(64, 5, 5, (None, 14, 14, 64), subsample=(2, 2),
    input_shape=(7, 7, 128), border_mode='same'))
model.add(kl.BatchNormalization())
model.add(kl.Activation('relu'))

model.add(kl.Deconvolution2D(1, 5, 5, (None, 28, 28, 1), subsample=(2, 2),
    input_shape=(14, 14, 64), border_mode='same'))

G = model
G.compile(  loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))

# --------------------Discriminator Model--------------------

model = km.Sequential()

model.add(kl.Convolution2D( 64, 5, 5, subsample=(2, 2), input_shape=(28, 28, 1)))
model.add(kl.LeakyReLU(alpha=0.2))

model.add(kl.Convolution2D(128, 5, 5, subsample=(2, 2)))
model.add(kl.BatchNormalization())
model.add(kl.LeakyReLU(alpha=0.2))

model.add(kl.Flatten())
model.add(kl.Dense(1))
model.add(kl.Activation('sigmoid'))

D = model
D.compile(  loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))

# --------------------GAN Model--------------------

model = km.Sequential()
model.add(G)
D.trainable = False  # Is this necessary?
model.add(D)
GAN = model
GAN.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))

# --------------------Main Code--------------------
(X, _), _ = mnist.load_data()
X = X / 255.
X = X[:, :, :, np.newaxis]

X_batch = X[0:batch_size, :]
Z1_batch = noise_gen(batch_size, 100)
Z2_batch = noise_gen(batch_size, 100)

fake_batch = G.predict(Z1_batch)
real_batch = X_batch
print('--------------------Fake Image Generated!--------------------')

combined_X_batch = np.concatenate((real_batch, fake_batch))
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))))
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape))

D.trainable = True
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch)
print('--------------------Discriminator trained!--------------------')
print(d_loss)

D.trainable = False
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1)))
print('--------------------GAN trained!--------------------')
print(g_loss)

错误信息：

W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
Traceback (most recent call last):
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1022, in _do_call
    return fn(*args)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1004, in _run_fn
    status, run_metadata)
  File "/usr/lib/python3.5/contextlib.py", line 66, in __exit__
    next(self.gen)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/errors_impl.py", line 469, in raise_exception_on_not_ok_status
    pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
     [[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "./gen.py", line 84, in <module>
    d_loss = D.train_on_batch(combined_X_batch, combined_y_batch)
  File "/usr/local/lib/python3.5/dist-packages/keras/models.py", line 766, in train_on_batch
    class_weight=class_weight)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1320, in train_on_batch
    outputs = self.train_function(ins)
  File "/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py", line 1943, in __call__
    feed_dict=feed_dict)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 767, in run
    run_metadata_ptr)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 965, in _run
    feed_dict_string, options, run_metadata)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1015, in _do_run
    target_list, options, run_metadata)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1035, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
     [[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'dense_input_1', defined at:
  File "./gen.py", line 20, in <module>
    model.add(kl.Dense(input_dim=100, output_dim=1024))
  File "/usr/local/lib/python3.5/dist-packages/keras/models.py", line 299, in add
    layer.create_input_layer(batch_input_shape, input_dtype)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 397, in create_input_layer
    dtype=input_dtype, name=name)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 1198, in Input
    input_tensor=tensor)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 1116, in __init__
    name=self.name)
  File "/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py", line 321, in placeholder
    x = tf.placeholder(dtype, shape=shape, name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_ops.py", line 1520, in placeholder
    name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 2149, in _placeholder
    name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2395, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1264, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
     [[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

score 1 · Accepted Answer

努力了好久，终于知道是Discriminator的BatchNormalization层出了问题。

model.add(kl.BatchNormalization())如果您只是在鉴别器中注释掉。它会正常工作的。

但是，正如@NassimBen 所示，功能 API 不会导致任何问题。

import numpy as np
import keras.layers as kl
import keras.models as km
import keras.optimizers as ko
from keras.datasets import mnist

batch_size = 16
lr = 0.0001

def noise_gen(batch_size, z_dim):
    noise = np.zeros((batch_size, z_dim), dtype=np.float32)
    for i in range(batch_size):
        noise[i, :] = np.random.uniform(-1, 1, z_dim)
    return noise

# --------------------Generator Model--------------------

model = km.Sequential()

model.add(kl.Dense(input_dim=100, output_dim=1024))
model.add(kl.Activation('relu'))

model.add(kl.Dense(7*7*128))
model.add(kl.BatchNormalization())
model.add(kl.Activation('relu'))
model.add(kl.Reshape((7, 7, 128), input_shape=(7*7*128,)))

model.add(kl.Deconvolution2D(64, 5, 5, (None, 14, 14, 64), subsample=(2, 2),
                             input_shape=(7, 7, 128), border_mode='same'))
model.add(kl.BatchNormalization())
model.add(kl.Activation('relu'))

model.add(kl.Deconvolution2D(1, 5, 5, (None, 28, 28, 1), subsample=(2, 2),
                             input_shape=(14, 14, 64), border_mode='same'))

G = model
G.compile(  loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))

# --------------------Discriminator Model--------------------

model = km.Sequential()

model.add(kl.Convolution2D( 64, 5, 5, subsample=(2, 2), input_shape=(28, 28, 1)))
model.add(kl.LeakyReLU(alpha=0.2))

model.add(kl.Convolution2D(128, 5, 5, subsample=(2, 2)))
# model.add(kl.BatchNormalization())
model.add(kl.LeakyReLU(alpha=0.2))

model.add(kl.Flatten())
model.add(kl.Dense(1))
model.add(kl.Activation('sigmoid'))

D = model
D.compile(  loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))

# --------------------GAN Model--------------------

model = km.Sequential()
model.add(G)
D.trainable = False  # Is this necessary?
model.add(D)
GAN = model
GAN.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))

# --------------------Main Code--------------------
(X, _), _ = mnist.load_data()
X = X / 255.
X = X[:, :, :, np.newaxis]

X_batch = X[0:batch_size, :]
Z1_batch = noise_gen(batch_size, 100)
Z2_batch = noise_gen(batch_size, 100)

fake_batch = G.predict(Z1_batch)
real_batch = X_batch
print('--------------------Fake Image Generated!--------------------')

combined_X_batch = np.concatenate((real_batch, fake_batch))
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))))
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape))

D.trainable = True
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch)
print('--------------------Discriminator trained!--------------------')
print(d_loss)

D.trainable = False
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1)))
print('--------------------GAN trained!--------------------')
print(g_loss)

score 1 · Accepted Answer

首先，我建议您切换到功能 API 模型。功能模型更容易处理这些类型的混合模型。

我不知道为什么您的解决方案不能诚实地工作，似乎当您将 D 模型链接到新输入时，它会有点“损坏”并链接到它。我发现解决这个问题的方法是定义层并将它们用于鉴别器和 GAN 模型。这是代码：

import numpy as np
from keras.layers import *
import keras.models as km
import keras.optimizers as ko
from keras.datasets import mnist

batch_size = 16
lr = 0.0001

def noise_gen(batch_size, z_dim):
    noise = np.zeros((batch_size, z_dim), dtype=np.float32)
    for i in range(batch_size):
        noise[i, :] = np.random.uniform(-1, 1, z_dim)
    return noise

# Changes the traiable argument for all the layers of model
# to the boolean argument "trainable"
def make_trainable(model, trainable):
    model.trainable = trainable
    for l in model.layers:
        l.trainable = trainable

# --------------------Generator Model--------------------

g_input = Input(shape=(100,))

g_hidden = Dense(1024, activation='relu')(g_input)
g_hidden = Dense(7*7*128, activation='relu')(g_hidden)
g_hidden = BatchNormalization()(g_hidden)
g_hidden = Reshape((7,7,128))(g_hidden)

g_hidden = Deconvolution2D(64,5,5, (None, 14, 14, 64), subsample=(2,2),
        border_mode='same', activation='relu')(g_hidden)
g_hidden = BatchNormalization()(g_hidden)
g_output = Deconvolution2D(1,5,5, (None, 28, 28, 1), subsample=(2,2),
        border_mode='same')(g_hidden)

G = km.Model(input=g_input,output=g_output)
G.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))
G.summary()

# --------------------Discriminator Model--------------------

d_input = Input(shape=(28,28,1))

d_l1 = Convolution2D(64,5,5, subsample=(2,2))
d_hidden_1 = d_l1(d_input)
d_l2 = LeakyReLU(alpha=0.2)
d_hidden_2 = d_l2(d_hidden_1)

d_l3 = Convolution2D(128,5,5, subsample=(2,2))
d_hidden_3 = d_l3(d_hidden_2)
d_l4 = BatchNormalization()
d_hidden_4 = d_l4(d_hidden_3)
d_l5 = LeakyReLU(alpha=0.2)
d_hidden_5 = d_l5(d_hidden_4)

d_l6 = Flatten()
d_hidden_6 = d_l6(d_hidden_5)
d_l7 = Dense(1, activation='sigmoid')
d_output = d_l7(d_hidden_6)

D = km.Model(input=d_input,output=d_output)
D.compile(loss='binary_crossentropy',optimizer=ko.SGD(lr=lr,momentum=0.9, nesterov=True))
D.summary()

# --------------------GAN Model--------------------
make_trainable(D,False)

gan_input = Input(shape=(100,))
gan_hidden = G(gan_input)
gan_hidden = d_l1(gan_hidden)
gan_hidden = d_l2(gan_hidden)
gan_hidden = d_l3(gan_hidden)
gan_hidden = d_l4(gan_hidden)
gan_hidden = d_l5(gan_hidden)
gan_hidden = d_l6(gan_hidden)
gan_output = d_l7(gan_hidden)

GAN = km.Model(input=gan_input,output=gan_output)
GAN.compile(loss='binary_crossentropy',optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))
GAN.summary()

# --------------------Main Code--------------------
(X, _), _ = mnist.load_data()
X = X / 255.
X = X[:, :, :, np.newaxis]

X_batch = X[0:batch_size, :]
Z1_batch = noise_gen(batch_size, 100)
Z2_batch = noise_gen(batch_size, 100)

print(type(X_batch),X_batch.shape)
print(type(Z1_batch),Z1_batch.shape)

fake_batch = G.predict(Z1_batch)
real_batch = X_batch
print('--------------------Fake Image Generated!--------------------')

combined_X_batch = np.concatenate((real_batch, fake_batch))
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))))
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape))
print(type(combined_X_batch),combined_X_batch.dtype,combined_X_batch.shape)
print(type(combined_y_batch),combined_y_batch.dtype,combined_y_batch.shape)
make_trainable(D,True)
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch)
print('--------------------Discriminator trained!--------------------')
print(d_loss)

make_trainable(D,False)
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1)))
print('--------------------GAN trained!--------------------')
print(g_loss)

这有帮助吗？

tensorflow - Keras训练部分模型问题（关于GAN模型）

2 回答 2

Related

Reference