I have a large set of unlabeled data and a smaller set of labeled data. My plan is therefore to first train a variational autoencoder on the unlabeled data and then use the encoder, with fully connected layers attached, to classify the labeled data into three classes. I want to use Optuna to optimize the hyperparameters.
One possibility would be to optimize the autoencoder first and the fully connected network (the classifier) afterwards, but then the autoencoder might learn an encoding that is meaningless for the classification task.
Is it possible to optimize the autoencoder and the fully connected network jointly?
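To make the question more concrete: what I mean by "joint" optimization is a single Optuna objective that samples the hyperparameters of both parts in one trial, trains the VAE on the unlabeled data, then trains the classifier on top of the encoder, and returns the validation accuracy. Roughly like this (build_vae, build_classifier and the suggest_* ranges are only placeholders, not my real search space):

import optuna

def objective(trial):
    # hyperparameters of both models are sampled within one trial
    vae_params = {
        "latent_dim": trial.suggest_int("latent_dim", 8, 64),
        "beta": trial.suggest_float("beta", 0.5, 4.0),
        "learning_rate": trial.suggest_float("vae_lr", 1e-4, 1e-2, log=True),
    }
    fc_params = {
        "units": [trial.suggest_int("fc_units", 32, 256)],
        "learning_rate": trial.suggest_float("fc_lr", 1e-4, 1e-2, log=True),
    }
    encoder = build_vae(vae_params, train_X, valid_X)                    # unsupervised step (code below)
    model = build_classifier(fc_params, encoder, train_images, train_y)  # supervised step (code below)
    _, val_acc = model.evaluate(encoder.predict(valid_images)[0], valid_y, verbose=0)
    return val_acc

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)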
My autoencoder looks like this (params is just a dictionary holding the parameters):
inputs = Input(shape=image_size, name='encoder_input')
x = inputs
for i in range(len(params["conv_filter_encoder"])):
    x, _ = convolutional_unit(x, params["conv_filter_encoder"][i], params["conv_kernel_size_encoder"][i], params["strides_encoder"][i],
                              batchnorm=params["batchnorm"][i], dropout=params["dropout"][i], maxpool=params["maxpool"][i], deconv=False)
shape = K.int_shape(x)
x = Flatten()(x)
x = Dense(params["inner_dim"], activation='relu')(x)
z_mean = Dense(params["latent_dim"], name='z_mean')(x)
z_log_var = Dense(params["latent_dim"], name='z_log_var')(x)
# use reparameterization trick to push the sampling out as input
# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(params["latent_dim"],), name='z')([z_mean, z_log_var])
# instantiate encoder model
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
# build decoder model
latent_inputs = Input(shape=(params["latent_dim"],), name='z_sampling')
x = Dense(params["inner_dim"], activation='relu')(latent_inputs)
x = Dense(shape[1] * shape[2] * shape[3], activation='relu')(x)
x = Reshape((shape[1], shape[2], shape[3]))(x)
len_batchnorm = len(params["batchnorm"])
len_dropout = len(params["dropout"])
for i in range(len(params["conv_filter_decoder"])):
    x, _ = convolutional_unit(x, params["conv_filter_decoder"][i], params["conv_kernel_size_decoder"][i], params["strides_decoder"][i],
                              batchnorm=params["batchnorm"][len_batchnorm-i-1], dropout=params["dropout"][len_dropout-i-1], maxpool=None, deconv=True, activity_regularizer=params["activity_regularizer"])
outputs = Conv2DTranspose(filters=1,
                          kernel_size=params["conv_kernel_size_decoder"][len(params["conv_kernel_size_decoder"])-1],
                          activation='sigmoid',
                          padding='same')(x)
# instantiate decoder model
decoder = Model(latent_inputs, outputs, name='decoder')
# instantiate VAE model
outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae')
vae.higgins_beta = K.variable(value=params["beta"])
loss = config["loss"].value
def vae_loss(x, x_decoded_mean):
    """VAE loss function"""
    # VAE loss = mse_loss or xent_loss + kl_loss
    if loss == Loss.mse.value:
        reconstruction_loss = mse(K.flatten(x), K.flatten(x_decoded_mean))
    elif loss == Loss.bce.value:
        reconstruction_loss = binary_crossentropy(K.flatten(x),
                                                  K.flatten(x_decoded_mean))
    else:
        raise ValueError("Loss unknown")
    reconstruction_loss *= image_size[0] * image_size[1]
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    # kl_loss *= -0.5
    kl_loss *= -vae.higgins_beta
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    return vae_loss
batch_size = params["batch_size"]
optimizer = keras.optimizers.Adam(lr=params["learning_rate"], beta_1=0.9, beta_2=0.999,
                                  epsilon=1e-08, decay=params["learning_rate_decay"])
vae.compile(loss=vae_loss, optimizer=optimizer)
vae.fit(train_X, train_X,
        epochs=config.CONFIG["n_epochs"],
        batch_size=batch_size,
        verbose=0,
        callbacks=get_callbacks(config.CONFIG, autoencoder_path, encoder, decoder, vae),
        shuffle=shuffle,
        validation_data=(valid_X, valid_X))
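For completeness: the sampling function used in the Lambda layer above is the usual reparameterization trick, roughly

def sampling(args):
    """Reparameterization trick: z = z_mean + sigma * epsilon."""
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon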
The fully connected network that I attach to the encoder looks like this:
latent = encoder.predict(images)[0]  # z_mean from the encoder as the latent representation
inputs = Input(shape=(input_shape,), name='fc_input')
den = inputs
for i in range(len(self.params["units"])):
    den = Dense(self.params["units"][i])(den)
    den = Activation('relu')(den)
out = Dense(self.num_classes, activation='softmax')(den)
model = Model(inputs, out, name='fcnn')
optimizer = keras.optimizers.Adam(lr=self.mc.CONFIG["fcnn"]["learning_rate"], beta_1=0.9, beta_2=0.999,
                                  epsilon=1e-08, decay=self.mc.CONFIG["fcnn"]["learning_rate_decay"])
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.fit(latent, y,
          epochs=self.params["n_epochs"],
          batch_size=self.params["batch_size"],
          verbose=0,
          shuffle=True)
y_prob = model.predict(latent)
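What I am unsure about is whether I should instead attach the classification head directly to z_mean and train (and tune) everything end to end, so that the gradient of the classification loss also reaches the encoder. Just a sketch of what I mean, assuming everything is built in one scope; labeled_X/labeled_y and the 0.1 loss weight are made up:

# classification head on top of z_mean; reconstruction and classification
# are trained together as a two-output model
clf = Dense(self.params["units"][0], activation='relu')(z_mean)
clf = Dense(self.num_classes, activation='softmax', name='clf')(clf)

joint = Model(inputs, [decoder(z), clf], name='vae_fcnn')
joint.compile(optimizer=keras.optimizers.Adam(lr=params["learning_rate"]),
              loss=[vae_loss, 'categorical_crossentropy'],
              loss_weights=[1.0, 0.1])

joint.fit(labeled_X, [labeled_X, labeled_y],
          epochs=self.params["n_epochs"],
          batch_size=self.params["batch_size"],
          verbose=0)

Would a single Optuna study over such a combined model be a sensible way to tune both parts, or is the two-stage approach the usual way to handle this?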