1

我正在使用 tf 和 Keras 按照此处此处使用的方法创建一个循环GAN

我对生成器 A->B 和 B->A 生成的图像有一些奇怪的行为


下图从左到右显示

  • real_A(原始图像)
  • generated_B ( generator_AtoB 应用于 real_A)
  • generated_A (generator_BtoA 应用于前一张图像)

及其对应物

  • real_B(原始图像)
  • generated_A ( generator_BtoA 应用于 real_B)
  • generated_B (generator_AtoB 应用于前一张图片)

GAN 示例

图像 2 和 5 是生成器对原始图像的应用,它们具有非常强烈的方格伪影(我猜是由于反卷积),并且没有显示从马“变形”到斑马的迹象,反之亦然。

我不明白的是,图像 3 和 6 是完全相同的生成器,应用于“伪影”图像,但它们没有显示任何伪影迹象。

  • 我在训练发电机时做错了什么吗?

即使在 10k epochs 之后,也没有视觉改进的迹象: - 为什么图像 2 和 5 没有显示风格转移的迹象?- 为什么图像 2 和 5 显示非常强的伪影,而不是 3 和 6?


完整代码:

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

# https://hardikbansal.github.io/CycleGANBlog/
import sys
import time
import pickle
import tensorflow as tf
import numpy as np
import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, Input, Dropout
from keras.layers import multiply, add as kadd
from keras.layers import Conv2D, BatchNormalization, Conv2DTranspose
from keras.layers import LeakyReLU, ReLU
from keras.layers import Activation

from keras.preprocessing.image import ImageDataGenerator

from PIL import Image


from custom_layers import ReflectionPadding2D


# NET PARAMETERS
ngf = 32 # Number of filters in first layer of generator
ndf = 64 # Number of filters in first layer of discriminator
BATCH_SIZE = 1 # batch_size
pool_size = 50 # pool_size
IMG_WIDTH = 256 # Imput image will of width 256
IMG_HEIGHT = 256 # Input image will be of height 256
IMG_DEPTH = 3 # RGB format
INPUT_SHAPE = (IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH)

USE_IDENTITY_LOSS = False


# TRAINING PARAMETERS
ITERATIONS = 1000000
DISCRIMINATOR_ITERATIONS = 1
SAVE_IMAGES_INTERVAL = 100

SAVE_MODEL_INTERVAL = 1000

FAKE_POOL_SIZE=50

# DATASET="vangogh2photo"
DATASET="horse2zebra"



def resnet_block(num_features):

    block = Sequential()
    block.add(Conv2D(num_features, kernel_size=3, strides=1, padding="SAME"))
    block.add(BatchNormalization())
    block.add(ReLU())
    block.add(Conv2D(num_features, kernel_size=3, strides=1, padding="SAME"))
    block.add(BatchNormalization())
    block.add(ReLU())


    # resblock_input = Input(shape=(64, 64, 256))
    resblock_input = Input(shape=(128, 128, 256))
    conv_model = block(resblock_input)

    _sum = kadd([resblock_input, conv_model])

    composed =  Model(inputs=[resblock_input], outputs=_sum)
    return composed


def discriminator( f=4, name=None):



    d = Sequential()
    d.add(Conv2D(ndf, kernel_size=f, strides=2, padding="SAME", name="discr_"+name+"_conv2d_1"))
    d.add(BatchNormalization())
    d.add(LeakyReLU(0.2))
    d.add(Dropout(0.1))
    d.add(Conv2D(ndf * 2, kernel_size=f, strides=2, padding="SAME", name="discr_"+name+"_conv2d_2"))
    d.add(BatchNormalization())
    d.add(LeakyReLU(0.2))
    d.add(Dropout(0.1))
    d.add(Conv2D(ndf * 4, kernel_size=f, strides=2, padding="SAME", name="discr_"+name+"_conv2d_3"))
    d.add(BatchNormalization())
    d.add(LeakyReLU(0.2))
    d.add(Dropout(0.1))
    d.add(Conv2D(ndf * 8, kernel_size=f, strides=2, padding="SAME", name="discr_"+name+"_conv2d_4"))
    d.add(BatchNormalization())
    d.add(LeakyReLU(0.2))
    d.add(Dropout(0.1))
    d.add(Conv2D(1, kernel_size=f, strides=1, padding="SAME", name="discr_"+name+"_conv2d_out"))

    # d.add(Activation("sigmoid"))


    model_input = Input(shape=INPUT_SHAPE)

    decision  = d(model_input)

    composed = Model(model_input, decision)
    # print(d.output_shape)
    # d.summary()

    return composed

def generator(name=None):

    g = Sequential()
    # ENCODER

    g.add(Conv2D(ngf, kernel_size=7,
            strides=1,
            # activation='relu',
            padding='SAME',
            kernel_initializer='random_normal',
            bias_initializer='zeros',
            input_shape=INPUT_SHAPE,
            name="encoder_"+name+"_0" ))
    # g.add(BatchNormalization())
    # g.add(ReLU())

    # g.add(ReflectionPadding2D())

    g.add(Conv2D(64*2, kernel_size=3,
            strides=2,
            padding='SAME',
            kernel_initializer='random_normal',
            bias_initializer='zeros',
            name="encoder_"+name+"_1" ))
    # g.add(BatchNormalization())
    # g.add(ReLU())
    # output shape = (128, 128, 128)

    # g.add(ReflectionPadding2D())

    g.add(Conv2D(64*4, kernel_size=3,
            strides=2,
            padding="SAME",
            kernel_initializer='random_normal',
            bias_initializer='zeros',
            name="encoder_"+name+"_2",
            ))
    # # g.add(BatchNormalization())
    # # g.add(ReLU())
    # # output shape = (64, 64, 256)

    # # END ENCODER



    # # TRANSFORM

    g.add(resnet_block(64*4))
    g.add(resnet_block(64*4))
    g.add(resnet_block(64*4))
    g.add(resnet_block(64*4))
    g.add(resnet_block(64*4))
    g.add(resnet_block(64*4))



    # # END TRANSFORM
    # # generator.shape = (64, 64, 256)

    # # DECODER

    g.add(Conv2DTranspose(ngf*2,kernel_size=3, strides=2, padding="SAME"))
    # g.add(BatchNormalization())
    # g.add(ReLU())

    g.add(Conv2DTranspose(ngf*2,kernel_size=3, strides=2, padding="SAME"))
    # # g.add(BatchNormalization())
    # # g.add(ReLU())

    g.add(Conv2D(3,kernel_size=7, strides=1, padding="SAME", name="generator_out_layer"))
    g.add(ReLU())
    g.summary()
    # exit()


    # END DECODER

    model_input = Input(shape=INPUT_SHAPE)
    generated_image = g(model_input)

    composed = Model(model_input, generated_image, name=name)
    return composed


def fromMinusOneToOne(x):
    return x/127.5 -1

def toRGB(x):
    return (1+x) * 127.5


def createImageGenerator( subset="train", data_type="A", batch_size=1, pp=None):

    # we create two instances with the same arguments
    data_gen_args = dict(
                         # rescale = 1./127.5,
                         # rotation_range=5.,
                         preprocessing_function= pp,
                         # width_shift_range=0.1,
                         # height_shift_range=0.1,
                         # zoom_range=0.1
                         )

    image_datagen = ImageDataGenerator(**data_gen_args)

    # Provide the same seed and keyword arguments to the fit and flow methods
    seed = 1

    image_directory=subset+data_type
    print('data/'+DATASET+'/'+image_directory)
    image_generator = image_datagen.flow_from_directory(
        'data/'+DATASET+'/'+image_directory,
        class_mode=None,
        batch_size=batch_size,
        seed=seed)

    return image_generator

def fit(
    generator_trainer,
    disc_trainer,
    generator_AtoB,
    generator_BtoA
    ):

    fake_A_pool = []
    fake_B_pool = []


    ones = np.ones((BATCH_SIZE,)+ generator_trainer.output_shape[0][1:])
    zeros = np.zeros((BATCH_SIZE,)+ generator_trainer.output_shape[0][1:])

    zeros = np.sum([zeros, 0.07])

    train_A_image_generator = createImageGenerator("train", "A")
    # print(train_A_image_generator.next())
    # for c in train_A_image_generator:
    #     print(c)
    #     exit()
    # exit()

    train_B_image_generator = createImageGenerator("train", "B")
    # test_A_image_generator = createImageGenerator("test", "A")
    # test_B_image_generator = createImageGenerator("test", "B")

    now = time.strftime("%Y-%m-%d_%H.%M.%S")
    it = 1
    while it  <= ITERATIONS:
        fw = tf.summary.FileWriter(logdir="./tensorboard/"+now)
        start = time.time()
        print("\nIteration %d " % it)
        sys.stdout.flush()

        # THIS ONLY WORKS IF BATCH SIZE == 1
        real_A = train_A_image_generator.next()
        real_B = train_B_image_generator.next()

        fake_A_pool.extend(generator_BtoA.predict(real_B))
        fake_B_pool.extend(generator_AtoB.predict(real_A))

        #resize pool
        fake_A_pool = fake_A_pool[-FAKE_POOL_SIZE:]
        fake_B_pool = fake_B_pool[-FAKE_POOL_SIZE:]

        fake_A = [ fake_A_pool[ind] for ind in np.random.choice(len(fake_A_pool), size=(BATCH_SIZE,), replace=False) ]
        fake_B = [ fake_B_pool[ind] for ind in np.random.choice(len(fake_B_pool), size=(BATCH_SIZE,), replace=False) ]

        fake_A = np.array(fake_A)
        fake_B = np.array(fake_B)





        for x in range(0, DISCRIMINATOR_ITERATIONS):
            _, D_loss_real_A, D_loss_fake_A, D_loss_real_B, D_loss_fake_B = \
            disc_trainer.train_on_batch(
                [real_A, fake_A, real_B, fake_B],
                [zeros, ones * 0.9, zeros, ones * 0.9] )
                # [zeros, ones, zeros, ones] )


        print("=====")
        print("Discriminator loss:")
        print("Real A: %s, Fake A: %s || Real B: %s, Fake B: %s " % ( D_loss_real_A, D_loss_fake_A, D_loss_real_B, D_loss_fake_B))

        if USE_IDENTITY_LOSS:
            _, G_loss_fake_B, G_loss_fake_A, G_loss_rec_A, G_loss_rec_B, G_loss_id_A, G_loss_id_B = \
                generator_trainer.train_on_batch(
                    [real_A, real_B],
                    [zeros, zeros, real_A, real_B, real_A, real_B])
        else:
            _, G_loss_fake_B, G_loss_fake_A, G_loss_rec_A, G_loss_rec_B = \
                generator_trainer.train_on_batch(
                    [real_A, real_B],
                    [zeros, zeros, real_A, real_B])

                # generator_trainer outputs:
                # [discriminator_generated_B,   discriminator_generated_A,cyc_A,      cyc_B,]




        print("=====")
        print("Generator loss:")

        if USE_IDENTITY_LOSS:
            print("Fake B: %s, Cyclic A: %s || Fake A: %s, Cyclic B: %s || ID A: %s, ID B: %s" % (G_loss_fake_B, G_loss_rec_A, G_loss_fake_A, G_loss_rec_B, G_loss_id_A, G_loss_id_B))
        else:
            print("Fake B: %s, Cyclic A: %s || Fake A: %s, Cyclic B: %s " % (G_loss_fake_B, G_loss_rec_A, G_loss_fake_A, G_loss_rec_B))

        end = time.time()
        print("Iteration time: %s s" % (end-start))
        sys.stdout.flush()

        summary = tf.Summary(value=[
            tf.Summary.Value(tag="disc_A_loss_on_real", simple_value = D_loss_real_A),
            tf.Summary.Value(tag="disc_A_loss_on_generated", simple_value = D_loss_fake_A),
            tf.Summary.Value(tag="disc_B_loss_on_real", simple_value = D_loss_real_B),
            tf.Summary.Value(tag="disc_B_loss_on_generated", simple_value = D_loss_fake_B),

            tf.Summary.Value(tag="gen_generated_A", simple_value = G_loss_fake_A),
            tf.Summary.Value(tag="gen_generated_B", simple_value = G_loss_fake_B),
            tf.Summary.Value(tag="gen_cyc_A", simple_value = G_loss_rec_A),
            tf.Summary.Value(tag="gen_cyc_B", simple_value = G_loss_rec_B),
        ])

        fw.add_summary(summary, global_step=it)
        fw.flush()
        fw.close()

        if not (it % SAVE_IMAGES_INTERVAL ):
            imgA = real_A
            # print(imgA.shape)
            imga2b = generator_AtoB.predict(imgA)
            # print(imga2b.shape)
            imga2b2a = generator_BtoA.predict(imga2b)
            # print(imga2b2a.shape)
            imgB = real_B
            imgb2a = generator_BtoA.predict(imgB)
            imgb2a2b = generator_AtoB.predict(imgb2a)

            c = np.concatenate([imgA, imga2b, imga2b2a, imgB, imgb2a, imgb2a2b], axis=2).astype(np.uint8)
            # print(c.shape)
            x = Image.fromarray(c[0])
            x.save("data/generated/iteration_%s.jpg" % str(it).zfill(4))



        # with open("models/generator_AtoB.pickle", "wb") as saveFile:
        #     pickle.dump(generator_AtoB, saveFile)

        # with open("models/generator_BtoA.pickle", "wb") as saveFile:
        #     pickle.dump(generator_BtoA, saveFile)

        if not (it % SAVE_MODEL_INTERVAL):
            generator_AtoB.save("models/generator_AtoB_id.h5")
            generator_BtoA.save("models/generator_BtoA_id.h5")

        it+=1



    generator_AtoB.save("models/generator_AtoB_id.h5")
    generator_BtoA.save("models/generator_BtoA_id.h5")


    return

if __name__ == '__main__':

    generator_AtoB = generator(name="gen_A")
    generator_BtoA = generator(name="gen_B")

    discriminator_A = discriminator(name="disc_A")
    discriminator_B = discriminator(name="disc_B")





    ### GENERATOR TRAINING
    optim = keras.optimizers.Adam(lr=0.0002, beta_1=0.5, beta_2=0.999, epsilon=1e-08)

    input_A = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="input_A")
    generated_B = generator_AtoB(input_A)
    discriminator_generated_B = discriminator_B(generated_B)
    cyc_A = generator_BtoA(generated_B)


    input_B = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="input_B")
    generated_A = generator_BtoA(input_B)
    discriminator_generated_A = discriminator_A(generated_A )
    cyc_B = generator_AtoB(generated_A)


    # cyclic error is increased, because it's more important
    cyclic_weight_multipier = 10

    if USE_IDENTITY_LOSS:
        generator_trainer =  Model([input_A, input_B],
                         [discriminator_generated_B,   discriminator_generated_A,
                         cyc_A,      cyc_B,
                         generated_B,     generated_A ]
                         )
        losses =         [ "MSE", "MSE", "MAE",                   "MAE",                    "MAE", "MAE"]
        losses_weights = [ 1,     1,     cyclic_weight_multipier, cyclic_weight_multipier,  1,     1    ]
    else:
        generator_trainer =  Model([input_A, input_B],
                     [discriminator_generated_B,   discriminator_generated_A,
                     cyc_A,      cyc_B,])

        losses =         [ "MSE", "MSE", "MAE",                   "MAE"]
        losses_weights = [ 1,     1,     cyclic_weight_multipier, cyclic_weight_multipier]

    generator_trainer.compile(optimizer=optim, loss = losses, loss_weights=losses_weights)



    ### DISCRIMINATOR TRAINING

    disc_optim = keras.optimizers.Adam(lr=0.0002, beta_1=0.5, beta_2=0.999, epsilon=1e-08)

    real_A = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="in_real_A")
    real_B = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="in_real_B")

    generated_A = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="in_gen_A")
    generated_B = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="in_gen_B")

    discriminator_real_A = discriminator_A(real_A)
    discriminator_generated_A = discriminator_A(generated_A)
    discriminator_real_B =  discriminator_B(real_B)
    discriminator_generated_B = discriminator_B(generated_B)

    disc_trainer = Model([real_A, generated_A, real_B, generated_B],
                         [  discriminator_real_A,
                            discriminator_generated_A,
                            discriminator_real_B,
                            discriminator_generated_B] )


    disc_trainer.compile(optimizer=disc_optim, loss = 'MSE')


    #########
    ##
    ## TRAINING
    ##
    #########




    fit(generator_trainer,
        disc_trainer,
        generator_AtoB,
        generator_BtoA)
4

0 回答 0