Source code for cadl.gan

"""Generative Adversarial Network.
"""
"""
Copyright 2017 Parag K. Mital.  See also NOTICE.md.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
from cadl import utils
from cadl.dataset_utils import create_input_pipeline
from cadl.datasets import CELEB


def encoder(x,
            dimensions=[],
            filter_sizes=[],
            convolutional=False,
            activation=tf.nn.relu,
            output_activation=tf.nn.sigmoid,
            reuse=False):
    """Encoder network: encodes input `x` through layers defined by dimensions.

    Parameters
    ----------
    x : tf.Tensor
        Input to the encoder network, e.g. tf.Placeholder or tf.Variable
    dimensions : list, optional
        List of the number of neurons in each layer (convolutional=False) -or-
        List of the number of filters in each layer (convolutional=True), e.g.
        [100, 100, 100, 100] for a 4-layer deep network with 100 in each layer.
    filter_sizes : list, optional
        List of the size of the kernel in each layer, e.g.:
        [3, 3, 3, 3] is a 4-layer deep network w/ 3 x 3 kernels in every layer.
    convolutional : bool, optional
        Whether or not to use convolutional layers.
    activation : fn, optional
        Function for applying an activation, e.g. tf.nn.relu
    output_activation : fn, optional
        Function for applying an activation on the last layer, e.g. tf.nn.relu
    reuse : bool, optional
        For each layer's variable scope, whether to reuse existing variables.

    Returns
    -------
    h : tf.Tensor
        Output tensor of the encoder
    """
    # %%
    # ensure 2-d is converted to square tensor.
    if convolutional:
        x_tensor = utils.to_tensor(x)
    else:
        x_tensor = tf.reshape(tensor=x, shape=[-1, dimensions[0]])
        dimensions = dimensions[1:]
    current_input = x_tensor

    for layer_i, n_output in enumerate(dimensions):
        with tf.variable_scope(str(layer_i), reuse=reuse):
            if convolutional:
                h, W = utils.conv2d(
                    x=current_input,
                    n_output=n_output,
                    k_h=filter_sizes[layer_i],
                    k_w=filter_sizes[layer_i],
                    padding='SAME',
                    reuse=reuse)
            else:
                h, W = utils.linear(
                    x=current_input, n_output=n_output, reuse=reuse)
            output = activation(h)
            current_input = output

    flattened = utils.flatten(current_input, name='flatten', reuse=reuse)

    if output_activation is None:
        return flattened
    else:
        return output_activation(flattened)

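# Usage sketch (not part of the original module): how `encoder` might be
# called to build a small convolutional encoder.  The input shape, filter
# counts, and kernel sizes below are illustrative assumptions only.
def _demo_encoder():
    x = tf.placeholder(tf.float32, [None, 64, 64, 3])
    h = encoder(
        x,
        dimensions=[32, 64, 128],   # number of filters per layer
        filter_sizes=[3, 3, 3],     # 3 x 3 kernels in every layer
        convolutional=True,
        output_activation=None)     # return flattened pre-activations
    return h                        # shape [None, 8 * 8 * 128] for this input
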
def decoder(z,
            dimensions=[],
            channels=[],
            filter_sizes=[],
            convolutional=False,
            activation=tf.nn.relu,
            output_activation=tf.nn.tanh,
            reuse=None):
    """Decoder network: decodes input `z` through layers defined by dimensions.

    In contrast with `encoder`, this requires information on the number of
    output channels in each layer for convolution.  Otherwise, it is mostly
    the same.

    Parameters
    ----------
    z : tf.Tensor
        Input to the decoder network, e.g. tf.Placeholder or tf.Variable
    dimensions : list, optional
        List of the number of neurons in each layer (convolutional=False) -or-
        List of the number of filters in each layer (convolutional=True), e.g.
        [100, 100, 100, 100] for a 4-layer deep network with 100 in each layer.
    channels : list, optional
        For decoding when convolutional=True, requires the number of output
        channels in each layer.
    filter_sizes : list, optional
        List of the size of the kernel in each layer, e.g.:
        [3, 3, 3, 3] is a 4-layer deep network w/ 3 x 3 kernels in every layer.
    convolutional : bool, optional
        Whether or not to use convolutional layers.
    activation : fn, optional
        Function for applying an activation, e.g. tf.nn.relu
    output_activation : fn, optional
        Function for applying an activation on the last layer, e.g. tf.nn.relu
    reuse : bool, optional
        For each layer's variable scope, whether to reuse existing variables.

    Returns
    -------
    h : tf.Tensor
        Output tensor of the decoder
    """
    if convolutional:
        with tf.variable_scope('fc', reuse=reuse):
            z1, W = utils.linear(
                x=z,
                n_output=channels[0] * dimensions[0][0] * dimensions[0][1],
                reuse=reuse)
            rsz = tf.reshape(
                z1, [-1, dimensions[0][0], dimensions[0][1], channels[0]])
            current_input = activation(rsz)

        dimensions = dimensions[1:]
        channels = channels[1:]
        filter_sizes = filter_sizes[1:]
    else:
        current_input = z

    for layer_i, n_output in enumerate(dimensions):
        with tf.variable_scope(str(layer_i), reuse=reuse):
            if convolutional:
                h, W = utils.deconv2d(
                    x=current_input,
                    n_output_h=n_output[0],
                    n_output_w=n_output[1],
                    n_output_ch=channels[layer_i],
                    k_h=filter_sizes[layer_i],
                    k_w=filter_sizes[layer_i],
                    padding='SAME',
                    reuse=reuse)
            else:
                h, W = utils.linear(
                    x=current_input, n_output=n_output, reuse=reuse)
            if layer_i < len(dimensions) - 1:
                output = activation(h)
            else:
                output = h
            current_input = output

    if output_activation is None:
        return current_input
    else:
        return output_activation(current_input)

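# Usage sketch (not part of the original module): how `decoder` might be
# called convolutionally.  `dimensions` gives each layer's spatial size and
# `channels` its depth; all values below are illustrative assumptions.
def _demo_decoder():
    z = tf.placeholder(tf.float32, [None, 100])
    x_tilde = decoder(
        z,
        dimensions=[[8, 8], [16, 16], [32, 32], [64, 64]],
        channels=[128, 64, 32, 3],
        filter_sizes=[3, 3, 3, 3],
        convolutional=True)
    return x_tilde  # tanh output in [-1, 1], shape [None, 64, 64, 3]
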
def generator(z,
              output_h,
              output_w,
              convolutional=True,
              n_features=32,
              rgb=False,
              reuse=None):
    """Simple interface to build a decoder network given the input parameters.

    Parameters
    ----------
    z : tf.Tensor
        Input to the generator, e.g. tf.Placeholder or tf.Variable
    output_h : int
        Final generated height
    output_w : int
        Final generated width
    convolutional : bool, optional
        Whether or not to build a convolutional generative network.
    n_features : int, optional
        Number of channels to use in the last hidden layer.
    rgb : bool, optional
        Whether or not the final generated image is RGB.
    reuse : None, optional
        Whether or not to reuse the variables if they are already created.

    Returns
    -------
    x_tilde : tf.Tensor
        Output of the generator network.
    """
    n_channels = 3 if rgb else 1
    with tf.variable_scope('generator', reuse=reuse):
        return decoder(
            z=z,
            convolutional=convolutional,
            filter_sizes=[5, 5, 5, 5, 5],
            channels=[
                n_features * 8, n_features * 4, n_features * 2, n_features,
                n_channels
            ],
            dimensions=[[output_h // 16, output_w // 16],
                        [output_h // 8, output_w // 8],
                        [output_h // 4, output_w // 4],
                        [output_h // 2, output_w // 2],
                        [output_h, output_w]]
            if convolutional else [384, 512, n_features],
            activation=tf.nn.relu6,
            output_activation=tf.nn.tanh,
            reuse=reuse)

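# Usage sketch (not part of the original module): the convolutional
# generator upsamples by factors of 2 from (output_h // 16, output_w // 16),
# so output_h and output_w should be multiples of 16, e.g. 64 x 64 below.
def _demo_generator():
    z = tf.placeholder(tf.float32, [None, 100])
    G = generator(z, output_h=64, output_w=64, n_features=32, rgb=True)
    return G  # shape [None, 64, 64, 3], values in [-1, 1] from tanh
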
def discriminator(x,
                  convolutional=True,
                  n_features=32,
                  rgb=False,
                  reuse=False):
    """Simple interface to build an encoder network as the discriminator.

    Parameters
    ----------
    x : tf.Tensor
        Input to the discriminator, e.g. a real or generated image.
    convolutional : bool, optional
        Whether or not to build a convolutional network.
    n_features : int, optional
        Number of channels to use in the first hidden layer.
    rgb : bool, optional
        Whether or not the input image is RGB (unused here).
    reuse : bool, optional
        Whether or not to reuse the variables if they are already created.

    Returns
    -------
    D : tf.Tensor
        Output of the discriminator network (pre-sigmoid).
    """
    with tf.variable_scope('discriminator', reuse=reuse):
        return encoder(
            x=x,
            convolutional=convolutional,
            filter_sizes=[5, 5, 5, 5],
            dimensions=[
                n_features, n_features * 2, n_features * 4, n_features * 8
            ] if convolutional else [n_features, 128, 256],
            activation=tf.nn.relu6,
            output_activation=None,
            reuse=reuse)

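# Usage sketch (not part of the original module): the discriminator is
# applied twice with shared weights -- once to real images and once to
# generated ones -- by passing reuse=True on the second call, exactly as
# `GAN` below does.
def _demo_discriminator():
    x_real = tf.placeholder(tf.float32, [None, 64, 64, 3])
    x_fake = tf.placeholder(tf.float32, [None, 64, 64, 3])
    D_real_logits = discriminator(x_real)
    D_fake_logits = discriminator(x_fake, reuse=True)
    return D_real_logits, D_fake_logits
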
def GAN(input_shape, n_latent, n_features, rgb, debug=True):
    """Build a GAN graph: generator, discriminator, losses, and summaries.

    Parameters
    ----------
    input_shape : list
        Shape of the input images, e.g. [None, 64, 64, 3].
    n_latent : int
        Number of latent dimensions of the generator's input `z`.
    n_features : int
        Number of channels to use in the hidden layers.
    rgb : bool
        Whether or not the generated image is RGB.
    debug : bool, optional
        Unused.

    Returns
    -------
    model : dict
        Dictionary of the model's losses, placeholders, generator output,
        and summary ops.
    """
    # Real input samples
    # n_features is either the image dimension or flattened number of features
    x = tf.placeholder(tf.float32, input_shape, 'x')
    # Rescale from [0, 255] to [-1, 1] to match the generator's tanh output.
    # N.B. feeding the returned 'x' tensor directly bypasses this rescaling.
    x = (x / 127.5) - 1.0
    sum_x = tf.summary.image("x", x)

    # Discriminator for real input samples
    D_real_logits = discriminator(x, n_features=n_features, rgb=rgb)
    D_real = tf.nn.sigmoid(D_real_logits)
    sum_D_real = tf.summary.histogram("D_real", D_real)

    # Generator tries to recreate input samples using latent feature vector
    z = tf.placeholder(tf.float32, [None, n_latent], 'z')
    sum_z = tf.summary.histogram("z", z)
    G = generator(
        z,
        output_h=input_shape[1],
        output_w=input_shape[2],
        n_features=n_features,
        rgb=rgb)
    sum_G = tf.summary.image("G", G)

    # Discriminator for generated samples, sharing weights with the real one
    D_fake_logits = discriminator(
        G, n_features=n_features, rgb=rgb, reuse=True)
    D_fake = tf.nn.sigmoid(D_fake_logits)
    sum_D_fake = tf.summary.histogram("D_fake", D_fake)

    with tf.variable_scope('loss'):
        # Loss functions
        loss_D_real = utils.binary_cross_entropy(
            D_real, tf.ones_like(D_real), name='loss_D_real')
        loss_D_fake = utils.binary_cross_entropy(
            D_fake, tf.zeros_like(D_fake), name='loss_D_fake')
        loss_D = tf.reduce_mean((loss_D_real + loss_D_fake) / 2)
        loss_G = tf.reduce_mean(
            utils.binary_cross_entropy(
                D_fake, tf.ones_like(D_fake), name='loss_G'))

        # Summaries (the original also re-created the D_real/D_fake
        # histograms here, duplicating the ones above; dropped)
        sum_loss_D_real = tf.summary.histogram("loss_D_real", loss_D_real)
        sum_loss_D_fake = tf.summary.histogram("loss_D_fake", loss_D_fake)
        sum_loss_D = tf.summary.scalar("loss_D", loss_D)
        sum_loss_G = tf.summary.scalar("loss_G", loss_G)

    return {
        'loss_D': loss_D,
        'loss_G': loss_G,
        'x': x,
        'G': G,
        'z': z,
        'sums': {
            'G': sum_G,
            'D_real': sum_D_real,
            'D_fake': sum_D_fake,
            'loss_G': sum_loss_G,
            'loss_D': sum_loss_D,
            'loss_D_real': sum_loss_D_real,
            'loss_D_fake': sum_loss_D_fake,
            'z': sum_z,
            'x': sum_x
        }
    }

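# Usage sketch (not part of the original module): build the full graph and
# run both losses once on random data.  The batch size and 64 x 64 shape are
# illustrative assumptions; values fed to gan['x'] are already in [-1, 1]
# since feeding that tensor bypasses the internal rescaling noted above.
def _demo_gan():
    with tf.Graph().as_default(), tf.Session() as sess:
        gan = GAN(input_shape=[None, 64, 64, 3],
                  n_latent=100,
                  n_features=32,
                  rgb=True)
        sess.run(tf.global_variables_initializer())
        xs = np.random.uniform(-1.0, 1.0, [4, 64, 64, 3]).astype(np.float32)
        zs = np.random.uniform(-1.0, 1.0, [4, 100]).astype(np.float32)
        return sess.run([gan['loss_D'], gan['loss_G']],
                        feed_dict={gan['x']: xs, gan['z']: zs})
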
def train_input_pipeline(files,
                         init_lr_g=1e-4,
                         init_lr_d=1e-4,
                         n_features=10,
                         n_latent=100,
                         n_epochs=1000000,
                         batch_size=200,
                         n_samples=15,
                         input_shape=[218, 178, 3],
                         crop_shape=[64, 64, 3],
                         crop_factor=0.8):
    """Train a GAN on a list of image files using an input pipeline.

    Parameters
    ----------
    files : list
        List of paths to image files to train on.
    init_lr_g : float, optional
        Initial learning rate for the generator.
    init_lr_d : float, optional
        Initial learning rate for the discriminator.
    n_features : int, optional
        Number of channels to use in the hidden layers.
    n_latent : int, optional
        Number of latent dimensions of the generator's input `z`.
    n_epochs : int, optional
        Number of epochs to train for.
    batch_size : int, optional
        Number of images per training batch.
    n_samples : int, optional
        Number of samples per side of the latent manifold montage.
    input_shape : list, optional
        Shape of the raw input images.
    crop_shape : list, optional
        Shape to crop the input images to.
    crop_factor : float, optional
        Fraction of the image to keep when cropping.
    """
    with tf.Graph().as_default(), tf.Session() as sess:
        batch = create_input_pipeline(
            files=files,
            batch_size=batch_size,
            n_epochs=n_epochs,
            crop_shape=crop_shape,
            crop_factor=crop_factor,
            shape=input_shape)

        gan = GAN(
            input_shape=[None] + crop_shape,
            n_features=n_features,
            n_latent=n_latent,
            rgb=True,
            debug=False)

        vars_d = [
            v for v in tf.trainable_variables()
            if v.name.startswith('discriminator')
        ]
        print('Training discriminator variables:')
        for v in vars_d:
            print(v.name)

        vars_g = [
            v for v in tf.trainable_variables()
            if v.name.startswith('generator')
        ]
        print('Training generator variables:')
        for v in vars_g:
            print(v.name)

        zs = np.random.uniform(-1.0, 1.0, [4, n_latent]).astype(np.float32)
        zs = utils.make_latent_manifold(zs, n_samples)

        lr_g = tf.placeholder(tf.float32, shape=[], name='learning_rate_g')
        lr_d = tf.placeholder(tf.float32, shape=[], name='learning_rate_d')

        try:
            # N.B. the original `from tf.contrib.layers import
            # apply_regularization` always raised (`tf` is an alias, not a
            # package), silently disabling regularization; call through the
            # module instead.
            apply_regularization = tf.contrib.layers.apply_regularization
            d_reg = apply_regularization(
                tf.contrib.layers.l2_regularizer(1e-6), vars_d)
            g_reg = apply_regularization(
                tf.contrib.layers.l2_regularizer(1e-6), vars_g)
        except AttributeError:
            d_reg, g_reg = 0, 0

        opt_g = tf.train.AdamOptimizer(lr_g, name='Adam_g').minimize(
            gan['loss_G'] + g_reg, var_list=vars_g)
        opt_d = tf.train.AdamOptimizer(lr_d, name='Adam_d').minimize(
            gan['loss_D'] + d_reg, var_list=vars_d)

        # %%
        # We create a session to use the graph
        saver = tf.train.Saver()
        sums = gan['sums']
        G_sum_op = tf.summary.merge([
            sums['G'], sums['loss_G'], sums['z'], sums['loss_D_fake'],
            sums['D_fake']
        ])
        D_sum_op = tf.summary.merge([
            sums['loss_D'], sums['loss_D_real'], sums['loss_D_fake'],
            sums['z'], sums['x'], sums['D_real'], sums['D_fake']
        ])
        writer = tf.summary.FileWriter("./logs", sess.graph)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        tf.get_default_graph().finalize()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # g = tf.get_default_graph()
        # [print(op.name) for op in g.get_operations()]

        # Restore from the latest checkpoint if one exists.  N.B. the
        # original checked `os.path.exists("gan.ckpt")`, which never matches
        # the step-suffixed files that `saver.save` writes below.
        ckpt = tf.train.latest_checkpoint('.')
        if ckpt is not None:
            saver.restore(sess, ckpt)
            print("GAN model restored.")

        fig, ax = plt.subplots(1, 1, figsize=(10, 10))

        # Ensure the output directory for montages exists.
        os.makedirs('imgs', exist_ok=True)

        step_i, t_i = 0, 0
        loss_d = 1
        loss_g = 1
        n_loss_d, total_loss_d = 1, 1
        n_loss_g, total_loss_g = 1, 1

        try:
            while not coord.should_stop():
                batch_xs = sess.run(batch)
                step_i += 1
                batch_zs = np.random.uniform(
                    -1.0, 1.0, [batch_size, n_latent]).astype(np.float32)

                # Adapt each learning rate to the ratio of the two losses:
                # whichever network has the higher loss gets a larger step.
                this_lr_g = min(
                    1e-2, max(1e-6, init_lr_g * (loss_g / loss_d)**2))
                this_lr_d = min(
                    1e-2, max(1e-6, init_lr_d * (loss_d / loss_g)**2))
                # this_lr_d *= ((1.0 - (step_i / 100000)) ** 2)
                # this_lr_g *= ((1.0 - (step_i / 100000)) ** 2)

                # if np.random.random() > (loss_g / (loss_d + loss_g)):
                if step_i % 3 == 1:
                    loss_d, _, sum_d = sess.run(
                        [gan['loss_D'], opt_d, D_sum_op],
                        feed_dict={
                            gan['x']: batch_xs,
                            gan['z']: batch_zs,
                            lr_d: this_lr_d
                        })
                    total_loss_d += loss_d
                    n_loss_d += 1
                    writer.add_summary(sum_d, step_i)
                    print('%04d d* = lr: %0.08f, loss: %08.06f, \t' %
                          (step_i, this_lr_d, loss_d) +
                          'g = lr: %0.08f, loss: %08.06f' %
                          (this_lr_g, loss_g))
                else:
                    loss_g, _, sum_g = sess.run(
                        [gan['loss_G'], opt_g, G_sum_op],
                        feed_dict={gan['z']: batch_zs,
                                   lr_g: this_lr_g})
                    total_loss_g += loss_g
                    n_loss_g += 1
                    writer.add_summary(sum_g, step_i)
                    print('%04d d = lr: %0.08f, loss: %08.06f, \t' %
                          (step_i, this_lr_d, loss_d) +
                          'g* = lr: %0.08f, loss: %08.06f' %
                          (this_lr_g, loss_g))

                if step_i % 100 == 0:
                    samples = sess.run(gan['G'], feed_dict={gan['z']: zs})
                    utils.montage(
                        np.clip((samples + 1) * 127.5, 0,
                                255).astype(np.uint8),
                        'imgs/gan_%08d.png' % t_i)
                    t_i += 1

                    print('generator loss:', total_loss_g / n_loss_g)
                    print('discriminator loss:', total_loss_d / n_loss_d)

                    # Save the variables to disk.
                    save_path = saver.save(
                        sess,
                        "./gan.ckpt",
                        global_step=step_i,
                        write_meta_graph=False)
                    print("Model saved in file: %s" % save_path)
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            # One of the threads has issued an exception.  So let's tell all
            # the threads to shutdown.
            coord.request_stop()

        # Wait until all threads have finished.
        coord.join(threads)

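# Sketch of the adaptive learning-rate heuristic used in
# `train_input_pipeline` above (not part of the original module): each
# network's rate scales with the squared ratio of its loss to the other's,
# clamped to [1e-6, 1e-2], so the network that is losing steps harder.
def _demo_adaptive_lr(init_lr=1e-4, loss_g=2.0, loss_d=0.5):
    lr_g = min(1e-2, max(1e-6, init_lr * (loss_g / loss_d) ** 2))
    lr_d = min(1e-2, max(1e-6, init_lr * (loss_d / loss_g) ** 2))
    return lr_g, lr_d  # e.g. (1.6e-03, 6.25e-06): generator steps harder
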
if __name__ == '__main__':
    files = CELEB()
    train_input_pipeline(files=files)