Source code for cadl.pixelrnn

"""Basic PixelRNN i.e. CharRNN style, none of the fancy ones (i.e. Row, Diag, BiDiag).
"""
"""
Copyright 2017 Parag K. Mital.  See also NOTICE.md.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Attributes
----------
B : int
    Batch size to train with.
C : int
    Number of color channels in the input images.
ckpt_name : str
    Filename used to save and restore the model checkpoint.
H : int
    Height of the input images in pixels.
n_epochs : int
    Number of epochs to train for.
n_units : int
    Number of units in each GRU recurrent cell.
W : int
    Width of the input images in pixels.
"""
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from cadl import dataset_utils as dsu

# Parameters for training
ckpt_name = 'pixelrnn.ckpt'
n_epochs = 10
n_units = 100
B = 50
H = 32
W = 32
C = 3


def build_pixel_rnn_basic_model(B=50, H=32, W=32, C=3, n_units=100, n_layers=2):
    """Build a basic (CharRNN-style) PixelRNN graph.

    Parameters
    ----------
    B : int, optional
        Batch size.
    H : int, optional
        Height of the input images in pixels.
    W : int, optional
        Width of the input images in pixels.
    C : int, optional
        Number of color channels.
    n_units : int, optional
        Number of units in each GRU cell.
    n_layers : int, optional
        Number of recurrent layers.

    Returns
    -------
    dict
        The network's input, reconstruction, cost, and RNN state tensors.
    """
    # Input to the network, a batch of images
    X = tf.placeholder(tf.float32, shape=[B, H, W, C], name='X')
    # Defaults to 1.0 (no dropout) unless a value is explicitly fed
    keep_prob = tf.placeholder_with_default(1.0, shape=[], name='keep_prob')

    # Flatten to 2 dimensions
    X_2d = tf.reshape(X, [-1, H * W * C])

    # Turn each pixel value into a vector of one-hot values
    X_onehot = tf.one_hot(tf.cast(X_2d, tf.uint8), depth=256, axis=2)

    # Split each pixel into its own tensor, resulting in H * W * C
    # Tensors, each shaped as B x 256
    pixels = [
        tf.squeeze(p, axis=1) for p in tf.split(X_onehot, H * W * C, axis=1)
    ]

    # Create a GRU recurrent layer
    cells = tf.contrib.rnn.GRUCell(n_units)
    initial_state = cells.zero_state(
        batch_size=tf.shape(X)[0], dtype=tf.float32)
    if n_layers > 1:
        # Each layer needs its own cell instance; reusing one instance
        # across layers raises an error in recent TF 1.x releases
        cells = tf.contrib.rnn.MultiRNNCell(
            [tf.contrib.rnn.GRUCell(n_units) for _ in range(n_layers)],
            state_is_tuple=True)
        initial_state = cells.zero_state(tf.shape(X)[0], tf.float32)
    cells = tf.contrib.rnn.DropoutWrapper(cells, output_keep_prob=keep_prob)

    # Connect our pixel distributions (one-hots) to an RNN. This returns a
    # list of output tensors, one for each of our pixels.
    hs, final_state = tf.contrib.rnn.static_rnn(
        cells, pixels, initial_state=initial_state)

    # Concat the N pixel outputs back into a Tensor, B x N x n_units
    stacked = tf.concat([tf.expand_dims(h_i, axis=1) for h_i in hs], axis=1)

    # And now to 2d so we can connect to a fully-connected layer
    stacked = tf.reshape(stacked, [-1, n_units])

    # And now connect to the fully-connected layer
    prediction = slim.linear(stacked, 256, scope='linear')

    if B * H * W * C > 1:
        # Predict pixel i + 1 from the output at pixel i: drop the last
        # prediction and the first label so the two line up
        prediction = tf.slice(prediction, [0, 0],
                              [int(prediction.shape[0]) - 1, -1])
        X_onehot_flat = tf.slice(
            tf.reshape(X_onehot, [-1, 256]), [1, 0], [-1, -1])
        loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=X_onehot_flat, logits=prediction)
        cost = tf.reduce_mean(loss)
    else:
        cost = None

    return {
        'X': X,
        'keep_prob': keep_prob,
        'recon': prediction,
        'cost': cost,
        'initial_state': initial_state,
        'final_state': final_state
    }
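
# The helper below is a sketch added for illustration (an assumption, not
# part of the original module): infer() below expects a graph built with
# B = H = W = C = 1 so that the static RNN unrolls to a single step and
# each sess.run() advances the synthesis by exactly one pixel.
def build_sampling_model(n_units=n_units):
    """Sketch: build the single-step graph that infer() expects."""
    tf.reset_default_graph()
    return build_pixel_rnn_basic_model(B=1, H=1, W=1, C=1, n_units=n_units)
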
def infer(sess, net, H, W, C, pixel_value=128, state=None):
    """Synthesize an image one pixel at a time.

    Assumes ``net`` was built with B = H = W = C = 1 so that the graph
    performs a single RNN step per run.

    Parameters
    ----------
    sess : tf.Session
        An active session with the network's variables loaded.
    net : dict
        The network returned by build_pixel_rnn_basic_model.
    H : int
        Height of the image to synthesize.
    W : int
        Width of the image to synthesize.
    C : int
        Number of color channels.
    pixel_value : int, optional
        Value of the first pixel, used to seed the synthesis.
    state : None, optional
        Optional initial RNN state; defaults to the zero state.

    Returns
    -------
    list
        The H * W * C synthesized pixel values.
    """
    X = np.reshape(pixel_value, [1, 1, 1, 1])
    synthesis = [pixel_value]
    if state is None:
        state = sess.run(net['initial_state'])
    for pixel_i in range(H * W * C - 1):
        probs, state = sess.run(
            [net['recon'], net['final_state']],
            feed_dict={net['X']: X,
                       net['initial_state']: state})
        synthesis.append(np.argmax(probs))
        # Feed the newly sampled pixel back in as the next input
        X = np.reshape(synthesis[-1], [1, 1, 1, 1])
    return synthesis
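
# A usage sketch (an assumption added for illustration, not part of the
# original module): restore trained weights into the single-step graph and
# synthesize one image, reshaping the H * W * C pixel values at the end.
def synthesize_image():
    """Sketch: sample a single H x W x C image from a trained model."""
    net = build_sampling_model()
    with tf.Session() as sess:
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint('./')
        if ckpt is not None:
            saver.restore(sess, ckpt)
        else:
            # No checkpoint found; sample from randomly initialized weights
            sess.run(tf.global_variables_initializer())
        pixels = infer(sess, net, H, W, C, pixel_value=128)
        return np.array(pixels, dtype=np.uint8).reshape(H, W, C)
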
def train_tiny_imagenet():
    """Train the basic PixelRNN on the Tiny ImageNet dataset."""
    net = build_pixel_rnn_basic_model()

    # Build the optimizer (this will take a while!)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=0.001).minimize(net['cost'])

    # Load a list of files for tiny imagenet, downloading if necessary
    imagenet_files = dsu.tiny_imagenet_load()

    # Create a threaded image pipeline which will load/shuffle/crop/resize
    batch = dsu.create_input_pipeline(
        imagenet_files,
        batch_size=B,
        n_epochs=n_epochs,
        shape=[64, 64, 3],
        crop_shape=[32, 32, 3],
        crop_factor=0.5,
        n_threads=8)

    sess = tf.Session()
    saver = tf.train.Saver()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)

    # This will handle our threaded image pipeline
    coord = tf.train.Coordinator()

    # Ensure no more changes to graph
    tf.get_default_graph().finalize()

    # Start up the queues for handling the image pipeline
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # Restore the most recent checkpoint, if one exists. Checkpoints are
    # saved with a global step suffix, so look up the latest one.
    ckpt = tf.train.latest_checkpoint('./')
    if ckpt is not None:
        saver.restore(sess, ckpt)

    # The number of epochs is enforced by the input pipeline, which raises
    # OutOfRangeError once n_epochs worth of batches have been consumed.
    epoch_i = 0
    batch_i = 0
    save_step = 100
    try:
        while not coord.should_stop() and epoch_i < n_epochs:
            batch_i += 1
            batch_xs = sess.run(batch)
            train_cost = sess.run(
                [net['cost'], optimizer], feed_dict={net['X']: batch_xs})[0]
            print(batch_i, train_cost)
            if batch_i % save_step == 0:
                # Save the variables to disk. Don't write the meta graph
                # since we can use the code to create it, and it takes a long
                # time to create the graph since it is so deep
                saver.save(
                    sess,
                    ckpt_name,
                    global_step=batch_i,
                    write_meta_graph=False)
    except tf.errors.OutOfRangeError:
        print('Done.')
    finally:
        # One of the threads has issued an exception. So let's tell all the
        # threads to shutdown.
        coord.request_stop()

    # Wait until all threads have finished.
    coord.join(threads)

    # Clean up the session.
    sess.close()
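
# Note (illustrative; relies on the 'keep_prob' entry returned by
# build_pixel_rnn_basic_model above): since the cells are wrapped in a
# DropoutWrapper, dropout can be enabled during training by feeding the
# keep_prob tensor, e.g.:
#
#     sess.run([net['cost'], optimizer],
#              feed_dict={net['X']: batch_xs, net['keep_prob']: 0.5})
#
# With the placeholder's default of 1.0, dropout stays off wherever
# keep_prob is not explicitly fed.
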
if __name__ == '__main__':
    train_tiny_imagenet()