Source code for cadl.deepdream

"""Deep Dream using the Inception v5 network.
"""
"""
Copyright 2017 Parag K. Mital.  See also NOTICE.md.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import numpy as np
import tensorflow as tf
from scipy.ndimage.filters import gaussian_filter
from skimage.transform import resize
from scipy.misc import imsave
from cadl import inception, vgg16, i2v
from cadl import gif


def get_labels(model='inception'):
    """Return labels corresponding to the `neuron_i` parameter of deep dream.

    Parameters
    ----------
    model : str, optional
        Which model to load.  Must be one of: ['inception'], 'i2v_tag',
        'vgg16', or 'vgg_face'.

    Returns
    -------
    labels : list
        The labels of every neuron in the model's output layer.

    Raises
    ------
    ValueError
        If the model is unknown or does not have labels.
    """
    if model == 'inception':
        net = inception.get_inception_model()
        return net['labels']
    elif model == 'i2v_tag':
        net = i2v.get_i2v_tag_model()
        return net['labels']
    elif model == 'vgg16':
        net = vgg16.get_vgg_model()
        return net['labels']
    elif model == 'vgg_face':
        net = vgg16.get_vgg_face_model()
        return net['labels']
    else:
        raise ValueError("Unknown model or this model does not have labels!")
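
# Example (a sketch, not part of the original module): inspect the labels to
# choose a `neuron_i` for `deep_dream`.  Assumes the pretrained inception
# model can be downloaded:
#
#     labels = get_labels('inception')
#     print(len(labels))
#     print(labels[:5])
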
def get_layer_names(model='inception'):
    """Return every layer's index and name in the given model.

    Parameters
    ----------
    model : str, optional
        Which model to load.  Must be one of: ['inception'], 'i2v_tag',
        'i2v', 'vgg16', or 'vgg_face'.

    Returns
    -------
    names : list of tuples
        The index and name of every layer in the given model.

    Raises
    ------
    ValueError
        If the model is unknown.
    """
    g = tf.Graph()
    with tf.Session(graph=g):
        if model == 'inception':
            net = inception.get_inception_model()
        elif model == 'vgg_face':
            net = vgg16.get_vgg_face_model()
        elif model == 'vgg16':
            net = vgg16.get_vgg_model()
        elif model == 'i2v':
            net = i2v.get_i2v_model()
        elif model == 'i2v_tag':
            net = i2v.get_i2v_tag_model()
        else:
            raise ValueError("Unknown model name!")
        tf.import_graph_def(net['graph_def'], name='net')
        names = [(i, op.name) for i, op in enumerate(g.get_operations())]
    return names
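
# Example (sketch): find the layer index to pass as `layer_i`, e.g. by
# printing the last few operations of the inception graph:
#
#     names = get_layer_names('inception')
#     for i, name in names[-5:]:
#         print(i, name)
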
def _setup(input_img, model, downsize):
    """Internal use only.  Load the given model's graph and preprocess an image.

    Parameters
    ----------
    input_img : np.ndarray
        Image to process with the model's normalization process.
    model : str
        Which model to load.  Must be one of: ['inception'], 'i2v_tag',
        'i2v', 'vgg16', or 'vgg_face'.
    downsize : bool
        Optionally crop/resize the input image to the standard shape.  Only
        applies to the inception network, which is fully convolutional.

    Returns
    -------
    net, img, preprocess, deprocess : dict, np.ndarray, function, function
        net: the network's graph_def and labels.
        img: the preprocessed input image.
        preprocess: function for preprocessing an image.
        deprocess: function for deprocessing an image.

    Raises
    ------
    ValueError
        If the model is unknown.
    """
    if model == 'inception':
        net = inception.get_inception_model()
        img = inception.preprocess(
            input_img, resize=downsize, crop=downsize)[np.newaxis]
        deprocess, preprocess = inception.deprocess, inception.preprocess
    elif model == 'vgg_face':
        net = vgg16.get_vgg_face_model()
        img = vgg16.preprocess(input_img)[np.newaxis]
        deprocess, preprocess = vgg16.deprocess, vgg16.preprocess
    elif model == 'vgg16':
        net = vgg16.get_vgg_model()
        img = vgg16.preprocess(input_img)[np.newaxis]
        deprocess, preprocess = vgg16.deprocess, vgg16.preprocess
    elif model == 'i2v':
        net = i2v.get_i2v_model()
        img = i2v.preprocess(input_img)[np.newaxis]
        deprocess, preprocess = i2v.deprocess, i2v.preprocess
    elif model == 'i2v_tag':
        net = i2v.get_i2v_tag_model()
        img = i2v.preprocess(input_img)[np.newaxis]
        deprocess, preprocess = i2v.deprocess, i2v.preprocess
    else:
        raise ValueError(
            "Unknown model name!  Supported: "
            "['inception', 'vgg_face', 'vgg16', 'i2v', 'i2v_tag']")
    return net, img, preprocess, deprocess


def _apply(img,
           gradient,
           it_i,
           decay=0.998,
           sigma=1.5,
           blur_step=10,
           step=1.0,
           crop=0,
           crop_step=1,
           pth=0):
    """Internal use only.  Apply the gradient to an image with the given params.

    The image is modified in place; nothing is returned.

    Parameters
    ----------
    img : np.ndarray
        Tensor to apply gradient ascent to.
    gradient : np.ndarray
        Gradient to ascend with.
    it_i : int
        Current iteration (used for step modulos).
    decay : float, optional
        Amount to decay the image by each iteration.
    sigma : float, optional
        Sigma of the Gaussian kernel used for blurring.
    blur_step : int, optional
        How often to blur.
    step : float, optional
        Step size for gradient ascent.
    crop : int, optional
        Number of pixels to crop from each border.
    crop_step : int, optional
        How often to crop.
    pth : int, optional
        Percentile below which to mask out values.
    """
    # Normalize the gradient and take a step in its direction
    gradient /= (np.std(gradient) + 1e-10)
    img += gradient * step
    img *= decay
    if pth:
        # Zero out any values below the given percentile (in place)
        mask = (np.abs(img) < np.percentile(np.abs(img), pth))
        img -= img * mask
    if blur_step and it_i % blur_step == 0:
        # Smooth each channel with a Gaussian kernel
        for ch_i in range(3):
            img[..., ch_i] = gaussian_filter(img[..., ch_i], sigma)
    if crop and it_i % crop_step == 0:
        height, width, *ch = img[0].shape
        # Crop a border of `crop` pixels from height and width, then resize
        # back to the original shape, writing in place so the caller sees it
        cropped = img[0, crop:-crop, crop:-crop, :]
        img[:] = resize(
            cropped, (height, width), order=3, clip=False,
            preserve_range=True)[np.newaxis].astype(np.float32)
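
# A minimal sketch of the `_apply` update on synthetic data (shapes and
# parameter values here are made up for illustration):
#
#     img = np.zeros((1, 64, 64, 3), dtype=np.float32)
#     grad = np.random.randn(1, 64, 64, 3).astype(np.float32)
#     _apply(img, grad, it_i=0, step=1.0, decay=0.998, sigma=1.5,
#            blur_step=10)
#     # `img` now holds the ascended, decayed, and blurred result in place.
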
def deep_dream(input_img,
               downsize=False,
               model='inception',
               layer_i=-1,
               neuron_i=-1,
               n_iterations=100,
               save_gif=None,
               save_images='imgs',
               device='/cpu:0',
               **kwargs):
    """Deep Dream with the given parameters.

    Parameters
    ----------
    input_img : np.ndarray
        Image to apply deep dream to.  Should be 3-dimensional H x W x C
        RGB uint8 or float32.
    downsize : bool, optional
        Whether or not to downsize the image.  Only applies to
        model=='inception'.
    model : str, optional
        Which model to load.  Must be one of: ['inception'], 'i2v_tag',
        'i2v', 'vgg16', or 'vgg_face'.
    layer_i : int, optional
        Which layer to use for finding the gradient.  E.g. the softmax
        layer for inception is -1; for the vgg networks it is -2.  Use the
        function "get_layer_names" to find the layer number that you need.
    neuron_i : int, optional
        Which neuron to use.  -1 for the entire layer.
    n_iterations : int, optional
        Number of iterations to dream.
    save_gif : str, optional
        Filename of a GIF to save all iterations to.
    save_images : str, optional
        Folder to save images to.
    device : str, optional
        Which device to use, e.g. ['/cpu:0'] or '/gpu:0'.
    **kwargs : dict
        See "_apply" for additional parameters.

    Returns
    -------
    imgs : list of np.ndarray
        Images of every iteration.
    """
    net, img, preprocess, deprocess = _setup(input_img, model, downsize)
    batch, height, width, *ch = img.shape

    g = tf.Graph()
    with tf.Session(graph=g) as sess, g.device(device):
        tf.import_graph_def(net['graph_def'], name='net')
        names = [op.name for op in g.get_operations()]
        input_name = names[0] + ':0'
        x = g.get_tensor_by_name(input_name)
        layer = g.get_tensor_by_name(names[layer_i] + ':0')
        layer_shape = sess.run(tf.shape(layer), feed_dict={x: img})
        layer_vec = np.ones(layer_shape) / layer_shape[-1]
        layer_vec[..., neuron_i] = 1.0 - (1.0 / layer_shape[-1])
        ascent = tf.gradients(layer, x)

        imgs = []
        for it_i in range(n_iterations):
            print(it_i, np.min(img), np.max(img))
            if neuron_i == -1:
                this_res = sess.run(ascent, feed_dict={x: img})[0]
            else:
                this_res = sess.run(
                    ascent, feed_dict={x: img, layer: layer_vec})[0]
            _apply(img, this_res, it_i, **kwargs)
            imgs.append(deprocess(img[0]))

            if save_images is not None:
                if not os.path.exists(save_images):
                    os.mkdir(save_images)
                imsave(
                    os.path.join(save_images, 'frame{}.png'.format(it_i)),
                    imgs[-1])

        if save_gif is not None:
            gif.build_gif(imgs, saveto=save_gif)

    return imgs
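
# Example usage (a sketch; 'kitten.png' is a hypothetical RGB image, and the
# pretrained model download is assumed to succeed):
#
#     import matplotlib.pyplot as plt
#     og = plt.imread('kitten.png')[..., :3]
#     imgs = deep_dream(og, model='inception', layer_i=-1, neuron_i=-1,
#                       n_iterations=10, save_images=None, save_gif=None)
#     plt.imshow(imgs[-1])
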
def guided_dream(input_img,
                 guide_img=None,
                 downsize=False,
                 layers=[162, 183, 184, 247],
                 label_i=962,
                 layer_i=-1,
                 feature_loss_weight=1.0,
                 tv_loss_weight=1.0,
                 l2_loss_weight=1.0,
                 softmax_loss_weight=1.0,
                 model='inception',
                 neuron_i=920,
                 n_iterations=100,
                 save_gif=None,
                 save_images='imgs',
                 device='/cpu:0',
                 **kwargs):
    """Deep Dream v2.  Use an optional guide image and other techniques.

    Parameters
    ----------
    input_img : np.ndarray
        Image to apply deep dream to.  Should be 3-dimensional H x W x C
        RGB uint8 or float32.
    guide_img : np.ndarray, optional
        Optional image whose features are found at the given layers.  Must
        pass in a list of layers that you want to find features for.  The
        guided dream will then try to match this image's features at those
        layers.
    downsize : bool, optional
        Whether or not to downsize the image.  Only applies to
        model=='inception'.
    layers : list, optional
        A list of layers to find features for in the "guide_img".
    label_i : int, optional
        Which label to use for the softmax layer.  Use the "get_labels"
        function to find the index corresponding to the object of interest.
        If None, not used.
    layer_i : int, optional
        Which layer to use for finding the gradient.  E.g. the softmax
        layer for inception is -1; for the vgg networks it is -2.  Use the
        function "get_layer_names" to find the layer number that you need.
    feature_loss_weight : float, optional
        Weighting for the feature loss from the guide_img.
    tv_loss_weight : float, optional
        Total variation loss weighting.  Enforces smoothness.
    l2_loss_weight : float, optional
        L2 loss weighting.  Enforces smaller values and reduces saturation.
    softmax_loss_weight : float, optional
        Softmax loss weighting.  Must set label_i.
    model : str, optional
        Which model to load.  Must be one of: ['inception'], 'i2v_tag',
        'i2v', 'vgg16', or 'vgg_face'.
    neuron_i : int, optional
        Which neuron to use.  -1 for the entire layer.
    n_iterations : int, optional
        Number of iterations to dream.
    save_gif : str, optional
        Filename of a GIF to save all iterations to.
    save_images : str, optional
        Folder to save images to.
    device : str, optional
        Which device to use, e.g. ['/cpu:0'] or '/gpu:0'.
    **kwargs : dict
        See "_apply" for additional parameters.

    Returns
    -------
    imgs : list of np.ndarray
        Images of the dream.
    """
    net, img, preprocess, deprocess = _setup(input_img, model, downsize)
    print(img.shape, input_img.shape)
    print(img.min(), img.max())

    if guide_img is not None:
        guide_img = preprocess(guide_img.copy())[np.newaxis]
        assert (guide_img.shape == img.shape)
    batch, height, width, *ch = img.shape

    g = tf.Graph()
    with tf.Session(graph=g) as sess, g.device(device):
        tf.import_graph_def(net['graph_def'], name='net')
        names = [op.name for op in g.get_operations()]
        input_name = names[0] + ':0'
        x = g.get_tensor_by_name(input_name)

        features = [names[layer_i] + ':0' for layer_i in layers]
        feature_loss = tf.Variable(0.0)
        for feature_i in features:
            layer = g.get_tensor_by_name(feature_i)
            if guide_img is None:
                feature_loss += tf.reduce_mean(layer)
            else:
                # Reshape the layer's activations to a 2D vector
                layer = tf.reshape(layer, [-1, 1])
                # Do the same for our guide image's activations
                guide_layer = sess.run(layer, feed_dict={x: guide_img})
                guide_layer = guide_layer.reshape(-1, 1)
                # Now calculate their dot product
                correlation = tf.matmul(guide_layer.T, layer)
                feature_loss += feature_loss_weight * tf.reduce_mean(
                    correlation)

        softmax_loss = tf.Variable(0.0)
        if label_i is not None:
            layer = g.get_tensor_by_name(names[layer_i] + ':0')
            layer_shape = sess.run(tf.shape(layer), feed_dict={x: img})
            layer_vec = np.ones(layer_shape) / layer_shape[-1]
            layer_vec[..., neuron_i] = 1.0 - 1.0 / layer_shape[-1]
            softmax_loss += softmax_loss_weight * tf.reduce_mean(
                tf.nn.l2_loss(layer - layer_vec))

        # Total variation loss: penalize differences between neighboring pixels
        dx = tf.square(x[:, :height - 1, :width - 1, :] -
                       x[:, :height - 1, 1:, :])
        dy = tf.square(x[:, :height - 1, :width - 1, :] -
                       x[:, 1:, :width - 1, :])
        tv_loss = tv_loss_weight * tf.reduce_mean(tf.pow(dx + dy, 1.2))
        l2_loss = l2_loss_weight * tf.reduce_mean(tf.nn.l2_loss(x))

        ascent = tf.gradients(
            feature_loss + softmax_loss + tv_loss + l2_loss, x)[0]
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        imgs = []
        for it_i in range(n_iterations):
            this_res, this_feature_loss, this_softmax_loss, this_tv_loss, \
                this_l2_loss = sess.run(
                    [ascent, feature_loss, softmax_loss, tv_loss, l2_loss],
                    feed_dict={x: img})
            print('feature:', this_feature_loss, 'softmax:',
                  this_softmax_loss, 'tv', this_tv_loss, 'l2', this_l2_loss)
            # Descend the combined loss (hence the negated gradient)
            _apply(img, -this_res, it_i, **kwargs)
            imgs.append(deprocess(img[0]))

            if save_images is not None:
                if not os.path.exists(save_images):
                    os.mkdir(save_images)
                imsave(
                    os.path.join(save_images, 'frame{}.png'.format(it_i)),
                    imgs[-1])

        if save_gif is not None:
            gif.build_gif(imgs, saveto=save_gif)

    return imgs