# Source code for cadl.stylenet

"""Style Net w/ tests for Video Style Net.
"""
"""
Video Style Net requires OpenCV 3.0.0+ w/ Contrib for Python to be installed.

Copyright 2017 Parag K. Mital.  See also NOTICE.md.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
from cadl import vgg16
from cadl import gif
from scipy.misc import imresize


def make_4d(img):
    """Create a 4-dimensional N x H x W x C image.

    Parameters
    ----------
    img : np.ndarray
        Given image as H x W x C or H x W.

    Returns
    -------
    img : np.ndarray
        N x H x W x C image.

    Raises
    ------
    ValueError
        Unexpected number of dimensions.
    """
    n_dims = img.ndim
    if n_dims == 4:
        # Already batched: hand back unchanged.
        return img
    if n_dims == 3:
        # H x W x C -> 1 x H x W x C
        return img[np.newaxis]
    if n_dims == 2:
        # H x W -> 1 x H x W x 1
        return img[np.newaxis, ..., np.newaxis]
    raise ValueError('Incorrect dimensions for image!')
def stylize(content_img, style_img, base_img=None, saveto=None, gif_step=5,
            n_iterations=100, style_weight=1.0, content_weight=1.0):
    """Stylization w/ the given content and style images.

    Follows the approach in Leon Gatys et al. (TF1 graph mode: content and
    style targets are computed w/ a frozen VGG16, then the input image is
    optimized to match both.)

    Parameters
    ----------
    content_img : np.ndarray
        Image to use for finding the content features.
    style_img : np.ndarray
        Image to use for finding the style features.
    base_img : None, optional
        Image to use for the base content.  Can be noise or an existing
        image.  If None, the content image will be used.
    saveto : str, optional
        Name of GIF image to write to, e.g. "stylization.gif"
    gif_step : int, optional
        Modulo of iterations to save the current stylization.
    n_iterations : int, optional
        Number of iterations to run for.
    style_weight : float, optional
        Weighting on the style features.
    content_weight : float, optional
        Weighting on the content features.

    Returns
    -------
    stylization : np.ndarray
        Final iteration of the stylization.
    """
    # Preprocess both content and style images (resized to VGG's 224x224 and
    # batched w/ a leading axis).
    content_img = vgg16.preprocess(content_img, dsize=(224, 224))[np.newaxis]
    style_img = vgg16.preprocess(style_img, dsize=(224, 224))[np.newaxis]
    if base_img is None:
        base_img = content_img
    else:
        base_img = make_4d(vgg16.preprocess(base_img, dsize=(224, 224)))

    # Get Content and Style features from a frozen VGG16 graph.
    net = vgg16.get_vgg_model()
    g = tf.Graph()
    with tf.Session(graph=g) as sess:
        tf.import_graph_def(net['graph_def'], name='vgg')
        names = [op.name for op in g.get_operations()]
        # First op of the imported graph is the image placeholder.
        x = g.get_tensor_by_name(names[0] + ':0')
        content_layer = 'vgg/conv3_2/conv3_2:0'
        # Feeding all-ones into the dropout layers' random_uniform tensors
        # effectively disables dropout for deterministic features.
        content_features = g.get_tensor_by_name(content_layer).eval(
            feed_dict={
                x: content_img,
                'vgg/dropout_1/random_uniform:0': [[1.0] * 4096],
                'vgg/dropout/random_uniform:0': [[1.0] * 4096]
            })
        style_layers = [
            'vgg/conv1_1/conv1_1:0', 'vgg/conv2_1/conv2_1:0',
            'vgg/conv3_1/conv3_1:0', 'vgg/conv4_1/conv4_1:0',
            'vgg/conv5_1/conv5_1:0'
        ]
        style_activations = []
        for style_i in style_layers:
            style_activation_i = g.get_tensor_by_name(style_i).eval(
                feed_dict={
                    x: style_img,
                    'vgg/dropout_1/random_uniform:0': [[1.0] * 4096],
                    'vgg/dropout/random_uniform:0': [[1.0] * 4096]
                })
            style_activations.append(style_activation_i)
        # Style targets are normalized Gram matrices of each layer's
        # activations (channel co-occurrence statistics).
        style_features = []
        for style_activation_i in style_activations:
            s_i = np.reshape(style_activation_i,
                             [-1, style_activation_i.shape[-1]])
            gram_matrix = np.matmul(s_i.T, s_i) / s_i.size
            style_features.append(gram_matrix.astype(np.float32))

    # Optimize: rebuild VGG w/ the synthesized image as a trainable Variable
    # mapped onto the graph's input.
    g = tf.Graph()
    with tf.Session(graph=g) as sess:
        net_input = tf.Variable(base_img)
        tf.import_graph_def(
            net['graph_def'], name='vgg', input_map={'images:0': net_input})
        content_loss = tf.nn.l2_loss(
            (g.get_tensor_by_name(content_layer) - content_features) /
            content_features.size)
        style_loss = np.float32(0.0)
        for style_layer_i, style_gram_i in zip(style_layers, style_features):
            layer_i = g.get_tensor_by_name(style_layer_i)
            layer_shape = layer_i.get_shape().as_list()
            layer_size = layer_shape[1] * layer_shape[2] * layer_shape[3]
            layer_flat = tf.reshape(layer_i, [-1, layer_shape[3]])
            # Same Gram-matrix normalization as the numpy target above.
            gram_matrix = tf.matmul(
                tf.transpose(layer_flat), layer_flat) / layer_size
            style_loss = tf.add(
                style_loss,
                tf.nn.l2_loss((gram_matrix - style_gram_i) /
                              np.float32(style_gram_i.size)))
        loss = content_weight * content_loss + style_weight * style_loss
        optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        imgs = []
        for it_i in range(n_iterations):
            # Feed ones into the dropout noise tensors every step so dropout
            # stays disabled during optimization.
            _, this_loss, synth = sess.run(
                [optimizer, loss, net_input],
                feed_dict={
                    'vgg/dropout_1/random_uniform:0': np.ones(
                        g.get_tensor_by_name('vgg/dropout_1/random_uniform:0')
                        .get_shape().as_list()),
                    'vgg/dropout/random_uniform:0': np.ones(
                        g.get_tensor_by_name('vgg/dropout/random_uniform:0')
                        .get_shape().as_list())
                })
            print(
                "iteration %d, loss: %f, range: (%f - %f)" %
                (it_i, this_loss, np.min(synth), np.max(synth)),
                end='\r')
            if it_i % gif_step == 0:
                imgs.append(np.clip(synth[0], 0, 1))
        if saveto is not None:
            gif.build_gif(imgs, saveto=saveto)
    return np.clip(synth[0], 0, 1)
def warp_img(img, dx, dy):
    """Apply the motion vectors to the given image.

    Each source pixel (row_i, col_i) is written to the destination
    (row_i + dy, col_i + dx), w/ destination coordinates clipped to the
    image bounds.  Destinations no source maps to keep their original
    value, since the output starts as a copy of `img`.

    Parameters
    ----------
    img : np.ndarray
        H x W x C input image to apply motion to.
    dx : np.ndarray
        H x W matrix defining the magnitude of the X (column) vector.
    dy : np.ndarray
        H x W matrix defining the magnitude of the Y (row) vector.

    Returns
    -------
    img : np.ndarray
        Image with pixels warped according to dx, dy.
    """
    warped = img.copy()
    n_rows, n_cols = img.shape[:2]
    for row_i in range(n_rows):
        for col_i in range(n_cols):
            dx_i = int(np.round(dx[row_i, col_i]))
            dy_i = int(np.round(dy[row_i, col_i]))
            # Bugfix: x (column) offsets must be clipped against the width
            # and y (row) offsets against the height.  The original clipped
            # dx w/ img.shape[0] and dy w/ img.shape[1] (swapped), which
            # only worked for square images and raised IndexError otherwise.
            sample_dx = np.clip(dx_i + col_i, 0, n_cols - 1)
            sample_dy = np.clip(dy_i + row_i, 0, n_rows - 1)
            warped[sample_dy, sample_dx, :] = img[row_i, col_i, :]
    return warped
def test_video(style_img='arles.jpg', videodir='kurosawa'):
    """Test for video stylization.

    Stylizes every ``.png`` frame found in `videodir`.  When OpenCV w/ the
    contrib modules is available, each frame's stylization is initialized
    from the previous stylized frame warped by the DeepFlow optical flow
    between the two frames' luminance channels; otherwise each frame is
    stylized independently.

    Parameters
    ----------
    style_img : str, optional
        Filename of the style image.
    videodir : str, optional
        Directory containing the video frames as .png files.

    Returns
    -------
    imgs : list of np.ndarray
        The stylized frames.
    """
    try:
        import cv2
        # Bugfix: cv2.optflow only exists in builds w/ the contrib modules,
        # so its absence raises AttributeError, not ImportError.  Catch both
        # so we fall back to independent per-frame stylization either way.
        optflow = cv2.optflow.createOptFlow_DeepFlow()
        has_cv2 = True
    except (ImportError, AttributeError):
        has_cv2 = False
    style_img = plt.imread(style_img)
    # Bugfix: os.listdir order is arbitrary; sort so frames are processed
    # in sequence (flow between consecutive frames is meaningless otherwise).
    content_files = [
        os.path.join(videodir, f) for f in sorted(os.listdir(videodir))
        if f.endswith('.png')
    ]
    content_img = plt.imread(content_files[0])
    style_img = imresize(style_img, (448, 448)).astype(np.float32) / 255.0
    content_img = imresize(content_img, (448, 448)).astype(np.float32) / 255.0
    if has_cv2:
        # Luminance (HSV value channel) is what the flow is computed on.
        prev_lum = cv2.cvtColor(content_img, cv2.COLOR_RGB2HSV)[:, :, 2]
    else:
        # Fallback: approximate luminance w/ fixed RGB weights.
        prev_lum = (content_img[..., 0] * 0.3 + content_img[..., 1] * 0.59 +
                    content_img[..., 2] * 0.11)
    imgs = []
    stylized = stylize(
        content_img,
        style_img,
        content_weight=5.0,
        style_weight=0.5,
        n_iterations=50)
    plt.imsave(fname=content_files[0] + 'stylized.png', arr=stylized)
    imgs.append(stylized)
    for f in content_files[1:]:
        content_img = plt.imread(f)
        content_img = imresize(content_img,
                               (448, 448)).astype(np.float32) / 255.0
        if has_cv2:
            lum = cv2.cvtColor(content_img, cv2.COLOR_RGB2HSV)[:, :, 2]
            flow = optflow.calc(prev_lum, lum, None)
            # Carry the previous stylization forward along the motion field
            # and use it as the base image for temporal coherence.
            warped = warp_img(stylized, flow[..., 0], flow[..., 1])
            stylized = stylize(
                content_img,
                style_img,
                content_weight=5.0,
                style_weight=0.5,
                base_img=warped,
                n_iterations=50)
        else:
            lum = (content_img[..., 0] * 0.3 + content_img[..., 1] * 0.59 +
                   content_img[..., 2] * 0.11)
            stylized = stylize(
                content_img,
                style_img,
                content_weight=5.0,
                style_weight=0.5,
                base_img=None,
                n_iterations=50)
        imgs.append(stylized)
        plt.imsave(fname=f + 'stylized.png', arr=stylized)
        prev_lum = lum
    return imgs
def test():
    """Test for artistic stylization.

    Downloads a Monet painting as the style image and a Bosch painting as
    the content image, then runs a short stylization.
    """
    from six.moves import urllib

    def _download(url):
        # Fetch the image into the working directory (named by the URL's
        # final path component) and load it as float32 in [0, 1].
        filepath, _ = urllib.request.urlretrieve(url, url.split('/')[-1],
                                                 None)
        return plt.imread(filepath).astype(np.float32) / 255.0

    style = _download(
        'https://upload.wikimedia.org/wikipedia/commons/thumb/5/54/' +
        'Claude_Monet%2C_Impression%2C_soleil_levant.jpg/617px-Claude_Monet' +
        '%2C_Impression%2C_soleil_levant.jpg?download')
    content = _download(
        'https://upload.wikimedia.org/wikipedia/commons/thumb/a/ae/' +
        'El_jard%C3%ADn_de_las_Delicias%2C_de_El_Bosco.jpg/640px-El_jard' +
        '%C3%ADn_de_las_Delicias%2C_de_El_Bosco.jpg')
    stylize(content, style, n_iterations=20)
# Run the video stylization demo when executed as a script.
if __name__ == '__main__':
    test_video()