The deep-convolutional-autoencoder from arashsaber

Fetch values of the latent space

Hi @arashsaber
Thank you for the comprehensive explanation of the code. Can you let me know how do i fetch values of the compressed representation (latent space), which is 'fc2 layer' as i understand? I have been trying out code snippets for the same, but couldn't get the values yet.
Thank you

Allocation of X exceeds 10% of system memory for custom input.

I'm trying to extend the code to train on colored images of size 84x84. But I am getting the following error during training:

2018-07-23 11:23:45.136501: I tensorflow/core/platform/cpu_feature_guard.cc:140] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2018-07-23 11:23:45.156209: W tensorflow/core/framework/allocator.cc:101] Allocation of 1792336896 exceeds 10% of system memory.
2018-07-23 11:23:47.214466: W tensorflow/core/framework/allocator.cc:101] Allocation of 1792336896 exceeds 10% of system memory.
2018-07-23 11:23:49.253364: W tensorflow/core/framework/allocator.cc:101] Allocation of 1792336896 exceeds 10% of system memory.

Initially I thought the issue was with my CPU not being compatible or my machine had to little memory, so I tried to train on a K80 GPU but got the same error. Now I am thinking I made an error in memory allocation but I can't pin point the issue. Here is my code:

import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from glob import glob
from utils import *  # Self-defined utils file in the same dir

# Some important consts
num_examples = 669
batch_size = 1  # try vary batch size to observe mode collapse. was 100, nb may run into mem issue for big batch size

# Fetch input data (faces/trees/imgs)
data_path = os.path.join("./data/celebC/", '*.jpg')
data = glob(data_path)

if len(data) == 0:
    raise Exception("[!] No data found in '" + data_path+ "'")

np.random.shuffle(data)
imreadImg = imread(data[0])  # test read an image

if __debug__:
    print(imreadImg.shape)

if len(imreadImg.shape) >= 3: # check if image is a non-grayscale image by checking channel number
    c_dim = imread(data[0]).shape[-1]
else:
    c_dim = 1

is_grayscale = (c_dim == 1)

# tf Graph Input
# face data image of shape 84*84=7056 N.B. originally without the depth 3
x = tf.placeholder(tf.float32, [1, 84*84*3], name='InputData')

print(x.shape)

# This is
logs_path = "./logs-CAE/"
#   ---------------------------------
"""
We start by creating the layers with name scopes so that the graph in
the tensorboard looks meaningful
"""
#   ---------------------------------
def conv2d(input, name, kshape, strides=[1, 1, 1, 1]):
    with tf.name_scope(name):
        W = tf.get_variable(name='w_'+name,
                            shape=kshape,
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        b = tf.get_variable(name='b_' + name,
                            shape=[kshape[3]],
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        out = tf.nn.conv2d(input,W,strides=strides, padding='SAME')
        out = tf.nn.bias_add(out, b)
        out = tf.nn.relu(out)
        return out
# ---------------------------------
# tf.contrib.layers.conv2d_transpose, do not get confused with 
# tf.layers.conv2d_transpose
def deconv2d(input, name, kshape, n_outputs, strides=[1, 1]):
    with tf.name_scope(name):
        out = tf.contrib.layers.conv2d_transpose(input,
                                                 num_outputs= n_outputs,
                                                 kernel_size=kshape,
                                                 stride=strides,
                                                 padding='SAME',
                                                 weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False),
                                                 biases_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                                 activation_fn=tf.nn.relu)
        return out
#   ---------------------------------
# ksize: A list or tuple of 4 ints. The size of the window for each dimension of the input tensor.
# strides: A list or tuple of 4 ints. The stride of the sliding window for each dimension of the input tensor.
# reference https://www.quora.com/What-is-the-size-of-the-output-of-a-maxpool-layer-in-a-CNN
# for size of output of maxpool layer
def maxpool2d(x,name,kshape=[1, 2, 2, 1], strides=[1, 2, 2, 1]):
    with tf.name_scope(name):
        out = tf.nn.max_pool(x,
                             ksize=kshape, #size of window
                             strides=strides,
                             padding='SAME')
        return out
#   ---------------------------------
def upsample(input, name, factor=[2,2]):
    size = [int(input.shape[1] * factor[0]), int(input.shape[2] * factor[1])]
    with tf.name_scope(name):
        out = tf.image.resize_bilinear(input, size=size, align_corners=None, name=None)
        return out
#   ---------------------------------
def fullyConnected(input, name, output_size):
    with tf.name_scope(name):
        input_size = input.shape[1:]
        input_size = int(np.prod(input_size)) # get total num of cells in one input image
        W = tf.get_variable(name='w_'+name,
                            shape=[input_size, output_size],
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        b = tf.get_variable(name='b_'+name,
                            shape=[output_size],
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        input = tf.reshape(input, [-1, input_size])
        out = tf.nn.relu(tf.add(tf.matmul(input, W), b))
        return out
#   ---------------------------------
def dropout(input, name, keep_rate):
    with tf.name_scope(name):
        out = tf.nn.dropout(input, keep_rate)
        return out
#   ---------------------------------
# Let us now design the autoencoder
def ConvAutoEncoder(x, name):
    with tf.name_scope(name):
        """
        We want to get dimensionality reduction of 11664 to 44656
        Layers:
            input --> 84, 84 (7056)
            conv1 --> kernel size: (5,5), n_filters:25 ???make it small so that it runs fast
            pool1 --> 42, 42, 25
            dropout1 --> keeprate 0.8
            reshape --> 42*42*25
            FC1 --> 42*42*25, 42*42*5
            dropout2 --> keeprate 0.8
            FC2 --> 42*42*5, 8820 --> output is the encoder vars
            FC3 --> 8820, 42*42*5
            dropout3 --> keeprate 0.8
            FC4 --> 42*42*5,42*42*25
            dropout4 --> keeprate 0.8
            reshape --> 42, 42, 25
            deconv1 --> kernel size:(5,5,25), n_filters: 25
            upsample1 --> 84, 84, 25
            FullyConnected (outputlayer) -->  84* 84* 25, 84 * 84 *  1
            reshape --> 84 * 84
        """
        input = tf.reshape(x, shape=[-1, 84, 84, 3])

        # coding part
        c1 = conv2d(input, name='c1', kshape=[5, 5, 3, 25])  # kshape = [k_h, k_w, in_channels, out_chnnels]
        p1 = maxpool2d(c1, name='p1')
        do1 = dropout(p1, name='do1', keep_rate=0.75)
        do1 = tf.reshape(do1, shape=[-1, 42*42*25])  # reshape to 1 dimensional (-1 is batch size)
        fc1 = fullyConnected(do1, name='fc1', output_size=42*42*5)
        do2 = dropout(fc1, name='do2', keep_rate=0.75)
        fc2 = fullyConnected(do2, name='fc2', output_size=42*42)
        # Decoding part
        fc3 = fullyConnected(fc2, name='fc3', output_size=42 * 42 * 5)
        do3 = dropout(fc3, name='do3', keep_rate=0.75)
        fc4 = fullyConnected(do3, name='fc4', output_size=42 * 42 * 25)
        do4 = dropout(fc4, name='do3', keep_rate=0.75)
        do4 = tf.reshape(do4, shape=[-1, 42, 42, 25])
        dc1 = deconv2d(do4, name='dc1', kshape=[5,5],n_outputs=25)
        up1 = upsample(dc1, name='up1', factor=[2, 2])
        output = fullyConnected(input, name='output', output_size=84*84*3)
        # print(output1.shape)
        # print(x.shape)
        with tf.name_scope('cost'):
            # N.B. reduce_mean is a batch operation! finds the mean across the batch
            cost = tf.reduce_mean(tf.square(tf.subtract(output, x)))
        return output, cost
#   ---------------------------------
def train_network(x):
    # Use this output to visualize the output of the decoder.

    output, cost = ConvAutoEncoder(x, 'ConvAutoEnc')
    with tf.name_scope('opt'):
        optimizer = tf.train.AdamOptimizer().minimize(cost)

    # Create a summary to monitor cost tensor
    tf.summary.scalar("cost", cost)

    # Merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()

    n_epochs = 5
    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())  # memory allocation exceeded 10% issue

        # create log writer object
        writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
        if __debug__:
            print("init session")
        for epoch in range(n_epochs):
            avg_cost = 0
            n_batches = int(num_examples / batch_size)
            print("epoch " + str(epoch))
            # Loop over all batches
            for i in range(n_batches):
                print("batch " + str(i))
                # batch_x, batch_y = mnist.train.next_batch(batch_size)
                # .next_batch -> https://stackoverflow.com/questions/41454511/tensorflow-how-is-dataset-train-next-batch-defined/41454722

                batch_files = data[i*batch_size:(i+1)*batch_size]  # get the current batch of files
                # TODO: add get_image() functionality from model.py to transform the batch as well.
                batch = [
                get_image(batch_file,
                        input_height=84,
                        input_width=84,
                        resize_height=84,
                        resize_width=84,
                        crop=True,
                        grayscale=False) for batch_file in batch_files] # get_image will get image from file dir after applying resize operation. 
                batch_images = np.array(batch).astype(np.float32)[:, :, :, None]

                print("BATCH_IMG SHAPE")
                print(batch_images.shape)

                # Run optimization op (backprop) and cost op (to get loss value)
                # _, c, summary = sess.run([optimizer, cost, merged_summary_op], feed_dict={x: batch_x, y: batch_y})
                _, c, summary = sess.run([optimizer, cost, merged_summary_op], feed_dict={x: batch_images})

                # Compute average loss
                avg_cost += c / n_batches
                # write log
                writer.add_summary(summary, epoch * n_batches + i)

            # Display logs per epoch step
            print('Epoch', epoch+1, ' / ', n_epochs, 'cost:', avg_cost)
        print('Optimization Finished')
        print('Cost:', cost.eval({x: mnist.test.images}))


train_network(x)

Any ideas on what might have caused this issue? Thanks!

arashsaber / deep-convolutional-autoencoder Goto Github PK

deep-convolutional-autoencoder's Introduction

Deep-Convolutional-AutoEncoder

deep-convolutional-autoencoder's People

Contributors

Stargazers

Watchers

Forkers

deep-convolutional-autoencoder's Issues

Fetch values of the latent space

how to modify this code for custom dataset

Allocation of X exceeds 10% of system memory for custom input.

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent