I'm trying to extend the code to train on colored images of size 84x84. But I am getting the following error during training:
Initially I thought the issue was with my CPU not being compatible or my machine had to little memory, so I tried to train on a K80 GPU but got the same error. Now I am thinking I made an error in memory allocation but I can't pin point the issue. Here is my code:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from glob import glob
from utils import * # Self-defined utils file in the same dir
# Some important consts
num_examples = 669
batch_size = 1 # try vary batch size to observe mode collapse. was 100, nb may run into mem issue for big batch size
# Fetch input data (faces/trees/imgs)
data_path = os.path.join("./data/celebC/", '*.jpg')
data = glob(data_path)
if len(data) == 0:
raise Exception("[!] No data found in '" + data_path+ "'")
np.random.shuffle(data)
imreadImg = imread(data[0]) # test read an image
if __debug__:
print(imreadImg.shape)
if len(imreadImg.shape) >= 3: # check if image is a non-grayscale image by checking channel number
c_dim = imread(data[0]).shape[-1]
else:
c_dim = 1
is_grayscale = (c_dim == 1)
# tf Graph Input
# face data image of shape 84*84=7056 N.B. originally without the depth 3
x = tf.placeholder(tf.float32, [1, 84*84*3], name='InputData')
print(x.shape)
# This is
logs_path = "./logs-CAE/"
# ---------------------------------
"""
We start by creating the layers with name scopes so that the graph in
the tensorboard looks meaningful
"""
# ---------------------------------
def conv2d(input, name, kshape, strides=[1, 1, 1, 1]):
with tf.name_scope(name):
W = tf.get_variable(name='w_'+name,
shape=kshape,
initializer=tf.contrib.layers.xavier_initializer(uniform=False))
b = tf.get_variable(name='b_' + name,
shape=[kshape[3]],
initializer=tf.contrib.layers.xavier_initializer(uniform=False))
out = tf.nn.conv2d(input,W,strides=strides, padding='SAME')
out = tf.nn.bias_add(out, b)
out = tf.nn.relu(out)
return out
# ---------------------------------
# tf.contrib.layers.conv2d_transpose, do not get confused with
# tf.layers.conv2d_transpose
def deconv2d(input, name, kshape, n_outputs, strides=[1, 1]):
with tf.name_scope(name):
out = tf.contrib.layers.conv2d_transpose(input,
num_outputs= n_outputs,
kernel_size=kshape,
stride=strides,
padding='SAME',
weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False),
biases_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
activation_fn=tf.nn.relu)
return out
# ---------------------------------
# ksize: A list or tuple of 4 ints. The size of the window for each dimension of the input tensor.
# strides: A list or tuple of 4 ints. The stride of the sliding window for each dimension of the input tensor.
# reference https://www.quora.com/What-is-the-size-of-the-output-of-a-maxpool-layer-in-a-CNN
# for size of output of maxpool layer
def maxpool2d(x,name,kshape=[1, 2, 2, 1], strides=[1, 2, 2, 1]):
with tf.name_scope(name):
out = tf.nn.max_pool(x,
ksize=kshape, #size of window
strides=strides,
padding='SAME')
return out
# ---------------------------------
def upsample(input, name, factor=[2,2]):
size = [int(input.shape[1] * factor[0]), int(input.shape[2] * factor[1])]
with tf.name_scope(name):
out = tf.image.resize_bilinear(input, size=size, align_corners=None, name=None)
return out
# ---------------------------------
def fullyConnected(input, name, output_size):
with tf.name_scope(name):
input_size = input.shape[1:]
input_size = int(np.prod(input_size)) # get total num of cells in one input image
W = tf.get_variable(name='w_'+name,
shape=[input_size, output_size],
initializer=tf.contrib.layers.xavier_initializer(uniform=False))
b = tf.get_variable(name='b_'+name,
shape=[output_size],
initializer=tf.contrib.layers.xavier_initializer(uniform=False))
input = tf.reshape(input, [-1, input_size])
out = tf.nn.relu(tf.add(tf.matmul(input, W), b))
return out
# ---------------------------------
def dropout(input, name, keep_rate):
with tf.name_scope(name):
out = tf.nn.dropout(input, keep_rate)
return out
# ---------------------------------
# Let us now design the autoencoder
def ConvAutoEncoder(x, name):
with tf.name_scope(name):
"""
We want to get dimensionality reduction of 11664 to 44656
Layers:
input --> 84, 84 (7056)
conv1 --> kernel size: (5,5), n_filters:25 ???make it small so that it runs fast
pool1 --> 42, 42, 25
dropout1 --> keeprate 0.8
reshape --> 42*42*25
FC1 --> 42*42*25, 42*42*5
dropout2 --> keeprate 0.8
FC2 --> 42*42*5, 8820 --> output is the encoder vars
FC3 --> 8820, 42*42*5
dropout3 --> keeprate 0.8
FC4 --> 42*42*5,42*42*25
dropout4 --> keeprate 0.8
reshape --> 42, 42, 25
deconv1 --> kernel size:(5,5,25), n_filters: 25
upsample1 --> 84, 84, 25
FullyConnected (outputlayer) --> 84* 84* 25, 84 * 84 * 1
reshape --> 84 * 84
"""
input = tf.reshape(x, shape=[-1, 84, 84, 3])
# coding part
c1 = conv2d(input, name='c1', kshape=[5, 5, 3, 25]) # kshape = [k_h, k_w, in_channels, out_chnnels]
p1 = maxpool2d(c1, name='p1')
do1 = dropout(p1, name='do1', keep_rate=0.75)
do1 = tf.reshape(do1, shape=[-1, 42*42*25]) # reshape to 1 dimensional (-1 is batch size)
fc1 = fullyConnected(do1, name='fc1', output_size=42*42*5)
do2 = dropout(fc1, name='do2', keep_rate=0.75)
fc2 = fullyConnected(do2, name='fc2', output_size=42*42)
# Decoding part
fc3 = fullyConnected(fc2, name='fc3', output_size=42 * 42 * 5)
do3 = dropout(fc3, name='do3', keep_rate=0.75)
fc4 = fullyConnected(do3, name='fc4', output_size=42 * 42 * 25)
do4 = dropout(fc4, name='do3', keep_rate=0.75)
do4 = tf.reshape(do4, shape=[-1, 42, 42, 25])
dc1 = deconv2d(do4, name='dc1', kshape=[5,5],n_outputs=25)
up1 = upsample(dc1, name='up1', factor=[2, 2])
output = fullyConnected(input, name='output', output_size=84*84*3)
# print(output1.shape)
# print(x.shape)
with tf.name_scope('cost'):
# N.B. reduce_mean is a batch operation! finds the mean across the batch
cost = tf.reduce_mean(tf.square(tf.subtract(output, x)))
return output, cost
# ---------------------------------
def train_network(x):
# Use this output to visualize the output of the decoder.
output, cost = ConvAutoEncoder(x, 'ConvAutoEnc')
with tf.name_scope('opt'):
optimizer = tf.train.AdamOptimizer().minimize(cost)
# Create a summary to monitor cost tensor
tf.summary.scalar("cost", cost)
# Merge all summaries into a single op
merged_summary_op = tf.summary.merge_all()
n_epochs = 5
with tf.Session() as sess:
sess.run(tf.global_variables_initializer()) # memory allocation exceeded 10% issue
# create log writer object
writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
if __debug__:
print("init session")
for epoch in range(n_epochs):
avg_cost = 0
n_batches = int(num_examples / batch_size)
print("epoch " + str(epoch))
# Loop over all batches
for i in range(n_batches):
print("batch " + str(i))
# batch_x, batch_y = mnist.train.next_batch(batch_size)
# .next_batch -> https://stackoverflow.com/questions/41454511/tensorflow-how-is-dataset-train-next-batch-defined/41454722
batch_files = data[i*batch_size:(i+1)*batch_size] # get the current batch of files
# TODO: add get_image() functionality from model.py to transform the batch as well.
batch = [
get_image(batch_file,
input_height=84,
input_width=84,
resize_height=84,
resize_width=84,
crop=True,
grayscale=False) for batch_file in batch_files] # get_image will get image from file dir after applying resize operation.
batch_images = np.array(batch).astype(np.float32)[:, :, :, None]
print("BATCH_IMG SHAPE")
print(batch_images.shape)
# Run optimization op (backprop) and cost op (to get loss value)
# _, c, summary = sess.run([optimizer, cost, merged_summary_op], feed_dict={x: batch_x, y: batch_y})
_, c, summary = sess.run([optimizer, cost, merged_summary_op], feed_dict={x: batch_images})
# Compute average loss
avg_cost += c / n_batches
# write log
writer.add_summary(summary, epoch * n_batches + i)
# Display logs per epoch step
print('Epoch', epoch+1, ' / ', n_epochs, 'cost:', avg_cost)
print('Optimization Finished')
print('Cost:', cost.eval({x: mnist.test.images}))
train_network(x)