Hello,
I successfully trained a model on a custom dataset yesterday, but when I tried to retrain it, I got the following error:
[*] Reading checkpoints...
[*] Success to read DCGAN.model-64502
[*] Load SUCCESS
2019-01-30 09:23:31.722534: E tensorflow/stream_executor/cuda/cuda_dnn.cc:373] Could not create cudnn handle: CUDNN_STATUS_NOT_INITIALIZED
2019-01-30 09:23:31.722578: E tensorflow/stream_executor/cuda/cuda_dnn.cc:381] Possibly insufficient driver version: 384.130.0
2019-01-30 09:23:31.722589: W ./tensorflow/stream_executor/stream.h:2093] attempting to perform DNN operation using StreamExecutor without DNN support
2019-01-30 09:23:31.722603: E tensorflow/stream_executor/cuda/cuda_dnn.cc:373] Could not create cudnn handle: CUDNN_STATUS_NOT_INITIALIZED
2019-01-30 09:23:31.722641: E tensorflow/stream_executor/cuda/cuda_dnn.cc:381] Possibly insufficient driver version: 384.130.0
2019-01-30 09:23:31.722650: E tensorflow/stream_executor/cuda/cuda_dnn.cc:373] Could not create cudnn handle: CUDNN_STATUS_NOT_INITIALIZED
2019-01-30 09:23:31.722665: E tensorflow/stream_executor/cuda/cuda_dnn.cc:381] Possibly insufficient driver version: 384.130.0
Traceback (most recent call last):
File "main.py", line 124, in <module>
tf.app.run()
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/platform/app.py", line 125, in run
_sys.exit(main(argv))
File "main.py", line 106, in main
dcgan.train(FLAGS)
File "/home/lipika/AnoGAN/model.py", line 239, in train
_, summary_str = self.sess.run([d_optim, self.d_sum], feed_dict = d_feed_dict)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 929, in run
run_metadata_ptr)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1152, in _run
feed_dict_tensor, options, run_metadata)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1328, in _do_run
run_metadata)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1348, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: cuDNN launch failure : input shape ([4,512,31,32])
[[node generator/g_bn0/FusedBatchNorm (defined at /home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/layers.py:368) = FusedBatchNorm[T=DT_FLOAT, data_format="NCHW", epsilon=1.001e-05, is_training=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](generator/g_bn0/FusedBatchNorm-0-TransposeNHWCToNCHW-LayoutOptimizer, generator/g_bn0/gamma/read, generator/g_bn0/beta/read, discriminator/d_bn1/Const, discriminator/d_bn1/Const)]]
[[{{node add/_109}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_595_add", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Caused by op u'generator/g_bn0/FusedBatchNorm', defined at:
File "main.py", line 124, in <module>
tf.app.run()
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/platform/app.py", line 125, in run
_sys.exit(main(argv))
File "main.py", line 101, in main
test_dir = FLAGS.test_dir)
File "/home/lipika/AnoGAN/model.py", line 75, in __init__
self.build_model()
File "/home/lipika/AnoGAN/model.py", line 112, in build_model
self.G = self.generator(self.z, self.y)
File "/home/lipika/AnoGAN/model.py", line 346, in generator
h0 = tf.nn.relu(self.g_bn0(self.h0))
File "/home/lipika/AnoGAN/ops.py", line 34, in __call__
scope=self.name)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 182, in func_with_args
return func(*args, **current_args)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 596, in batch_norm
scope=scope)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 382, in _fused_batch_norm
is_training, _fused_batch_norm_training, _fused_batch_norm_inference)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/utils.py", line 214, in smart_cond
return static_cond(pred_value, fn1, fn2)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/utils.py", line 192, in static_cond
return fn1()
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 368, in _fused_batch_norm_training
inputs, gamma, beta, epsilon=epsilon, data_format=data_format)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/ops/nn_impl.py", line 909, in fused_batch_norm
name=name)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 3466, in _fused_batch_norm
is_training=is_training, name=name)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
op_def=op_def)
File "/home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
self._traceback = tf_stack.extract_stack()
InternalError (see above for traceback): cuDNN launch failure : input shape ([4,512,31,32])
[[node generator/g_bn0/FusedBatchNorm (defined at /home/lipika/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/layers.py:368) = FusedBatchNorm[T=DT_FLOAT, data_format="NCHW", epsilon=1.001e-05, is_training=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](generator/g_bn0/FusedBatchNorm-0-TransposeNHWCToNCHW-LayoutOptimizer, generator/g_bn0/gamma/read, generator/g_bn0/beta/read, discriminator/d_bn1/Const, discriminator/d_bn1/Const)]]
[[{{node add/_109}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_595_add", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Could you please assist me with this?
Thank you :)