mrharicot / monodepth

Unsupervised single image depth prediction with CNNs


tensorflow error

seoluck77 opened this issue · comments

Traceback (most recent call last):
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\client\session.py", line 1365, in _do_call
    return fn(*args)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\client\session.py", line 1350, in _run_fn
    target_list, run_metadata)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\client\session.py", line 1443, in _call_tf_sessionrun
    run_metadata)
tensorflow.python.framework.errors_impl.OutOfRangeError: RandomShuffleQueue '_1_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 8, current size 0)
    [[{{node shuffle_batch}}]]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:/Users/HONG IL/Desktop/Work_2/monodepth-master/something.py", line 163, in <module>
    _, loss_value = sess.run([apply_gradient_op, total_loss])
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\client\session.py", line 956, in run
    run_metadata_ptr)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\client\session.py", line 1180, in _run
    feed_dict_tensor, options, run_metadata)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\client\session.py", line 1359, in _do_run
    run_metadata)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\client\session.py", line 1384, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.OutOfRangeError: RandomShuffleQueue '_1_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 8, current size 0)
    [[node shuffle_batch (defined at C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\ops.py:1748) ]]

Original stack trace for 'shuffle_batch':
  File "C:/Users/HONG IL/Desktop/Work_2/monodepth-master/something.py", line 95, in <module>
    dataloader = MonodepthDataloader(data_path, filenames_file, params, dataset, mode)
  File "C:\Users\HONG IL\Desktop\Work_2\monodepth-master\monodepth_dataloader.py", line 70, in __init__
    params.num_threads)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\util\deprecation.py", line 324, in new_func
    return func(*args, **kwargs)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\input.py", line 1347, in shuffle_batch
    name=name)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\input.py", line 874, in _shuffle_batch
    dequeued = queue.dequeue_many(batch_size, name=name)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\ops\data_flow_ops.py", line 489, in dequeue_many
    self._queue_ref, n=n, component_types=self._dtypes, name=name)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\ops\gen_data_flow_ops.py", line 3862, in queue_dequeue_many_v2
    timeout_ms=timeout_ms, name=name)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 794, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3357, in create_op
    attrs, op_def, compute_device)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3426, in _create_op_internal
    op_def=op_def)
  File "C:\Users\HONG IL\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1748, in __init__
    self._traceback = tf_stack.extract_stack()

I modified the code a bit for my own learning, but this error occurred and the script could not run. I would appreciate help with this error. Thank you; I always enjoy your papers.
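This OutOfRangeError usually points at the input pipeline rather than the model: the shuffle queue was closed before a single example was enqueued, which in monodepth's loader typically means none of the image paths listed in the filenames file could be opened (wrong data_path, wrong extension, or missing files). A minimal first check, assuming the filenames file holds two space-separated relative paths per line (the helper below is a hypothetical diagnostic, not part of the repo):

import os

data_path = './dataset/'
filenames_file = './utils/filenames/kitti_train_files.txt'

def report_missing(data_path, filenames_file):
    # print every image referenced in the filenames file that is absent on disk
    missing = 0
    with open(filenames_file, 'r') as f:
        for line in f:
            for rel_path in line.split():
                full_path = os.path.join(data_path, rel_path)
                if not os.path.isfile(full_path):
                    missing += 1
                    print('missing: {}'.format(full_path))
    print('{} missing file(s)'.format(missing))

report_missing(data_path, filenames_file)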

modified code

from __future__ import absolute_import, division, print_function

# only keep warnings and errors

import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='1'

import numpy as np
import argparse
import re
import time
import tensorflow as tf
import tensorflow.contrib.slim as slim

from monodepth_model import *
from monodepth_dataloader import *
from average_gradients import *

mode='train'
model_name='monodepth'
encoder='vgg'
dataset='kitti'
data_path='./dataset/'
filenames_file='./utils/filenames/kitti_train_files.txt'
input_height=256
input_width=512
batch_size=8
num_epochs=100
learning_rate=1e-4
lr_loss_weight=1.0
alpha_image_loss=0.85
disp_gradient_loss_weight=0.1
do_stereo=True        # was 'store_true', an argparse action string pasted in as a value; it is truthy, i.e. True
wrap_mode='border'
use_deconv=True       # likewise: 'store_true' is truthy
num_gpus=1
num_threads=8
output_directory=''
log_directory='/log/'

checkpoint_path='/checkpoint/'

checkpoint_path=''    # overrides the assignment above, so no checkpoint is restored
retrain=True          # likewise truthy
full_summary=True     # likewise truthy

params = monodepth_parameters(
    encoder=encoder,
    height=input_height,
    width=input_width,
    batch_size=batch_size,
    num_threads=num_threads,
    num_epochs=num_epochs,
    do_stereo=do_stereo,
    wrap_mode=wrap_mode,
    use_deconv=use_deconv,
    alpha_image_loss=alpha_image_loss,
    disp_gradient_loss_weight=disp_gradient_loss_weight,
    lr_loss_weight=lr_loss_weight,
    full_summary=full_summary)

def post_process_disparity(disp):
    _, h, w = disp.shape
    l_disp = disp[0,:,:]
    r_disp = np.fliplr(disp[1,:,:])
    m_disp = 0.5 * (l_disp + r_disp)
    l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
    l_mask = 1.0 - np.clip(20 * (l - 0.05), 0, 1)
    r_mask = np.fliplr(l_mask)
    return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp

def count_text_lines(file_path):
    with open(file_path, 'r') as f:
        lines = f.readlines()
    return len(lines)

global_step = tf.Variable(0, trainable=False)

# OPTIMIZER

num_training_samples = count_text_lines(filenames_file)

steps_per_epoch = np.ceil(num_training_samples / params.batch_size).astype(np.int32)
num_total_steps = params.num_epochs * steps_per_epoch
start_learning_rate = learning_rate

boundaries = [np.int32((3 / 5) * num_total_steps), np.int32((4 / 5) * num_total_steps)]
values = [learning_rate, learning_rate / 2, learning_rate / 4]
learning_rate = tf.train.piecewise_constant(global_step, boundaries, values)
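# With the values above, tf.train.piecewise_constant keeps the learning rate
# at 1e-4 for the first 60% of the training steps, halves it to 5e-5 between
# 60% and 80%, and quarters it to 2.5e-5 for the final 20%, switching each
# time global_step crosses a boundary.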

opt_step = tf.train.AdamOptimizer(learning_rate)

print("total number of samples: {}".format(num_training_samples))
print("total number of steps: {}".format(num_total_steps))

dataloader = MonodepthDataloader(data_path, filenames_file, params, dataset, mode)
left = dataloader.left_image_batch
right = dataloader.right_image_batch

# split for each gpu

left_splits = tf.split(left, num_gpus, 0)
right_splits = tf.split(right, num_gpus, 0)

tower_grads = []
tower_losses = []
reuse_variables = None

with tf.variable_scope(tf.get_variable_scope()):
    for i in range(num_gpus):
        with tf.device('/gpu:%d' % i):
            model = MonodepthModel(params, mode, left_splits[i], right_splits[i], reuse_variables, i)

            loss = model.total_loss
            tower_losses.append(loss)

            reuse_variables = True

            grads = opt_step.compute_gradients(loss)

            tower_grads.append(grads)

grads = average_gradients(tower_grads)

apply_gradient_op = opt_step.apply_gradients(grads, global_step=global_step)

total_loss = tf.reduce_mean(tower_losses)

tf.summary.scalar('learning_rate', learning_rate, ['model_0'])
tf.summary.scalar('total_loss', total_loss, ['model_0'])
summary_op = tf.summary.merge_all('model_0')

# SESSION

config = tf.ConfigProto(allow_soft_placement=True)
sess = tf.Session(config=config)

# SAVER

summary_writer = tf.summary.FileWriter(log_directory + '/' + model_name, sess.graph)
train_saver = tf.train.Saver()

# COUNT PARAMS

total_num_parameters = 0
for variable in tf.trainable_variables():
    total_num_parameters += np.array(variable.get_shape().as_list()).prod()
print("number of trainable parameters: {}".format(total_num_parameters))

# INIT

sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
coordinator = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coordinator)
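# start_queue_runners launches the background threads that fill the shuffle
# queue built by MonodepthDataloader; if those readers find no usable files,
# the queue is closed empty and the training sess.run() below fails with the
# OutOfRangeError reported above.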

# LOAD CHECKPOINT IF SET

if checkpoint_path != '':
    # strip any file suffix (e.g. '.meta') so restore() receives the checkpoint prefix
    train_saver.restore(sess, checkpoint_path.split(".")[0])

    if retrain:
        sess.run(global_step.assign(0))

# GO!

start_step = global_step.eval(session=sess)
start_time = time.time()
for step in range(start_step, num_total_steps):
    before_op_time = time.time()
    _, loss_value = sess.run([apply_gradient_op, total_loss])
    duration = time.time() - before_op_time
    if step and step % 100 == 0:
        examples_per_sec = params.batch_size / duration
        time_sofar = (time.time() - start_time) / 3600
        training_time_left = (num_total_steps / step - 1.0) * time_sofar
        print_string = 'batch {:>6} | examples/s: {:4.2f} | loss: {:.5f} | time elapsed: {:.2f}h | time left: {:.2f}h'
        print(print_string.format(step, examples_per_sec, loss_value, time_sofar, training_time_left))
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, global_step=step)
    if step and step % 10000 == 0:
        train_saver.save(sess, log_directory + '/' + model_name + '/model', global_step=step)

train_saver.save(sess, log_directory + '/' + model_name + '/model', global_step=num_total_steps)
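One small gap worth noting in the script above: the queue-runner threads are never stopped, so even a run that finishes cleanly can exit with queue exceptions. The usual TF 1.x cleanup, appended after the final save, would be:

coordinator.request_stop()
coordinator.join(threads)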

Hi @seoluck77. Did you make any progress with this issue? Thanks for any response.

Hi, I am getting the same error. Yes please, it would be nice to know if there has been any progress with this issue. Thank you!

If you need this code, I can help you.

Hey, if you solved this error, could you please tell me how you resolved it? Thank you!

Hey, if you solved this error, could you please tell me how you resolved it? Thank you very much!

I have solved this error. It still existed after I converted the pictures to PNG format, so I deleted the corresponding PNG entries from the training filenames file.
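One reading of that fix: the training filenames file listed images that did not exist (or had a different extension) under data_path, which is exactly what leaves the queue empty, so pruning the list is one way out. A hypothetical sketch under that assumption, writing a cleaned copy rather than overwriting the original:

import os

data_path = './dataset/'
filenames_file = './utils/filenames/kitti_train_files.txt'

with open(filenames_file, 'r') as f:
    lines = f.readlines()

# keep a line only if every path on it resolves to an existing file
kept = [line for line in lines
        if all(os.path.isfile(os.path.join(data_path, p)) for p in line.split())]

with open(filenames_file + '.pruned', 'w') as f:
    f.writelines(kept)

print('kept {} of {} lines'.format(len(kept), len(lines)))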

@yerongguang how did you solve it? Could you please clarify?