kuk / log-progress

https://habr.com/ru/post/276725/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Very Useful - Picky request

Tellusionist opened this issue · comments

This is by far the best progress bar to use in Jupyter, so thank you! I am being greedy now: would it be possible to implement an ETA based on previous iterations, as well as keep track of the running time at a certain interval? I am not looking to get too crazy, just a basic average of the time between task start and end, multiplied by the number of tasks left. For the running time, I imagine updating every second might be unnecessary, so maybe add an argument with a default of something like 5 seconds.

Thank you again for sharing!

I took a stab at implementing this for cases where is_iterator == False, and it seems to be working OK. There is likely a much better way to do this, but I wanted to share! Right now it will only be accurate for runs under 24 hours, but I figured it would be a starting point for anyone who wants the ETAs as well.

def _format_duration(seconds):
    """Render a duration given in seconds as a short human-readable string.

    Durations under a minute keep one decimal place (``'12.3s'``); longer
    ones are truncated to whole seconds and rendered as ``'4m 5s'`` or
    ``'1h 2m 3s'``. Hours are never wrapped at 24, and no platform-specific
    ``strftime`` flags are involved.
    """
    if seconds < 60:
        return '{0:.1f}s'.format(seconds)
    minutes, secs = divmod(int(seconds), 60)
    if minutes < 60:
        return '{0}m {1}s'.format(minutes, secs)
    hours, minutes = divmod(minutes, 60)
    return '{0}h {1}m {2}s'.format(hours, minutes, secs)


def progress_bar(sequence, every=None, size=None, name='Items'):
    """Yield the items of ``sequence`` while showing a progress widget.

    A label and a progress bar are displayed once, then refreshed on the
    first item and on every ``every``-th item. When the total is known the
    label also shows the average time per item, the estimated time left and
    the estimated wall-clock finish time. When iteration ends the bar turns
    to the 'success' style and the label shows the total elapsed time; if
    anything is raised into or out of the loop the bar turns to 'danger'
    and the exception propagates.

    Relies on ``IntProgress``, ``HTML``, ``VBox``, ``display`` and ``time``
    being available in the enclosing module (presumably from ipywidgets /
    IPython.display -- they are not imported in the visible source).

    Args:
        sequence: Iterable to walk. If it has no ``len()`` and ``size`` is
            not given, the total is treated as unknown and no ETA is shown.
        every: Refresh the widget every this many items. Defaults to 1 for
            up to 200 items, otherwise ``int(size / 200)`` (about 0.5%).
            Required when the total is unknown.
        size: Total number of items; overrides ``len(sequence)``.
        name: Text used as the label prefix.

    Yields:
        Each item of ``sequence``, unchanged and in order.

    Raises:
        AssertionError: On first iteration, if the total is unknown and
            ``every`` was not supplied.
    """
    is_iterator = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            is_iterator = True
    if size is not None:
        if every is None:
            every = 1 if size <= 200 else int(size / 200)     # every 0.5%
    elif every is None:
        # Raised explicitly (instead of using ``assert``) so the check still
        # runs under ``python -O``; the exception type is unchanged.
        raise AssertionError('sequence is iterator, set every')

    if is_iterator:
        # Unknown total: show a full bar in the 'info' style.
        progress = IntProgress(min=0, max=1, value=1)
        progress.bar_style = 'info'
    else:
        progress = IntProgress(min=0, max=size, value=0)
    label = HTML()
    box = VBox(children=[label, progress])
    display(box)

    index = 0
    time_elapsed = 0
    overall_start = time.time()
    avg_run_pretty = '--'
    eta_pretty = '--'
    time_end = '...calculating...'
    try:
        for index, record in enumerate(sequence, 1):
            start = time.time()
            if index == 1 or index % every == 0:
                if is_iterator:
                    label.value = '{name}: {index} / ?'.format(
                        name=name,
                        index=index
                    )
                else:
                    progress.value = index

                    label.value = u'{name}: {index} / {size} - Avg Time: {avg_run} | ETA: {eta} ({time_end})'.format(
                        name=name,
                        index=index,
                        size=size,
                        avg_run=avg_run_pretty,
                        eta=eta_pretty,
                        time_end=time_end
                    )
            yield record

            # Execution resumes here once the consumer asks for the next
            # item, so the measured span covers the consumer's work on
            # ``record`` (plus the label refresh above).
            time_elapsed += time.time() - start
            avg_run = time_elapsed / index
            avg_run_pretty = _format_duration(avg_run)

            if not is_iterator:
                # Clamp at zero in case ``size`` under-counts the sequence.
                eta = max(size - index, 0) * avg_run
                eta_pretty = _format_duration(eta)
                # Derive the finish time from the ETA just computed, not
                # from the previous iteration's value.
                time_end = time.strftime(
                    "%I:%M:%S %p", time.localtime(time.time() + eta)
                )
    except BaseException:
        # Deliberately broad (equivalent to a bare ``except``): any exit
        # other than normal exhaustion marks the bar as failed, then the
        # exception is re-raised untouched.
        progress.bar_style = 'danger'
        raise
    else:
        progress.bar_style = 'success'
        progress.value = index
        overall_time = time.time() - overall_start

        label.value = "{name}: {index} - Avg Time: {avg_run} | Time Elapsed: {overall_time}".format(
            name=name,
            index=str(index or '?'),
            avg_run=avg_run_pretty,
            overall_time=_format_duration(overall_time)
        )