Smoothing in tqdm progress bar has no effect
heth27 opened this issue · comments
Bug description
The option smoothing when creating progress bars in TQDMProgressBar has no effect in the default implementation, as
_update_n only calls bar.refresh() and not the update method of the progress bar. Thus only the global average is taken, as the update method of the tqdm class is responsible for calculating moving averages.
Either the update method of the progress bar could be used or it should be added to the documentation if smoothing having no effect is the desired behavior (overriding a default that has no effect is a bit misleading)
What version are you seeing the problem on?
master
How to reproduce the bug
import time
import lightning.pytorch as pl
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, Sampler
from src.main.ml.data.data_augmentation.helpers.random_numbers import create_rng_from_string
import sys
from typing import Any
import lightning.pytorch as pl
from lightning.pytorch.callbacks import TQDMProgressBar
from lightning.pytorch.callbacks.progress.tqdm_progress import Tqdm
from lightning.pytorch.utilities.types import STEP_OUTPUT
from typing_extensions import override
class LitProgressBar(TQDMProgressBar):
"""
different smoothing factor than default lightning TQDMProgressBar, where smoothing=0 (average),
instead of smoothing=1 (current speed) is taken
See also:
https://tqdm.github.io/docs/tqdm/
"""
def init_train_tqdm(self) -> Tqdm:
"""Override this to customize the tqdm bar for training."""
return Tqdm(
desc=self.train_description,
position=(2 * self.process_position),
disable=self.is_disabled,
leave=True,
dynamic_ncols=True,
file=sys.stdout,
smoothing=1.0,
bar_format=self.BAR_FORMAT,
)
# default method
# @override
# def on_train_batch_end(
# self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", outputs: STEP_OUTPUT, batch: Any, batch_idx: int
# ) -> None:
# n = batch_idx + 1
# if self._should_update(n, self.train_progress_bar.total):
# _update_n(self.train_progress_bar, n)
# self.train_progress_bar.set_postfix(self.get_metrics(trainer, pl_module))
# my own method that uses smoothing by using the update method of progress bar
@override
def on_train_batch_end(
self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", outputs: STEP_OUTPUT, batch: Any,
batch_idx: int
) -> None:
n = batch_idx + 1
if self._should_update(n, self.train_progress_bar.total):
self.train_progress_bar.update(self.refresh_rate)
self.train_progress_bar.set_postfix(self.get_metrics(trainer, pl_module))
class TestModule(nn.Module):
def __init__(self, in_dim=512, out_dim=16):
super().__init__()
self.in_dim = in_dim
self.out_dim = out_dim
self.simple_layer = nn.Linear(self.in_dim, self.out_dim, bias=True)
def forward(self, input):
return self.simple_layer(input)
class TestBatchSampler(Sampler):
def __init__(self, step=0):
super().__init__()
self.step = step
def __len__(self) -> int:
return 1e100
# return len(self.train_allfiles)
def __iter__(self): # -> Iterator[int]:
return self
def __next__(self): # -> Iterator[int]:
return_value = self.step
self.step += 1
return [return_value]
class TestDataset(Dataset):
def __init__(self, in_dim):
super().__init__()
self.in_dim = in_dim
self.total_len = 512
def __len__(self):
return 1
def __getitem__(self, idx):
rng = create_rng_from_string(
str(idx) + "_"
+ "random_choice_sampler")
return torch.tensor(rng.random(self.in_dim), dtype=torch.float32)
class TestDataModule(pl.LightningDataModule):
def __init__(self, start_step=0):
super().__init__()
self.in_dim = 512
self.val_batch_size = 1
self.start_step = start_step
def train_dataloader(self):
train_ds = TestDataset(self.in_dim)
train_dl = DataLoader(train_ds, batch_sampler=TestBatchSampler(step=self.start_step), num_workers=4,
shuffle=False)
return train_dl
class TestLitModel(pl.LightningModule):
def __init__(self):
super().__init__()
self.test_module_obj = TestModule(in_dim=512, out_dim=16)
self.automatic_optimization = False
def training_step(self, batch, batch_idx):
if batch_idx == 0:
time.sleep(5)
time.sleep(0.5)
optimizer = self.optimizers()
output = self.test_module_obj(batch)
loss = output.sum()
self.manual_backward(loss)
optimizer.step()
def configure_optimizers(self):
optimizer = torch.optim.AdamW(
self.test_module_obj.parameters()
)
return optimizer
if __name__ == '__main__':
test_data_loader = TestDataModule()
test_lit_model = TestLitModel()
bar = LitProgressBar(refresh_rate=5)
trainer = pl.Trainer(
log_every_n_steps=1,
callbacks=[bar],
max_epochs=-1,
max_steps=400000,
)
trainer.fit(test_lit_model,
datamodule=test_data_loader)
### Error messages and logs
Error messages and logs here please
### Environment
<details>
<summary>Current environment</summary>
#- Lightning Component (e.g. Trainer, LightningModule, LightningApp, LightningWork, LightningFlow):
#- PyTorch Lightning Version (e.g., 1.5.0):
#- Lightning App Version (e.g., 0.5.2):
#- PyTorch Version (e.g., 2.0):
#- Python version (e.g., 3.9):
#- OS (e.g., Linux):
#- CUDA/cuDNN version:
#- GPU models and configuration:
#- How you installed Lightning(conda
, pip
, source):
#- Running environment of LightningApp (e.g. local, cloud):
</details>
### More info
_No response_