Erlemar / pytorch_tempest

My repo for training neural nets using pytorch-lightning and hydra

Home Page:https://pytorch-tempest.readthedocs.io/en/latest/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

First run does not work

utrobinmv opened this issue · comments

If I run the command

python train.py --config-name mnist_config model.encoder.params.to_one_channel=True

I get the following error:

`LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

| Name | Type | Params

0 | model | Net | 23.6 M
1 | loss | CrossEntropyLoss | 0
2 | metric | Accuracy | 0

23.6 M Trainable params
0 Non-trainable params
23.6 M Total params
94.253 Total estimated model params size (MB)
Epoch 0: 0%| | 0/16 [00:00<?, ?it/s]Traceback (most recent call last):
File "train.py", line 92, in <module>
run_model()
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/hydra/main.py", line 32, in decorated_main
_run_hydra(
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/hydra/_internal/utils.py", line 346, in _run_hydra
run_and_report(
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/hydra/_internal/utils.py", line 201, in run_and_report
raise ex
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/hydra/_internal/utils.py", line 198, in run_and_report
return func()
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/hydra/_internal/utils.py", line 347, in <lambda>
lambda: hydra.run(
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/hydra/_internal/hydra.py", line 107, in run
return run_job(
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/hydra/core/utils.py", line 125, in run_job
ret.return_value = task_function(task_cfg)
File "train.py", line 88, in run_model
run(cfg)
File "train.py", line 61, in run
trainer.fit(model, dm)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 460, in fit
self._run(model)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 758, in _run
self.dispatch()
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 799, in dispatch
self.accelerator.start_training(self)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py", line 96, in start_training
self.training_type_plugin.start_training(trainer)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 144, in start_training
self._results = trainer.run_stage()
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 809, in run_stage
return self.run_train()
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 871, in run_train
self.train_loop.run_training_epoch()
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 499, in run_training_epoch
batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 738, in run_training_batch
self.optimizer_step(optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 434, in optimizer_step
model_ref.optimizer_step(
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/core/lightning.py", line 1403, in optimizer_step
optimizer.step(closure=optimizer_closure)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/core/optimizer.py", line 214, in step
self.__optimizer_step(*args, closure=closure, profiler_name=profiler_name, **kwargs)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/core/optimizer.py", line 134, in __optimizer_step
trainer.accelerator.optimizer_step(optimizer, self._optimizer_idx, lambda_closure=closure, **kwargs)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py", line 329, in optimizer_step
self.run_optimizer_step(optimizer, opt_idx, lambda_closure, **kwargs)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py", line 336, in run_optimizer_step
self.training_type_plugin.optimizer_step(optimizer, lambda_closure=lambda_closure, **kwargs)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 193, in optimizer_step
optimizer.step(closure=lambda_closure, **kwargs)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/torch/optim/optimizer.py", line 88, in wrapper
return func(*args, **kwargs)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 28, in decorate_context
return func(*args, **kwargs)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/torch/optim/adamw.py", line 65, in step
loss = closure()
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 732, in train_step_and_backward_closure
result = self.training_step_and_backward(
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 823, in training_step_and_backward
result = self.training_step(split_batch, batch_idx, opt_idx, hiddens)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 290, in training_step
training_step_output = self.trainer.accelerator.training_step(args)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py", line 204, in training_step
return self.training_type_plugin.training_step(*args)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/plugins/training_type/dp.py", line 98, in training_step
return self.model(*args, **kwargs)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/torch/nn/parallel/data_parallel.py", line 166, in forward
return self.module(*inputs[0], **kwargs[0])
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/overrides/data_parallel.py", line 77, in forward
output = super().forward(*inputs, **kwargs)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/pytorch_lightning/overrides/base.py", line 46, in forward
output = self.module.training_step(*inputs, **kwargs)
File "/home/joefox/data/nextcloud/projects/pytorch_tempest/src/lightning_classes/lightning_image_classification.py", line 54, in training_step
score = self.metric(logits.argmax(1), target)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/torchmetrics/metric.py", line 190, in forward
self.update(*args, **kwargs)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/torchmetrics/metric.py", line 249, in wrapped_func
return update(*args, **kwargs)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/torchmetrics/classification/accuracy.py", line 231, in update
mode = _mode(preds, target, self.threshold, self.top_k, self.num_classes, self.multiclass)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/torchmetrics/functional/classification/accuracy.py", line 36, in _mode
mode = _check_classification_inputs(
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/torchmetrics/utilities/checks.py", line 288, in _check_classification_inputs
_check_num_classes_mc(preds, target, num_classes, multiclass, implied_classes)
File "/home/joefox/.pyenv/versions/hydra/lib/python3.8/site-packages/torchmetrics/utilities/checks.py", line 164, in _check_num_classes_mc
raise ValueError("The highest label in target should be smaller than num_classes.")
ValueError: The highest label in target should be smaller than num_classes.
Epoch 0: 0%| | 0/16 [00:00<?, ?it/s] `

What have I done wrong?

You have done nothing wrong - this was my mistake:
bd8d4f2#diff-574fcddb16978e787be1562d998154601636dbe871359dc28b6c81f421842c2d

When I changed the config file for the Accuracy metric, I initially hard-coded the num_classes parameter to a fixed value. I have now changed it to use the value from the training config.

After you pull the changes everything should work again.