I set n_experiences=10 for split-cifar10 and task_boundaries=False at line 98 in main.py. When I run the experiment, I get the following error:
(ocl_survey) [[email protected]@GPU6 experiments]$ HYDRA_FULL_ERROR=1 python main.py strategy=icarl experiment=split_cifar10 evaluation=parallel strategy.train_epochs=1 strategy.mem_size=2000
/data/hvaidya/ocl_survey/experiments/main.py:21: UserWarning:
The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
@hydra.main(config_path="../config", config_name="config.yaml")
/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/hydra/_internal/hydra.py:119: UserWarning: Future Hydra versions will no longer change working directory at job runtime by default.
See https://hydra.cc/docs/1.2/upgrades/1.1_to_1.2/changes_to_job_working_dir/ for more information.
ret = run_job(
Files already downloaded and verified
Files already downloaded and verified
[4, 1, 7, 5, 3, 9, 0, 8, 6, 2]
SGD (
Parameter Group 0
dampening: 0
differentiable: False
foreach: None
lr: 0.1
maximize: False
momentum: 0.0
nesterov: False
weight_decay: 0.0
)
Using strategy: OnlineICaRL
With plugins: [<src.toolkit.parallel_eval.ParallelEvaluationPlugin object at 0x7f41779fee00>, <src.strategies.icarl._ICaRLPlugin object at 0x7f41434727a0>, <avalanche.training.plugins.replay.ReplayPlugin object
at 0x7f41434731f0>, <src.strategies.icarl.OnlineICaRLLossPlugin object at 0x7f4177bb0f10>, <avalanche.training.plugins.evaluation.EvaluationPlugin object at 0x7f4177bb0b80>, <avalanche.training.templates.base_sgd
.PeriodicEval object at 0x7f4143472890>, <avalanche.training.plugins.clock.Clock object at 0x7f41434728f0>]
-- >> Start of training phase << --
-- Starting training on experience 0 (Task 0) from train stream --
0%|▎ | 1/475 [00:03<25:52, 3.28s/it]
Error executing job with overrides: ['strategy=icarl', 'experiment=split_cifar10', 'evaluation=parallel', 'strategy.train_epochs=1', 'strategy.mem_size=2000']
Traceback (most recent call last):
File "/data/hvaidya/ocl_survey/experiments/main.py", line 129, in <module>
main()
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/hydra/main.py", line 94, in decorated_main
_run_hydra(
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/hydra/_internal/utils.py", line 394, in _run_hydra
_run_app(
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/hydra/_internal/utils.py", line 457, in _run_app
run_and_report(
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/hydra/_internal/utils.py", line 223, in run_and_report
raise ex
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/hydra/_internal/utils.py", line 220, in run_and_report
return func()
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/hydra/_internal/utils.py", line 458, in <lambda>
lambda: hydra.run(
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/hydra/_internal/hydra.py", line 132, in run
_ = ret.return_value
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/hydra/core/utils.py", line 260, in return_value
raise self._return_value
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/hydra/core/utils.py", line 186, in run_job
ret.return_value = task_function(task_cfg)
File "/data/hvaidya/ocl_survey/experiments/main.py", line 109, in main
strategy.train(
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/training/templates/base_sgd.py", line 168, in train
super().train(experiences, eval_streams, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/training/templates/base.py", line 144, in train
self._train_exp(self.experience, eval_streams, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/training/templates/base_sgd.py", line 294, in _train_exp
self.training_epoch(**kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/training/templates/update_type/sgd_update.py", line 22, in training_epoch
self._before_forward(**kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/training/templates/base_sgd.py", line 473, in _before_forward
trigger_plugins(self, "before_forward", **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/training/utils.py", line 69, in trigger_plugins
getattr(p, event)(strategy, **kwargs)
File "/data/hvaidya/ocl_survey/src/strategies/icarl.py", line 52, in before_forward
self.old_logits = self.old_model(strategy.mb_x)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/models/dynamic_modules.py", line 460, in forward
return self.eval_classifier(x)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/models/ncm_classifier.py", line 77, in forward
assert self.class_means_dict != {}, "no class means available."
AssertionError: no class means available.
Exception ignored in atexit callback: <bound method BlockingScheduler.close of <src.toolkit.parallel_eval.BlockingScheduler object at 0x7f4177b5df60>>
Traceback (most recent call last):
File "/data/hvaidya/ocl_survey/src/toolkit/parallel_eval.py", line 37, in close
ray.get(self.scheduled_tasks)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/ray/_private/auto_init_hook.py", line 24, in auto_init_wrapper
return fn(*args, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/ray/_private/worker.py", line 2524, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(AssertionError): ray::EvaluationActor.eval() (pid=959167, ip=10.10.10.6, actor_id=6004a1dcca0184816d26c34101000000, repr=<src.toolkit.parallel_eval.EvaluationActor object at 0x7f05f22b
6e30>)
File "/data/hvaidya/ocl_survey/src/toolkit/parallel_eval.py", line 66, in eval
self.strat.eval(stream, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/training/templates/base_sgd.py", line 183, in eval
super().eval(exp_list, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/training/templates/base.py", line 193, in eval
self._eval_exp(**kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/training/templates/base_sgd.py", line 187, in _eval_exp
self.eval_epoch(**kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/training/templates/base_sgd.py", line 229, in eval_epoch
self.mb_output = self.forward()
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/training/templates/problem_type/supervised_problem.py", line 39, in forward
return avalanche_forward(self.model, self.mb_x, self.mb_task_id)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/models/utils.py", line 21, in avalanche_forward
return model(x)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/models/dynamic_modules.py", line 460, in forward
return self.eval_classifier(x)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/h/hvaidya/.conda/envs/ocl_survey/lib/python3.10/site-packages/avalanche/models/ncm_classifier.py", line 77, in forward
assert self.class_means_dict != {}, "no class means available."
AssertionError: no class means available.
0%|▎