  warnings.warn("None of the inputs have requires_grad=True. Gradients will be None")
/home/ybZhang/miniconda3/envs/whister/lib/python3.8/site-packages/bitsandbytes/autograd/_functions.py:298: UserWarning: MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization
  warnings.warn(f"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization")
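The first warning is the usual symptom of combining gradient checkpointing with a frozen int8 base model: none of the inputs to the checkpointed blocks require grad, so nothing can flow back into the LoRA adapters. The second (MatMul8bitLt cast) is expected with `load_in_8bit=True` and harmless. A minimal sketch of the standard PEFT preparation step that avoids the first warning, assuming an int8 LoRA setup like the one in the traceback; the checkpoint name and LoRA hyperparameters are illustrative, not taken from the original finetune.py:

```python
from transformers import WhisperForConditionalGeneration
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training

# Assumption: finetune.py loads Whisper in int8 roughly like this.
model = WhisperForConditionalGeneration.from_pretrained(
    "openai/whisper-large-v2",  # illustrative checkpoint
    load_in_8bit=True,
    device_map="auto",
)

# Freezes the int8 base weights, casts layer norms to fp32, and makes the
# embedding outputs require grad so gradient checkpointing can still
# backpropagate into the trainable LoRA parameters.
model = prepare_model_for_int8_training(model)

lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
)
model = get_peft_model(model, lora_config)
```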
Traceback (most recent call last):
  File "finetune.py", line 124, in <module>
    trainer.train(resume_from_checkpoint=args.resume_from_checkpoint)
  File "/home/ybZhang/miniconda3/envs/whister/lib/python3.8/site-packages/transformers/trainer.py", line 1662, in train
    return inner_training_loop(
  File "/home/ybZhang/miniconda3/envs/whister/lib/python3.8/site-packages/transformers/trainer.py", line 1929, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
  File "/home/ybZhang/miniconda3/envs/whister/lib/python3.8/site-packages/transformers/trainer.py", line 2699, in training_step
    loss = self.compute_loss(model, inputs)
  File "/home/ybZhang/miniconda3/envs/whister/lib/python3.8/site-packages/transformers/trainer.py", line 2731, in compute_loss
    outputs = model(**inputs)
  File "/home/ybZhang/miniconda3/envs/whister/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/ybZhang/miniconda3/envs/whister/lib/python3.8/site-packages/peft/peft_model.py", line 281, in forward
    return self.get_base_model()(*args, **kwargs)
  File "/home/ybZhang/miniconda3/envs/whister/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/ybZhang/miniconda3/envs/whister/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/home/ybZhang/miniconda3/envs/whister/lib/python3.8/site-packages/transformers/models/whisper/modeling_whisper.py", line 1435, in forward
    loss = loss_fct(lm_logits.view(-1, self.config.vocab_size), labels.reshape(-1))
  File "/home/ybZhang/miniconda3/envs/whister/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/ybZhang/miniconda3/envs/whister/lib/python3.8/site-packages/torch/nn/modules/loss.py", line 1174, in forward
    return F.cross_entropy(input, target, weight=self.weight,
  File "/home/ybZhang/miniconda3/envs/whister/lib/python3.8/site-packages/torch/nn/functional.py", line 3026, in cross_entropy
    return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0! (when checking argument for argument target in method wrapper_nll_loss_forward)
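The crash itself comes from loading the model with a multi-GPU device map: accelerate shards Whisper across cuda:0 and cuda:1, so `lm_logits` comes out on cuda:1 while the `labels` batch is still on cuda:0, and this version of modeling_whisper.py passes both straight into `cross_entropy` without reconciling devices. A sketch of two possible workarounds, under the assumption that the model is loaded via `from_pretrained` with a device map; the trainer subclass is illustrative, not part of the original script:

```python
import torch
from transformers import Seq2SeqTrainer, WhisperForConditionalGeneration
from transformers.models.whisper.modeling_whisper import shift_tokens_right

# Workaround 1: don't shard at all; pin every module to one GPU so logits
# and labels can never diverge (equivalently, run with CUDA_VISIBLE_DEVICES=0).
model = WhisperForConditionalGeneration.from_pretrained(
    "openai/whisper-large-v2",  # illustrative checkpoint
    load_in_8bit=True,
    device_map={"": 0},
)

# Workaround 2 (sketch): if the model must stay sharded, compute the loss
# outside the model's forward so the labels can follow the logits' device.
class DeviceSafeSeq2SeqTrainer(Seq2SeqTrainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        # Recreate the decoder inputs that Whisper's forward would normally
        # derive from the labels (shift right, replace -100 with pad id).
        inputs["decoder_input_ids"] = shift_tokens_right(
            labels, model.config.pad_token_id, model.config.decoder_start_token_id
        )
        outputs = model(**inputs)
        logits = outputs.logits
        # F.cross_entropy ignores -100 targets by default, matching the
        # label padding convention used for Whisper fine-tuning.
        loss = torch.nn.functional.cross_entropy(
            logits.view(-1, logits.shape[-1]),
            labels.to(logits.device).reshape(-1),  # move targets to the logits' GPU
        )
        return (loss, outputs) if return_outputs else loss
```

Pinning to a single GPU is the simpler fix when the int8 model fits in one card's memory; the subclass only matters if you genuinely need the model split across devices.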