Has anyone run into a similar issue? Loading the decapoda-research/llama-7b-hf tokenizer through OpenFlamingo's create_model_and_transforms fails with a RecursionError. Full traceback:
RecursionError Traceback (most recent call last)
Cell In[8], line 21
16 device = accelerator.device
18 print('Loading model..')
---> 21 model, image_processor, tokenizer = create_model_and_transforms(
22 clip_vision_encoder_path="ViT-L-14",
23 clip_vision_encoder_pretrained="openai",
24 #lang_encoder_path="huggyllama/llama-7b",
25 #tokenizer_path= "huggyllama/llama-7b",
26 lang_encoder_path="decapoda-research/llama-7b-hf",
27 tokenizer_path= "decapoda-research/llama-7b-hf",
28 cross_attn_every_n_layers=4,
29 )
Cell In[2], line 43, in create_model_and_transforms(clip_vision_encoder_path, clip_vision_encoder_pretrained, lang_encoder_path, tokenizer_path, cross_attn_every_n_layers, use_local_files, decoder_layers_attr_name, freeze_lm_embeddings, **flamingo_kwargs)
40 # set the vision encoder to output the visual features
41 vision_encoder.visual.output_tokens = True
---> 43 text_tokenizer = AutoTokenizer.from_pretrained(
44 tokenizer_path,
45 local_files_only=use_local_files,
46 trust_remote_code=True,
47 )
48 # add Flamingo special tokens to the tokenizer
49 text_tokenizer.add_special_tokens(
50 {"additional_special_tokens": ["<|endofchunk|>", ""]}
51 )
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/models/auto/tokenization_auto.py:736, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
732 if tokenizer_class is None:
733 raise ValueError(
734 f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
735 )
--> 736 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
738 # Otherwise we have to be creative.
739 # if model is an encoder decoder, the encoder tokenizer class is used by default
740 if isinstance(config, EncoderDecoderConfig):
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1854, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, *init_inputs, **kwargs)
1851 else:
1852 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 1854 return cls._from_pretrained(
1855 resolved_vocab_files,
1856 pretrained_model_name_or_path,
1857 init_configuration,
1858 *init_inputs,
1859 token=token,
1860 cache_dir=cache_dir,
1861 local_files_only=local_files_only,
1862 _commit_hash=commit_hash,
1863 _is_local=is_local,
1864 **kwargs,
1865 )
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:2017, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
2015 # Instantiate tokenizer.
2016 try:
-> 2017 tokenizer = cls(*init_inputs, **init_kwargs)
2018 except OSError:
2019 raise OSError(
2020 "Unable to load vocabulary from file. "
2021 "Please check that the provided vocabulary is accessible and not corrupted."
2022 )
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/models/llama/tokenization_llama_fast.py:128, in LlamaTokenizerFast.__init__(self, vocab_file, tokenizer_file, clean_up_tokenization_spaces, unk_token, bos_token, eos_token, add_bos_token, add_eos_token, use_default_system_prompt, **kwargs)
126 self._add_bos_token = add_bos_token
127 self._add_eos_token = add_eos_token
--> 128 self.update_post_processor()
129 self.use_default_system_prompt = use_default_system_prompt
130 self.vocab_file = vocab_file
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/models/llama/tokenization_llama_fast.py:141, in LlamaTokenizerFast.update_post_processor(self)
137 """
138 Updates the underlying post processor with the current bos_token
and eos_token
.
139 """
140 bos = self.bos_token
--> 141 bos_token_id = self.bos_token_id
143 eos = self.eos_token
144 eos_token_id = self.eos_token_id
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1141, in SpecialTokensMixin.bos_token_id(self)
1139 if self._bos_token is None:
1140 return None
-> 1141 return self.convert_tokens_to_ids(self.bos_token)
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/tokenization_utils_fast.py:277, in PreTrainedTokenizerFast.convert_tokens_to_ids(self, tokens)
274 return None
276 if isinstance(tokens, str):
--> 277 return self._convert_token_to_id_with_added_voc(tokens)
279 return [self._convert_token_to_id_with_added_voc(token) for token in tokens]
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/tokenization_utils_fast.py:284, in PreTrainedTokenizerFast._convert_token_to_id_with_added_voc(self, token)
282 index = self._tokenizer.token_to_id(token)
283 if index is None:
--> 284 return self.unk_token_id
285 return index
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1160, in SpecialTokensMixin.unk_token_id(self)
1158 if self._unk_token is None:
1159 return None
-> 1160 return self.convert_tokens_to_ids(self.unk_token)
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/tokenization_utils_fast.py:277, in PreTrainedTokenizerFast.convert_tokens_to_ids(self, tokens)
274 return None
276 if isinstance(tokens, str):
--> 277 return self._convert_token_to_id_with_added_voc(tokens)
279 return [self._convert_token_to_id_with_added_voc(token) for token in tokens]
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/tokenization_utils_fast.py:284, in PreTrainedTokenizerFast._convert_token_to_id_with_added_voc(self, token)
282 index = self._tokenizer.token_to_id(token)
283 if index is None:
--> 284 return self.unk_token_id
285 return index
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1160, in SpecialTokensMixin.unk_token_id(self)
1158 if self._unk_token is None:
1159 return None
-> 1160 return self.convert_tokens_to_ids(self.unk_token)
[... skipping similar frames: PreTrainedTokenizerFast._convert_token_to_id_with_added_voc at line 284 (986 times), PreTrainedTokenizerFast.convert_tokens_to_ids at line 277 (986 times), SpecialTokensMixin.unk_token_id at line 1160 (985 times)]
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1160, in SpecialTokensMixin.unk_token_id(self)
1158 if self._unk_token is None:
1159 return None
-> 1160 return self.convert_tokens_to_ids(self.unk_token)
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/tokenization_utils_fast.py:277, in PreTrainedTokenizerFast.convert_tokens_to_ids(self, tokens)
274 return None
276 if isinstance(tokens, str):
--> 277 return self._convert_token_to_id_with_added_voc(tokens)
279 return [self._convert_token_to_id_with_added_voc(token) for token in tokens]
File ~/anaconda3/envs/dui/lib/python3.11/site-packages/transformers/tokenization_utils_fast.py:284, in PreTrainedTokenizerFast._convert_token_to_id_with_added_voc(self, token)
282 index = self._tokenizer.token_to_id(token)
283 if index is None:
--> 284 return self.unk_token_id
285 return index
RecursionError: maximum recursion depth exceeded
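For reference, the failure reproduces with just the tokenizer load, outside of create_model_and_transforms, so it looks like a mismatch between the decapoda-research/llama-7b-hf tokenizer files and this transformers version rather than an OpenFlamingo bug. A minimal sketch, assuming the same environment as above:

```python
from transformers import AutoTokenizer

# The decapoda repo ships a tokenizer_config.json with bos/eos/unk set to "".
# token_to_id("") returns None, so unk_token_id falls back to
# convert_tokens_to_ids(self.unk_token) -- which calls unk_token_id again,
# producing exactly the loop shown in the traceback above.
tokenizer = AutoTokenizer.from_pretrained(
    "decapoda-research/llama-7b-hf",
    trust_remote_code=True,
)  # RecursionError: maximum recursion depth exceeded
```

Since the underlying SentencePiece model does define the usual LLaMA special tokens, passing them explicitly should break the cycle (an untested sketch; the huggyllama/llama-7b paths commented out in the cell above, whose tokenizer config declares these tokens properly, are another way around it):

```python
from transformers import AutoTokenizer

# Override the empty special tokens from the repo's config so that
# bos_token_id / eos_token_id / unk_token_id resolve to real vocabulary ids
# instead of recursing through the empty-string fallback.
tokenizer = AutoTokenizer.from_pretrained(
    "decapoda-research/llama-7b-hf",
    unk_token="<unk>",
    bos_token="<s>",
    eos_token="</s>",
)
```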