mdk8888 / gptfast Goto Github PK
View Code? Open in Web Editor NEW
Accelerate your Hugging Face Transformers 6-8.5x. Native to Hugging Face and PyTorch.
License: Apache License 2.0
Accelerate your Hugging Face Transformers 6-8.5x. Native to Hugging Face and PyTorch.
License: Apache License 2.0
pip install gpt-fast
!pip install numpy --upgrade
fixes the numpy error, but then there is an additional error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[6], line 3
1 import os
2 import torch
----> 3 from transformers import AutoTokenizer
4 from GPTFast.Core import gpt_fast
5 from GPTFast.Helpers import timed
File /opt/conda/lib/python3.10/site-packages/transformers/__init__.py:26
23 from typing import TYPE_CHECKING
25 # Check the dependencies satisfy the minimal versions required.
---> 26 from . import dependency_versions_check
27 from .utils import (
28 OptionalDependencyNotAvailable,
29 _LazyModule,
(...)
48 logging,
49 )
52 logger = logging.get_logger(__name__) # pylint: disable=invalid-name
File /opt/conda/lib/python3.10/site-packages/transformers/dependency_versions_check.py:57
54 if not is_accelerate_available():
55 continue # not required, check version only if installed
---> 57 require_version_core(deps[pkg])
58 else:
59 raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py")
File /opt/conda/lib/python3.10/site-packages/transformers/utils/versions.py:117, in require_version_core(requirement)
115 """require_version wrapper which emits a core-specific hint on failure"""
116 hint = "Try: `pip install transformers -U` or `pip install -e '.[dev]'` if you're working with git main"
--> 117 return require_version(requirement, hint)
File /opt/conda/lib/python3.10/site-packages/transformers/utils/versions.py:111, in require_version(requirement, hint)
109 if want_ver is not None:
110 for op, want_ver in wanted.items():
--> 111 _compare_versions(op, got_ver, want_ver, requirement, pkg, hint)
File /opt/conda/lib/python3.10/site-packages/transformers/utils/versions.py:39, in _compare_versions(op, got_ver, want_ver, requirement, pkg, hint)
37 def _compare_versions(op, got_ver, want_ver, requirement, pkg, hint):
38 if got_ver is None or want_ver is None:
---> 39 raise ValueError(
40 f"Unable to compare versions for {requirement}: need={want_ver} found={got_ver}. This is unusual. Consider"
41 f" reinstalling {pkg}."
42 )
43 if not ops[op](version.parse(got_ver), version.parse(want_ver)):
44 raise ImportError(
45 f"{requirement} is required for a normal functioning of this module, but found {pkg}=={got_ver}.{hint}"
46 )
ValueError: Unable to compare versions for numpy>=1.17: need=1.17 found=None. This is unusual. Consider reinstalling numpy.
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/transformers/utils/import_utils.py:1472, in _LazyModule._get_module(self, module_name)
1471 try:
-> 1472 return importlib.import_module("." + module_name, self.__name__)
1473 except Exception as e:
File /opt/conda/lib/python3.10/importlib/__init__.py:126, in import_module(name, package)
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)
File <frozen importlib._bootstrap>:1050, in _gcd_import(name, package, level)
File <frozen importlib._bootstrap>:1027, in _find_and_load(name, import_)
File <frozen importlib._bootstrap>:1006, in _find_and_load_unlocked(name, import_)
File <frozen importlib._bootstrap>:688, in _load_unlocked(spec)
File <frozen importlib._bootstrap_external>:883, in exec_module(self, module)
File <frozen importlib._bootstrap>:241, in _call_with_frames_removed(f, *args, **kwds)
File /opt/conda/lib/python3.10/site-packages/transformers/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.py:30
29 if is_speech_available():
---> 30 import torchaudio.compliance.kaldi as ta_kaldi
32 if is_torch_available():
File /opt/conda/lib/python3.10/site-packages/torchaudio/__init__.py:1
----> 1 from . import ( # noqa: F401
2 _extension,
3 compliance,
4 datasets,
5 functional,
6 io,
7 kaldi_io,
8 models,
9 pipelines,
10 sox_effects,
11 transforms,
12 utils,
13 )
14 from ._backend.common import AudioMetaData # noqa
File /opt/conda/lib/python3.10/site-packages/torchaudio/_extension/__init__.py:45
44 if _IS_TORCHAUDIO_EXT_AVAILABLE:
---> 45 _load_lib("libtorchaudio")
47 import torchaudio.lib._torchaudio # noqa
File /opt/conda/lib/python3.10/site-packages/torchaudio/_extension/utils.py:64, in _load_lib(lib)
63 return False
---> 64 torch.ops.load_library(path)
65 torch.classes.load_library(path)
File /opt/conda/lib/python3.10/site-packages/torch/_ops.py:933, in _Ops.load_library(self, path)
929 with dl_open_guard():
930 # Import the shared library into the process, thus running its
931 # static (global) initialization code in order to register custom
932 # operators with the JIT.
--> 933 ctypes.CDLL(path)
934 self.loaded_libraries.add(path)
File /opt/conda/lib/python3.10/ctypes/__init__.py:374, in CDLL.__init__(self, name, mode, handle, use_errno, use_last_error, winmode)
373 if handle is None:
--> 374 self._handle = _dlopen(self._name, mode)
375 else:
OSError: /opt/conda/lib/python3.10/site-packages/torchaudio/lib/libtorchaudio.so: undefined symbol: _ZNK5torch8autograd4Node4nameB5cxx11Ev
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
Cell In[10], line 77
44 MAX_TOKENS=50
46 cache_config = {
47 "model_config": {
48 "path_to_blocks": ["transformer", "h"],
(...)
74 "from torch import nn"]
75 }
---> 77 gpt_fast_model = gpt_fast(model_name, sample_function=argmax, max_length=60, cache_config=cache_config, draft_model_name=draft_model_name)
78 gpt_fast_model.to(device)
80 fast_compile_times = []
File /opt/conda/lib/python3.10/site-packages/GPTFast/Core/GPTFast.py:11, in gpt_fast(model_name, sample_function, max_length, cache_config, **spec_dec_kwargs)
9 def gpt_fast(model_name:str, sample_function:Callable, max_length:int, cache_config:dict, **spec_dec_kwargs):
10 model = load_int8(model_name)
---> 11 model = add_kv_cache(model, sample_function, max_length, cache_config, dtype=torch.float16)
12 spec_decode = False
13 if spec_dec_kwargs:
File /opt/conda/lib/python3.10/site-packages/GPTFast/Core/KVCache/KVCacheModel.py:208, in add_kv_cache(transformer, sampling_fn, max_length, cache_config, dtype)
207 def add_kv_cache(transformer:nn.Module, sampling_fn:Callable, max_length:int, cache_config:dict, dtype) -> KVCacheModel:
--> 208 model = KVCacheModel(transformer, sampling_fn, max_length, cache_config, dtype)
209 return model
File /opt/conda/lib/python3.10/site-packages/GPTFast/Core/KVCache/KVCacheModel.py:21, in KVCacheModel.__init__(self, model, sample_fn, max_length, cache_config, dtype)
17 self.sample = types.MethodType(sample_fn, self)
19 assert not isinstance(model, BloomForCausalLM), "Bloom models currently have an unsupported kv cache shape."
---> 21 self._model = self.add_static_cache_to_model(model, cache_config, max_length, dtype, self.device)
22 config = self._model.config
23 self._max_length = max_length
File /opt/conda/lib/python3.10/site-packages/GPTFast/Core/KVCache/KVCacheModel.py:50, in KVCacheModel.add_static_cache_to_model(cls, model, cache_config, max_generated_length, dtype, device)
48 module_forward_str_kv_cache = add_input_pos_to_func_str(module_forward_str, forward_prop_ref, "input_pos=input_pos")
49 module_forward_str_kv_cache = add_default_parameter(module_forward_str_kv_cache, "forward", "input_pos", "Optional[torch.Tensor]", None, True)
---> 50 add_str_as_func(module_with_input_pos, "forward", module_forward_str_kv_cache, imports)
52 module_with_input_pos = getattr(module_with_input_pos, prop)
54 assert isinstance(module_with_input_pos, nn.ModuleList), "Once we finish iterating through 'path_to_blocks', the property that you arrive at must be a nn.ModuleList."
File /opt/conda/lib/python3.10/site-packages/GPTFast/Helpers/Class/add_str_as_func.py:11, in add_str_as_func(obj, method_name, func_str, imports)
9 func_code = compile(complete_func_str, "<string>", "exec")
10 namespace = {}
---> 11 exec(func_code, namespace)
13 # Extract the func from the namespace
14 my_func = namespace[method_name]
File <string>:3
File <frozen importlib._bootstrap>:1073, in _handle_fromlist(module, fromlist, import_, recursive)
File <frozen importlib._bootstrap>:1075, in _handle_fromlist(module, fromlist, import_, recursive)
File /opt/conda/lib/python3.10/site-packages/transformers/utils/import_utils.py:1463, in _LazyModule.__getattr__(self, name)
1461 elif name in self._class_to_module.keys():
1462 module = self._get_module(self._class_to_module[name])
-> 1463 value = getattr(module, name)
1464 else:
1465 raise AttributeError(f"module {self.__name__} has no attribute {name}")
File /opt/conda/lib/python3.10/site-packages/transformers/utils/import_utils.py:1462, in _LazyModule.__getattr__(self, name)
1460 value = self._get_module(name)
1461 elif name in self._class_to_module.keys():
-> 1462 module = self._get_module(self._class_to_module[name])
1463 value = getattr(module, name)
1464 else:
File /opt/conda/lib/python3.10/site-packages/transformers/utils/import_utils.py:1474, in _LazyModule._get_module(self, module_name)
1472 return importlib.import_module("." + module_name, self.__name__)
1473 except Exception as e:
-> 1474 raise RuntimeError(
1475 f"Failed to import {self.__name__}.{module_name} because of the following error (look up to see its"
1476 f" traceback):\n{e}"
1477 ) from e
RuntimeError: Failed to import transformers.models.audio_spectrogram_transformer.feature_extraction_audio_spectrogram_transformer because of the following error (look up to see its traceback):
/opt/conda/lib/python3.10/site-packages/torchaudio/lib/libtorchaudio.so: undefined symbol: _ZNK5torch8autograd4Node4nameB5cxx11Ev
Hi!
I don't quite understand how this project works. I guess my main question is: what is a draft model?
For example, I would like to speed-up the inference of OwlVit (https://huggingface.co/google/owlvit-base-patch32) which I use through the transformers
library. Can I do that with GPTFast ?
Thanks !
Hi there,
Thanks for creating this repo.
I wanted to know what the configuration should be for Llama-2-7b-chat-hf, given that it is the below for the gpt and opt architectures?
"gpt": {
"path_to_blocks": ["transformer", "h"],
"child_ref_in_parent_forward": ["transformer", "block"],
},
"opt": {
"path_to_blocks": ["model", "decoder", "layers"],
"child_ref_in_parent_forward": ["model.decoder", "decoder", "decoder_layer"],
}
I think it's something close to
"llama": {
"path_to_blocks": ["model", "layers"],
"child_ref_in_parent_forward": ["model", "decoder_layer"],
}
but I am running into the following error:
File "/GPTFast/Helpers/Class/add_str_as_func.py", line 9, in add_str_as_func
func_code = compile(complete_func_str, "", "exec")
File "", line 19
input_pos: Optional[torch.Tensor] = None
So the parsing of the code string is somehow getting incorrectly matched at "decoder_layer".
Any help would be appreciated for this to be able to work on the LLama architectures using this code.
Hello,
I am having difficulties running GPTFast on Mistral-7b-v0.1, encountering the same errors as reported here: #25.
My assumption is that the model_config is not set properly (I am currently using the variant proposed in the llama example: https://github.com/MDK8888/GPTFast/blob/LlamaIntegration/Examples/llama.py).
Could you please help me out with the appropriate configuration?
Thank you!
Hello, I'm trying to run the demo in my local PyCharm. I tried to install the requirements but received an error:
Traceback:
Traceback (most recent call last):
File "C:\Users\user\OneDrive\Desktop\pythonProject\main.py", line 4, in <module>
from GPTFast.Core import gpt_fast
ModuleNotFoundError: No module named 'GPTFast'
What I tried:
(venv) PS C:\Users\user\OneDrive\Desktop\pythonProject>C:\Users\user\OneDrive\Desktop\pythonProject\venv\Scripts\activate.bat
(venv) PS C:\Users\user\OneDrive\Desktop\pythonProject> pip install GPTFast
Issue output:
Collecting GPTFast
Obtaining dependency information for GPTFast from https://files.pythonhosted.org/packages/4d/e6/cd51a5643dd17b8f2f8c870d384ff11912e717eb3976f720db2a78f25500/gptfast-0.1.0-py3-none-any.whl.metadata
Using cached gptfast-0.1.0-py3-none-any.whl.metadata (1.0 kB)
Collecting torch==2.1.2 (from GPTFast)
Obtaining dependency information for torch==2.1.2 from https://files.pythonhosted.org/packages/16/bf/2ba0f0f7c07b9a14c027e181e44c58824e13f7352607ed32db18321599a2/torch-2.1.2-cp310-cp310-win_amd64.whl.metadata
Using cached torch-2.1.2-cp310-cp310-win_amd64.whl.metadata (26 kB)
Requirement already satisfied: sympy==1.12 in c:\users\user\onedrive\desktop\pythonproject\venv\lib\site-packages (from GPTFast) (1.12)
Collecting typing-extensions==4.9.0 (from GPTFast)
Obtaining dependency information for typing-extensions==4.9.0 from https://files.pythonhosted.org/packages/b7/f4/6a90020cd2d93349b442bfcb657d0dc91eee65491600b2cb1d388bc98e6b/typing_extensions-4.9.0-py3-none-any.whl.metadata
Using cached typing_extensions-4.9.0-py3-none-any.whl.metadata (3.0 kB)
Requirement already satisfied: networkx==3.2.1 in c:\users\user\onedrive\desktop\pythonproject\venv\lib\site-packages (from GPTFast) (3.2.1)
Requirement already satisfied: jinja2==3.1.3 in c:\users\user\onedrive\desktop\pythonproject\venv\lib\site-packages (from GPTFast) (3.1.3)
INFO: pip is looking at multiple versions of gptfast to determine which version is compatible with other requirements. This could take a while.
ERROR: Could not find a version that satisfies the requirement triton==2.1.0 (from gptfast) (from versions: none)
ERROR: No matching distribution found for triton==2.1.0
Very interesting work! I see you pinned torch==2.1.2. PyTorch 2.2 promises a 2x improvement to scaled_dot_product_attention and a few torch.compile improvements:
https://pytorch.org/blog/pytorch2-2/
I don't think using PyTorch 2.2 will result in a big speed increase for GPTFast, but still it may be nice to try. What do you think?
Could you help by giving example code to run GPTFast on Mixtral-8x7B-Instruct-v0.1?
I load the model with GPTFast with an empty draft_model_name. An error shows when loading the model, as follows.
model_name = "./Mixtral-8x7B-v0.1"
draft_model_name = ""
tokenizer = AutoTokenizer.from_pretrained(model_name)
initial_string = "Write me a short story."
input_tokens = tokenizer.encode(initial_string, return_tensors="pt").to(device)
# ....
Traceback (most recent call last):
File "/data/gptfast.py", line 77, in
gpt_fast_model = gpt_fast(model_name, sample_function=argmax, max_length=60, cache_config=cache_config, draft_model_name=draft_model_name)
File "/root/anaconda3/envs/llm/lib/python3.10/site-packages/GPTFast/Core/GPTFast.py", line 11, in gpt_fast
model = add_kv_cache(model, sample_function, max_length, cache_config, dtype=torch.float16)
File "/root/anaconda3/envs/llm/lib/python3.10/site-packages/GPTFast/Core/KVCache/KVCacheModel.py", line 208, in add_kv_cache
model = KVCacheModel(transformer, sampling_fn, max_length, cache_config, dtype)
File "/root/anaconda3/envs/llm/lib/python3.10/site-packages/GPTFast/Core/KVCache/KVCacheModel.py", line 21, in init
self._model = self.add_static_cache_to_model(model, cache_config, max_length, dtype, self.device)
File "/root/anaconda3/envs/llm/lib/python3.10/site-packages/GPTFast/Core/KVCache/KVCacheModel.py", line 48, in add_static_cache_to_model
module_forward_str_kv_cache = add_input_pos_to_func_str(module_forward_str, forward_prop_ref, "input_pos=input_pos")
File "/root/anaconda3/envs/llm/lib/python3.10/site-packages/GPTFast/Helpers/String/add_input_pos_to_func_str.py", line 18, in add_input_pos_to_func_str
raise ValueError("Submodule forward pass not found.")
ValueError: Submodule forward pass not found.
I am trying to use this project with a vision-language model like https://huggingface.co/docs/transformers/en/model_doc/llava_next, but currently this repo does not support the vision part of the model. I have a separate script that works by just splitting off the vision tower and compiling the parts separately. Do you think it will be possible to do the same using your project? My separate script is not fully using gptfast yet, especially the int8 part, so I really wanted to use your awesome work here.
I am using https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf specifically.
Dear Sir,
I checked the demo code of GPTFast 0.2.1 and found that the function argmax_variation(...) is not used at all.
Could you please explain this?
Many thanks.
The current requirements are pinned such that installing GPTFast makes it incompatible with newer PyTorch or transformers versions.
They should be set to minimal requirements.
A declarative, efficient, and flexible JavaScript library for building user interfaces.
Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML.
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Something interesting about the web. A new door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Something interesting about visualization, using data art.
Something interesting about games, making everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.