diff --git a/evalplus/codegen.py b/evalplus/codegen.py
index 821b906320..cd4a20b5af 100644
--- a/evalplus/codegen.py
+++ b/evalplus/codegen.py
@@ -136,7 +136,7 @@ def run_codegen(
     enable_prefix_caching: bool = False,
     enable_chunked_prefill: bool = False,
     dtype: str = "bfloat16",
-    gptqmodel_backend: str = "AUTO",  # For GPTQModel
+    gptqmodel_backend: str = "auto",  # For GPTQModel
 ):
     assert dataset in ["humaneval", "mbpp", "evalperf"], f"Invalid dataset {dataset}"
     assert evalperf_type is None or evalperf_type in [
diff --git a/evalplus/provider/__init__.py b/evalplus/provider/__init__.py
index 2db76c4cc7..4702973b78 100644
--- a/evalplus/provider/__init__.py
+++ b/evalplus/provider/__init__.py
@@ -24,7 +24,7 @@ def make_model(
     attn_implementation="eager",
     device_map=None,
     # gptqmodel only
-    gptqmodel_backend: str = 'AUTO',
+    gptqmodel_backend: str = 'auto',
 ) -> DecoderBase:
     if backend == "vllm":
         from evalplus.provider.vllm import VllmDecoder
diff --git a/evalplus/provider/gptqmodel.py b/evalplus/provider/gptqmodel.py
index bb2bed7e29..51926329c1 100644
--- a/evalplus/provider/gptqmodel.py
+++ b/evalplus/provider/gptqmodel.py
@@ -4,7 +4,7 @@
 from transformers import AutoTokenizer

 try:
-    from gptqmodel import GPTQModel, get_backend
+    from gptqmodel import GPTQModel
 except ModuleNotFoundError as exception:
     raise type(exception)(
         "Tried to load gptqmodel, but gptqmodel is not installed ",
@@ -23,23 +23,27 @@ def __init__(
         self,
         name: str,
         dataset: str,
-        gptqmodel_backend: str = 'AUTO',
+        gptqmodel_backend: str = 'auto',
         force_base_prompt: bool = False,
         **kwargs,
     ):
         super().__init__(name=name, **kwargs)

-        try:
-            backend = get_backend(gptqmodel_backend)
-        except Exception:
-            raise ValueError("GPTQModel support backend: AUTO, TRITON, EXLLAMA_V2, MARLIN, BITBLAS, QBITS, VLLM, SGLANG")
+        if hasattr(torch, "mps") and torch.mps.is_available():
+            device = torch.device("mps")
+        elif hasattr(torch, "xpu") and torch.xpu.is_available():
+            device = torch.device("xpu")
+        elif hasattr(torch, "cuda") and torch.cuda.is_available():
+            device = torch.device("cuda")
+        else:
+            device = torch.device("cpu")

-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.device = device

         kwargs = {
             "model_id_or_path": name,
             "trust_remote_code": self.trust_remote_code,
-            "backend": backend
+            "backend": gptqmodel_backend
         }
         self.skip_special_tokens = True
         self.force_base_prompt = force_base_prompt
diff --git a/setup.cfg b/setup.cfg
index 5b0e15ff34..c8f8e96e13 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -49,4 +49,4 @@ console_scripts =
 perf =
     Pympler>=1.0.1
     cirron>=0.4
 vllm = vllm>=0.5.1
-gptqmodel = gptqmodel>=1.3.1
+gptqmodel = gptqmodel>=1.4.1
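
Note on the new load path: the patch drops the `get_backend()` import and forwards the backend string (now lowercase "auto") straight to GPTQModel, so backend validation moves into gptqmodel >= 1.4.1 instead of the hand-rolled ValueError, and device probing widens from cuda-or-cpu to mps/xpu/cuda/cpu. A minimal standalone sketch of that flow, assuming gptqmodel's `GPTQModel.load()` entry point accepts the same keys the decoder collects in its kwargs dict; the model id below is hypothetical:

import torch
from gptqmodel import GPTQModel  # gptqmodel >= 1.4: backend is passed as a plain string

# Device preference mirrors the diff: mps, then xpu, then cuda, else cpu.
# The hasattr() guards keep this safe on torch builds that lack the mps/xpu
# modules (older torch exposes the Apple check as torch.backends.mps.is_available()).
if hasattr(torch, "mps") and torch.mps.is_available():
    device = torch.device("mps")
elif hasattr(torch, "xpu") and torch.xpu.is_available():
    device = torch.device("xpu")
elif hasattr(torch, "cuda") and torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Assumed entry point: an unknown backend string now raises inside gptqmodel
# itself rather than in evalplus.
model = GPTQModel.load(
    model_id_or_path="ModelCloud/some-4bit-gptq-model",  # hypothetical model id
    trust_remote_code=False,
    backend="auto",
)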