CL-ModelCloud · Dec 16, 2024
diff --git a/evalplus/codegen.py b/evalplus/codegen.py
@@ -136,7 +136,7 @@ def run_codegen(
     enable_prefix_caching: bool = False,
     enable_chunked_prefill: bool = False,
     dtype: str = "bfloat16",
-    gptqmodel_backend: str = "AUTO",  # For GPTQModel
+    gptqmodel_backend: str = "auto",  # For GPTQModel
 ):
     assert dataset in ["humaneval", "mbpp", "evalperf"], f"Invalid dataset {dataset}"
     assert evalperf_type is None or evalperf_type in [

diff --git a/evalplus/provider/__init__.py b/evalplus/provider/__init__.py
@@ -24,7 +24,7 @@ def make_model(
     attn_implementation="eager",
     device_map=None,
     # gptqmodel only
-    gptqmodel_backend: str = 'AUTO',
+    gptqmodel_backend: str = 'auto',
 ) -> DecoderBase:
     if backend == "vllm":
         from evalplus.provider.vllm import VllmDecoder

diff --git a/evalplus/provider/gptqmodel.py b/evalplus/provider/gptqmodel.py
@@ -4,7 +4,7 @@
 from transformers import AutoTokenizer
 
 try:
-    from gptqmodel import GPTQModel, get_backend
+    from gptqmodel import GPTQModel
 except ModuleNotFoundError as exception:
     raise type(exception)(
         "Tried to load gptqmodel, but gptqmodel is not installed ",
@@ -23,23 +23,27 @@ def __init__(
         self,
         name: str,
         dataset: str,
-        gptqmodel_backend: str = 'AUTO',
+        gptqmodel_backend: str = 'auto',
         force_base_prompt: bool = False,
         **kwargs,
     ):
         super().__init__(name=name, **kwargs)
 
-        try:
-            backend = get_backend(gptqmodel_backend)
-        except Exception:
-            raise ValueError("GPTQModel support backend: AUTO, TRITON, EXLLAMA_V2, MARLIN, BITBLAS, QBITS, VLLM, SGLANG")
+        if hasattr(torch, "mps") and torch.mps.is_available():
+            device = torch.device("mps")
+        elif hasattr(torch, "xpu") and torch.xpu.is_available():
+            device = torch.device("xpu")
+        elif hasattr(torch, "cuda") and torch.cuda.is_available():
+            device = torch.device("cuda")
+        else:
+            device = torch.device("cpu")
 
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.device = device
 
         kwargs = {
             "model_id_or_path": name,
             "trust_remote_code": self.trust_remote_code,
-            "backend": backend
+            "backend": gptqmodel_backend
         }
         self.skip_special_tokens = True
         self.force_base_prompt = force_base_prompt

diff --git a/setup.cfg b/setup.cfg
@@ -49,4 +49,4 @@ console_scripts =
 perf = Pympler>=1.0.1
        cirron>=0.4
 vllm = vllm>=0.5.1
-gptqmodel = gptqmodel>=1.3.1
+gptqmodel = gptqmodel>=1.4.1