intel · wenhuach21 · Jul 21, 2025 · Jul 18, 2025 · Jul 18, 2025 · Jul 18, 2025
diff --git a/auto_round/__main__.py b/auto_round/__main__.py
@@ -58,10 +58,14 @@ def run_fast():
 
 def run_mllm():
     if "--eval" in sys.argv:
-        from auto_round.script.mllm import setup_lmeval_parser, eval
+        from auto_round.script.llm import setup_eval_parser, eval
         sys.argv.remove("--eval")
-        args = setup_lmeval_parser()
+        args = setup_eval_parser()
+        args.mllm = True
         eval(args)
+    elif "--vlmeval" in sys.argv:
+        sys.argv.remove("--vlmeval")
+        run_vlmeavl()
     elif "--lmms" in sys.argv:
         sys.argv.remove("--lmms")
         run_lmms()
@@ -76,6 +80,12 @@ def run_lmms():
     args = setup_lmms_parser()
     lmms_eval(args)
 
+def run_vlmeavl():  # NOTE(review): likely a typo of "run_vlmeval"; name kept because run_mllm calls it
+    """Parse CLI arguments with setup_lmeval_parser() and pass them to vlmeval()."""
+    from auto_round.script import mllm as mllm_script
+    mllm_script.vlmeval(mllm_script.setup_lmeval_parser())
+
+
 def switch():
     if "--mllm" in sys.argv:
         sys.argv.remove("--mllm")

diff --git a/auto_round/script/llm.py b/auto_round/script/llm.py
@@ -210,6 +210,9 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.add_argument(
             "--model", "--model_name", "--model_name_or_path", default="facebook/opt-125m", help="model name or path")
+        self.add_argument(  # boolean switch: a bare --mllm enables it, absence leaves it False
+            "--mllm", action="store_true", help="whether to eval multi-modal model."
+        )
         self.add_argument(
             "--device",
             "--devices",
@@ -672,8 +675,15 @@ def eval(args):
         print("evaluation running time=%ds" % (time.time() - st))
     else:
         st = time.time()
+        if isinstance(batch_size, str) and "auto" in batch_size and args.mllm:  # batch_size may be an int
+            logger.warning("Batch size 'auto' is not yet supported for hf-multimodal models, reset to 16")
+            batch_size = 16
         res = simple_evaluate(
-            model="hf", model_args=model_args, tasks=tasks, device=device_str, batch_size=batch_size)
+            model="hf" if not args.mllm else "hf-multimodal",
+            model_args=model_args,
+            tasks=tasks,
+            device=device_str,
+            batch_size=batch_size)
         from lm_eval.utils import make_table  # pylint: disable=E0401
         print(make_table(res))
         print("evaluation running time=%ds" % (time.time() - st))

diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py
@@ -474,7 +474,7 @@ def tune(args):
     clear_memory()
 
 
-def eval(args):
+def vlmeval(args):
     set_cuda_visible_devices(args.device)
     device_str, parallelism = get_device_and_parallelism(args.device)
     if parallelism: