From b0c5b47e102e0f75d596b977f4a72dce89f62677 Mon Sep 17 00:00:00 2001
From: "wang.yuqi"
Date: Wed, 30 Jul 2025 13:30:46 +0800
Subject: [PATCH] Clarify warnings when max_model_len exceeds the derived
 maximum

Signed-off-by: wang.yuqi
---
 vllm/config.py | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index d9f356c5c60a..0f2f61197c57 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -3628,16 +3628,24 @@ def _get_and_verify_max_len(
             f"User-specified max_model_len ({max_model_len}) is greater "
             f"than the derived max_model_len ({max_len_key}="
             f"{derived_max_model_len} or model_max_length="
-            f"{model_max_length} in model's config.json). This may lead "
-            "to incorrect model outputs or CUDA errors.")
+            f"{model_max_length} in model's config.json).")
+
         if envs.VLLM_ALLOW_LONG_MAX_MODEL_LEN:
-            logger.warning(
-                "%s Make sure the value is correct and within the "
-                "model context size.", msg)
+            msg += ("\nPlease use VLLM_ALLOW_LONG_MAX_MODEL_LEN "
+                    "with great caution.\n"
+                    " - If the model uses RoPE position encoding, "
+                    "positions exceeding derived_max_model_len "
+                    "lead to NaN.\n"
+                    " - If the model uses absolute position encoding, "
+                    "positions exceeding derived_max_model_len will "
+                    "cause a CUDA array out-of-bounds error.")
+            logger.warning_once(msg)
         else:
-            raise ValueError(
-                f"{msg} To allow overriding this maximum, set "
-                "the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1")
+            msg += (" This may lead to incorrect model outputs "
+                    "or CUDA errors. To allow overriding this "
+                    "maximum, set the env var "
+                    "VLLM_ALLOW_LONG_MAX_MODEL_LEN=1.")
+            raise ValueError(msg)
     return int(max_model_len)
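
Note (not part of the patch): a minimal sketch of the code path this change
touches. It assumes vLLM is installed and uses facebook/opt-125m, whose
derived max_model_len is 2048 and which uses absolute position embeddings,
so a larger user-specified max_model_len exercises exactly the branch above:

    # Sketch only (assumes vLLM installed, facebook/opt-125m available).
    import os

    # Must be set before the engine checks it; with the flag set, the
    # patched code emits the detailed warning instead of raising.
    os.environ["VLLM_ALLOW_LONG_MAX_MODEL_LEN"] = "1"

    from vllm import LLM

    # opt-125m derives max_model_len=2048 from max_position_embeddings
    # and uses absolute position embeddings, so requesting 4096 triggers
    # the branch above: positions past 2048 risk a CUDA out-of-bounds
    # error, as the new warning states.
    llm = LLM(model="facebook/opt-125m", max_model_len=4096)

With the env var unset, the same constructor call raises the extended
ValueError instead, which still points users at
VLLM_ALLOW_LONG_MAX_MODEL_LEN. Switching to logger.warning_once keeps the
long multi-line warning from being repeated on every call.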