From b0c5b47e102e0f75d596b977f4a72dce89f62677 Mon Sep 17 00:00:00 2001
From: "wang.yuqi"
Date: Wed, 30 Jul 2025 13:30:46 +0800
Subject: [PATCH] Clarify warnings when max_model_len exceeds the derived
 maximum

Signed-off-by: wang.yuqi
---
 vllm/config.py | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index d9f356c5c60a..0f2f61197c57 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -3628,16 +3628,24 @@ def _get_and_verify_max_len(
             f"User-specified max_model_len ({max_model_len}) is greater "
             f"than the derived max_model_len ({max_len_key}="
             f"{derived_max_model_len} or model_max_length="
-            f"{model_max_length} in model's config.json). This may lead "
-            "to incorrect model outputs or CUDA errors.")
+            f"{model_max_length} in model's config.json).")
+
         if envs.VLLM_ALLOW_LONG_MAX_MODEL_LEN:
-            logger.warning(
-                "%s Make sure the value is correct and within the "
-                "model context size.", msg)
+            msg += ("\nPlease use VLLM_ALLOW_LONG_MAX_MODEL_LEN "
+                    "with great caution.\n"
+                    " - If the model uses RoPE position encoding, "
+                    "positions exceeding derived_max_model_len "
+                    "lead to NaN.\n"
+                    " - If the model uses absolute position encoding, "
+                    "positions exceeding derived_max_model_len will "
+                    "cause a CUDA array out-of-bounds error.")
+            logger.warning_once(msg)
         else:
-            raise ValueError(
-                f"{msg} To allow overriding this maximum, set "
-                "the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1")
+            msg += (" This may lead to incorrect model outputs "
+                    "or CUDA errors. To allow overriding this "
+                    "maximum, set the env var "
+                    "VLLM_ALLOW_LONG_MAX_MODEL_LEN=1.")
+            raise ValueError(msg)
     return int(max_model_len)
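
Note (not part of the patch): a minimal sketch of the code path this change
touches. It assumes vLLM is installed and uses facebook/opt-125m, whose
derived max_model_len is 2048 and which uses absolute position embeddings,
so a larger user-specified max_model_len exercises exactly the branch above:

    # Sketch only (assumes vLLM installed, facebook/opt-125m available).
    import os

    # Must be set before the engine checks it; with the flag set, the
    # patched code emits the detailed warning instead of raising.
    os.environ["VLLM_ALLOW_LONG_MAX_MODEL_LEN"] = "1"

    from vllm import LLM

    # opt-125m derives max_model_len=2048 from max_position_embeddings
    # and uses absolute position embeddings, so requesting 4096 triggers
    # the branch above: positions past 2048 risk a CUDA out-of-bounds
    # error, as the new warning states.
    llm = LLM(model="facebook/opt-125m", max_model_len=4096)

With the env var unset, the same constructor call raises the extended
ValueError instead, which still points users at
VLLM_ALLOW_LONG_MAX_MODEL_LEN. Switching to logger.warning_once keeps the
long multi-line warning from being repeated on every call.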