Commit b13a447

[Bugfix][ROCm] Fix ViT rotary embeddings for torch.compile compatibility on ROCm (#27748)
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
1 parent 7956b0c commit b13a447
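
For context, the fix hinges on torch.compiler.is_compiling(), the public PyTorch API that reports whether the calling code is currently being traced by torch.compile (or torch.export). A minimal standalone demonstration, not vllm code:

import torch


def which_path(x):
    # True only while Dynamo is tracing this function, so the compiled
    # artifact bakes in the first branch while eager execution takes the second.
    if torch.compiler.is_compiling():
        return x + 1.0  # compile-time branch
    return x - 1.0      # eager branch


x = torch.zeros(2)
print(which_path(x))                 # eager: tensor([-1., -1.])
print(torch.compile(which_path)(x))  # compiled: tensor([1., 1.])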

2 files changed: +8 −5 lines

vllm/model_executor/layers/rotary_embedding/common.py

Lines changed: 7 additions & 4 deletions
@@ -77,7 +77,11 @@ def dispatch_rotary_emb_function(
     if current_platform.is_cuda():
         return apply_rotary_emb
 
-    if current_platform.is_rocm():
+    # if torch compile is not enabled
+    # use rotary embedding function from flash_attn package
+    # otherwise use the naive pytorch embedding implementation
+    # is faster when torch compile is enabled.
+    if current_platform.is_rocm() and not torch.compiler.is_compiling():
         if find_spec("flash_attn") is not None:
             from flash_attn.ops.triton.rotary import apply_rotary
 
@@ -87,11 +91,10 @@ def dispatch_rotary_emb_function(
                 "flash_attn is not installed. Falling back to PyTorch "
                 "implementation for rotary embeddings."
             )
-
     if default is not None:
         return default
-    else:
-        return apply_rotary_emb_torch
+
+    return apply_rotary_emb_torch
 
 
 # yarn functions
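
Taken together, the two hunks leave the dispatch logic roughly as sketched below. This is a minimal, self-contained approximation rather than the vllm source: the is_rocm parameter, the print-based warning, and the simplified apply_rotary_emb_torch are hypothetical stand-ins for vllm's current_platform helpers, logger, and real rotary kernel.

from importlib.util import find_spec

import torch


def apply_rotary_emb_torch(x, cos, sin):
    # Naive rotary embedding stand-in: rotate feature pairs
    # (x1, x2) -> (x1*cos - x2*sin, x2*cos + x1*sin).
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat((x1 * cos - x2 * sin, x2 * cos + x1 * sin), dim=-1)


def dispatch_rotary_emb_function(is_rocm: bool, default=None):
    # Per the commit's comment: outside torch.compile, prefer the flash_attn
    # Triton rotary kernel on ROCm; under torch.compile, skip it because the
    # naive PyTorch implementation is faster when compiled.
    if is_rocm and not torch.compiler.is_compiling():
        if find_spec("flash_attn") is not None:
            from flash_attn.ops.triton.rotary import apply_rotary
            return apply_rotary
        print("flash_attn is not installed. Falling back to PyTorch "
              "implementation for rotary embeddings.")

    if default is not None:
        return default

    # Unified fall-through introduced by the second hunk: every remaining
    # case lands on the PyTorch implementation.
    return apply_rotary_emb_torch

The restructured tail (dropping the else:) keeps a single fall-through: when the ROCm branch is skipped during compilation, control reaches the same default/PyTorch fallback as every other platform.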

vllm/model_executor/models/glm4_1v.py

Lines changed: 1 addition & 1 deletion
@@ -370,7 +370,7 @@ def forward(
             cu_seqlens_k=cu_seqlens,
             max_seqlen_q=max_seqlen,
             max_seqlen_k=max_seqlen,
-            dropout_p=0,
+            dropout_p=0.0,
             causal=False,
         )
 
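The second file's change is a one-token type fix: dropout_p is passed as the float literal 0.0 instead of the int 0. The likely motivation, inferred rather than stated in the commit, is that strict scalar type handling (in the attention op's schema, or in torch.compile's specialization of Python scalars) accepts a float but not an int. A toy illustration with a hypothetical float-only kernel, not the actual attention call:

import torch


def kernel(x: torch.Tensor, dropout_p: float):
    # Stand-in for an op whose schema requires a Python float.
    assert isinstance(dropout_p, float), "dropout_p must be a float"
    return x * (1.0 - dropout_p)


compiled = torch.compile(kernel)
x = torch.ones(4)
print(compiled(x, 0.0))  # OK: float matches the schema, like dropout_p=0.0
# compiled(x, 0)         # AssertionError: int fails the check, like dropout_p=0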
