11 changes: 7 additions & 4 deletions vllm/model_executor/layers/rotary_embedding/common.py
@@ -77,7 +77,11 @@ def dispatch_rotary_emb_function(
    if current_platform.is_cuda():
        return apply_rotary_emb

    if current_platform.is_rocm():
Contributor:
@vllmellm @DarkLight1337 What do you think? Should we keep the Triton rotary embedding for backwards compatibility for now, by adding a torch.compiler.is_compiling() condition instead, since there are other model definition files that are not using torch.compile yet? (See the sketch after this list.)

For example, model definition files that are not yet using torch.compile:

  • vllm/model_executor/models/dots_ocr.py
  • vllm/model_executor/models/ernie45_vl.py
  • vllm/model_executor/models/glm4_1v.py
  • vllm/model_executor/models/qwen2_vl.py
  • vllm/model_executor/models/siglip2navit.py
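
For concreteness, here is a minimal sketch of where the proposed torch.compiler.is_compiling() check would sit. The helper names (current_platform, find_spec, apply_rotary_emb_torch, flash_attn's apply_rotary) are taken from this diff; the logging and exact fallback handling of the real patch are omitted, so treat this as an illustration rather than the final change.

from importlib.util import find_spec

import torch

from vllm.model_executor.layers.rotary_embedding.common import apply_rotary_emb_torch
from vllm.platforms import current_platform


def dispatch_rotary_emb_function(default=None):
    # Eager ROCm path: keep the flash_attn Triton kernel for backwards
    # compatibility with models that are not covered by torch.compile yet.
    if current_platform.is_rocm() and not torch.compiler.is_compiling():
        if find_spec("flash_attn") is not None:
            from flash_attn.ops.triton.rotary import apply_rotary
            return apply_rotary
    # Compiled path (or flash_attn missing): plain PyTorch implementation.
    return default if default is not None else apply_rotary_emb_torch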

@tjtanaa (Contributor), Oct 30, 2025:
Another thing to note at this point: torch.compile is currently disabled because of issues with dynamic slicing.

So if we add the torch.compiler.is_compiling() condition, we don't need to postpone this PR, while still avoiding a performance regression in both cases (torch.compile enabled and disabled).

Right now new fixes are being explored, e.g. #27764.

Contributor:
My two cents: I see the benefit of gating with torch.compiler.is_compiling(), especially for models we don't have covered with torch.compile yet, so I'm in favor of that approach as opposed to removing the Triton path completely.

Member:
+1 on adding a condition that checks whether we're running under torch.compile.

On a side note: is this PR really considered a bugfix? I thought that after #27760 things should already work on AMD. Did I miss something here?

@tjtanaa (Contributor), Oct 30, 2025:
@ywang96 PR #27760 is a bugfix that reverts the torch.compile PR. If torch.compile is re-enabled, AMD needs this bugfix PR to address the incompatibility of the flash_attn rotary-embedding Triton kernel with torch.compile. This comment thread is discussing how to handle the models that don't have torch.compile support yet.

Contributor:
And in this PR we found that the torch.compile-d rotary embedding is faster than the Triton implementation on ROCm. I expect the same to hold on CUDA.
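
A rough micro-benchmark sketch of that comparison. The tensor shapes, device, and timing loop are illustrative assumptions (and apply_rotary_emb_torch is assumed importable from common.py, as the snippets in this thread suggest), not the setup behind this PR's numbers.

import torch

from vllm.model_executor.layers.rotary_embedding.common import apply_rotary_emb_torch

# Assumed vision-tower-like shapes: x is (batch, seq, heads, head_dim),
# cos/sin are (seq, head_dim // 2). Purely for illustration.
t = torch.randn(1, 4096, 16, 80, device="cuda")
freqs = torch.randn(4096, 40, device="cuda")
cos, sin = freqs.cos(), freqs.sin()

compiled = torch.compile(apply_rotary_emb_torch)
compiled(t, cos, sin)  # warm-up so compilation time is not measured


def time_ms(fn, iters=100):
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    torch.cuda.synchronize()
    start.record()
    for _ in range(iters):
        fn(t, cos, sin)
    end.record()
    torch.cuda.synchronize()
    return start.elapsed_time(end) / iters


print("eager PyTorch :", time_ms(apply_rotary_emb_torch), "ms")
print("torch.compile :", time_ms(compiled), "ms")

# Optionally compare against the flash_attn Triton-backed kernel when it is
# installed (import path assumed from upstream flash-attn).
try:
    from flash_attn.layers.rotary import apply_rotary_emb as fa_rotary
    print("flash_attn    :", time_ms(fa_rotary), "ms")
except ImportError:
    pass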

@ywang96 (Member), Oct 30, 2025:
Yeah, I was just confirming whether the current main branch is broken on AMD (because that would affect our release). It seems it's not, since torch.compile is currently disabled, correct?

Contributor:
@ywang96 On the current main branch, the unit tests are healthy and the Qwen3 VL accuracy is normal. 👍

@vllmellm (Contributor, Author), Oct 31, 2025:

@tjtanaa @Lucaskabela those model paths are not using this dispatch function; it is only used in qwen2_vl.py. qwen2_5_vl.py and glm4_1v.py import the apply_rotary_pos_emb_vision function from qwen2_vl.py, and the dispatch happens inside that function:

def apply_rotary_pos_emb_vision(t: torch.Tensor, freqs: torch.Tensor) -> torch.Tensor:
    rotary_emb_function = dispatch_rotary_emb_function(default=apply_rotary_emb_torch)
    t_ = t.float()
    cos = freqs.cos()
    sin = freqs.sin()
    output = rotary_emb_function(t_, cos, sin).type_as(t)
    return output

@DarkLight1337 @Lucaskabela @tjtanaa should we keep the Triton embedding function here only for that one remaining case (glm4_1v.py), or not?
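
For reference, a small usage sketch of the shared helper described above, separate from the diff below. The module path follows the file names mentioned in this thread, and the tensor shapes are made-up placeholders.

import torch

from vllm.model_executor.models.qwen2_vl import apply_rotary_pos_emb_vision

# Hypothetical shapes purely to exercise the helper; real callers pass the
# vision attention's q/k and the precomputed rotary frequencies.
t = torch.randn(1, 64, 4, 32, device="cuda")  # assumed (batch, seq, heads, head_dim)
freqs = torch.randn(64, 16, device="cuda")    # assumed (seq, head_dim // 2)
out = apply_rotary_pos_emb_vision(t, freqs)   # the dispatch happens inside this call
print(out.shape)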

    # If torch.compile is not enabled, use the rotary embedding function from
    # the flash_attn package; otherwise use the naive PyTorch implementation,
    # which is faster when torch.compile is enabled.
    if current_platform.is_rocm() and not torch.compiler.is_compiling():
        if find_spec("flash_attn") is not None:
            from flash_attn.ops.triton.rotary import apply_rotary

@@ -87,11 +91,10 @@ def dispatch_rotary_emb_function(
"flash_attn is not installed. Falling back to PyTorch "
"implementation for rotary embeddings."
)

    if default is not None:
        return default
    else:
        return apply_rotary_emb_torch

    return apply_rotary_emb_torch


# yarn functions
2 changes: 1 addition & 1 deletion vllm/model_executor/models/glm4_1v.py
@@ -370,7 +370,7 @@ def forward(
cu_seqlens_k=cu_seqlens,
max_seqlen_q=max_seqlen,
max_seqlen_k=max_seqlen,
dropout_p=0,
dropout_p=0.0,
causal=False,
)
