@@ -170,38 +170,6 @@ def __init__(
         ascend_config = get_ascend_config()
         self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled
 
-    def forward(
-            self,
-            hidden_states: torch.Tensor,
-            attn_metadata: Optional[AttentionMetadata] = None) -> torch.Tensor:
-        forward_context = get_forward_context()
-        # when profile runs, force experts to load balanced tokens
-        # to avoid high memory consumption on a single rank.
-        enable_force_load_balance = forward_context.in_profile_run
-
-        is_prefill = forward_context.with_prefill
-
-        old_hidden_states = hidden_states.clone()
-
-        # router_logits: (num_tokens, n_experts)
-        router_logits, _ = self.gate(hidden_states)
-
-        hidden_states = self.experts(
-            hidden_states=hidden_states,
-            router_logits=router_logits,
-            is_prefill=is_prefill,
-            top_k=CustomDeepseekDBOMoE.top_k,
-            enable_force_load_balance=enable_force_load_balance,
-        ) * self.routed_scaling_factor
-
-        if self.n_shared_experts is not None:
-            shared_output = self.shared_experts(old_hidden_states)
-
-        if shared_output is not None:
-            hidden_states = hidden_states + shared_output
-
-        return hidden_states
-
     # ----------------------------------------- TBO-related --------------------------------------------
     def _forward_ms_op_shared_expert(
             self,