Commit fccb4d0

lint
Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com>
1 parent: be4dc1a

File tree: 3 files changed, +1 / -17 lines

  vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
  vllm/model_executor/layers/quantization/fp8.py
  vllm/model_executor/models/qwen3_next.py

vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py

Lines changed: 0 additions & 4 deletions
@@ -38,11 +38,7 @@ def flashinfer_fused_moe_blockscale_fp8(
     topk_group = topk_group if topk_group is not None else 0
     assert top_k <= global_num_experts
     assert top_k <= 10
-    # assert topk_group <= 4
-    # assert global_num_experts > num_expert_group
-    # assert global_num_experts % num_expert_group == 0
     assert global_num_experts % 4 == 0
-    # assert top_k < (topk_group * global_num_experts / num_expert_group)
     assert block_shape == [128, 128]
     # Routing kernel expects #experts <= #threads 512
     assert global_num_experts <= 512
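For reference, the checks that survive this cleanup can be read as one standalone validation step. The sketch below is illustrative only (the helper name is hypothetical, not part of vLLM); it simply mirrors the assertions kept in flashinfer_fused_moe_blockscale_fp8 in the hunk above.

def check_blockscale_fp8_routing_inputs(
    top_k: int,
    global_num_experts: int,
    block_shape: list[int],
) -> None:
    # Same guards as the assertions the commit keeps above.
    assert top_k <= global_num_experts
    assert top_k <= 10
    assert global_num_experts % 4 == 0
    assert block_shape == [128, 128]
    # Routing kernel expects #experts <= #threads 512
    assert global_num_experts <= 512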

vllm/model_executor/layers/quantization/fp8.py

Lines changed: 1 addition & 10 deletions
@@ -1214,15 +1214,10 @@ def apply(
             assert activation == "silu", (
                 f"Expected 'silu' activation but got {activation}"
             )
-            # assert scoring_func == "sigmoid", (
-            # f"Expected 'sigmoid' scoring func but got {scoring_func}"
-            # )
+
             if self.block_quant:
                 import vllm.model_executor.layers.fused_moe.flashinfer_trtllm_moe # noqa: E501, F401

-                # assert (
-                # renormalize and use_grouped_topk and custom_routing_function is None
-                # )
                 e_score_correction_bias = (
                     e_score_correction_bias.to(x.dtype)
                     if e_score_correction_bias is not None
@@ -1296,10 +1291,6 @@ def apply(
         # can override fused_experts or cutlass but not rocm or marlin.
         #
         topk_weights, topk_ids, zero_expert_result = select_result
-        # if (topk_ids.shape[0] <100):
-        # print("=== MoE Routing Results ===")
-        # print(f"topk_ids: {topk_ids}")
-        # print(f"topk_weights: {topk_weights}")
         if self.rocm_aiter_moe_enabled:
             from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( # noqa: E501
                 rocm_aiter_fused_experts,
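As a side note, the kept context lines in the first hunk cast the optional expert-score correction bias to the activation dtype before routing. A hedged, self-contained sketch of that step (the helper name is hypothetical; the None pass-through is an assumption, not shown in the hunk):

from typing import Optional

import torch


def align_bias_dtype(
    e_score_correction_bias: Optional[torch.Tensor], x: torch.Tensor
) -> Optional[torch.Tensor]:
    # Cast the optional bias to the activation dtype, as the kept lines above do.
    if e_score_correction_bias is None:
        return None  # assumption: a missing bias simply passes through
    return e_score_correction_bias.to(x.dtype)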

vllm/model_executor/models/qwen3_next.py

Lines changed: 0 additions & 3 deletions
@@ -206,9 +206,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
 >>>>>>> 9d88f1762 (update work)

         if self.shared_expert is not None:
-            # if ("model.layers.0." in self.prefix_print or "model.layers.1." in self.prefix_print or "model.layers.47." in self.prefix_print):
-            # print(f"shared_expert: {final_hidden_states[0]}")
-            # print(f"routed_expert: {final_hidden_states[1]}")
             final_hidden_states = final_hidden_states[0] + final_hidden_states[1]

         if self.is_sequence_parallel:
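Finally, the line kept in the qwen3_next.py hunk sums the shared-expert output (index 0) with the routed-expert output (index 1). A minimal sketch, assuming final_hidden_states arrives as that pair when a shared expert exists (the no-shared-expert path is not visible in the hunk and is only assumed here):

import torch


def combine_expert_outputs(final_hidden_states, has_shared_expert: bool) -> torch.Tensor:
    if has_shared_expert:
        # Shared-expert output plus routed-expert output, as in the kept line above.
        return final_hidden_states[0] + final_hidden_states[1]
    # Assumption: without a shared expert there is only the routed output.
    return final_hidden_states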

Comments (0)