
[FIX] Fix the bug that some gemm configs were not handled #10627


Merged: 2 commits, May 22, 2025

Changes from all commits
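In short, the diff below swaps every PADDLE_FATAL call in the MoE GEMM dispatch path for a thrown std::runtime_error, so an unsupported or unhandled gemm config surfaces as a catchable exception instead of aborting the process. A minimal sketch of how a caller could benefit from this; the GemmConfig struct, dispatch_moe_gemm function, and config names here are hypothetical stand-ins, not Paddle's actual API:

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical stand-in for a candidate MoE GEMM configuration.
struct GemmConfig {
  std::string name;
};

// Hypothetical dispatcher: throws std::runtime_error for configs it cannot
// handle, mirroring the error behavior introduced by this PR.
void dispatch_moe_gemm(const GemmConfig& cfg) {
  if (cfg.name == "unsupported") {
    throw std::runtime_error(
        "[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
  }
  // ... launch the grouped GEMM kernel for this config ...
}

int main() {
  std::vector<GemmConfig> candidates = {
      {"128x128x64"}, {"unsupported"}, {"64x128x64"}};
  for (const auto& cfg : candidates) {
    try {
      dispatch_moe_gemm(cfg);
      std::cout << "config " << cfg.name << " dispatched\n";
    } catch (const std::runtime_error& e) {
      // With PADDLE_FATAL this would have terminated the process; now the
      // caller can log the error and simply skip the offending config.
      std::cout << "skipping " << cfg.name << ": " << e.what() << "\n";
    }
  }
  return 0;
}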
@@ -69,7 +69,8 @@ void generic_moe_gemm_kernelLauncher(const T* A,
cudaStream_t stream,
int* kernel_occupancy = nullptr) {
if (gemm_config.split_k_style != SplitKStyle::NO_SPLIT_K) {
PADDLE_FATAL("[MoeGemm] Grouped gemm does not support split-k");
throw std::runtime_error(
"[MoeGemm] Grouped gemm does not support split-k");
}

#ifdef PADDLE_CUDA_BF16
@@ -169,7 +170,7 @@ void generic_moe_gemm_kernelLauncher(const T* A,
int occupancy = std::min(2, GemmGrouped::maximum_active_blocks());

if (occupancy == 0) {
PADDLE_FATAL(
throw std::runtime_error(
"[MoE Runner] GPU lacks the shared memory resources to run "
"GroupedGEMM kernel");
}
@@ -197,7 +198,7 @@ void generic_moe_gemm_kernelLauncher(const T* A,
if (can_implement != cutlass::Status::kSuccess) {
std::string err_msg = "MoEFC kernel will fail for params. Error: " +
std::string(cutlassGetStatusString(can_implement));
PADDLE_FATAL("[MoE Runner] " + err_msg);
throw std::runtime_error("[MoE Runner] " + err_msg);
}

auto init_status = gemm.initialize(args);
@@ -243,7 +244,7 @@ struct dispatch_stages {
std::string err_msg = "Cutlass fpA_intB gemm. Not instantiates for arch " +
std::to_string(arch::kMinComputeCapability) +
" with stages set to " + std::to_string(Stages);
PADDLE_FATAL("[dispatch_stages::dispatch] " + err_msg);
throw std::runtime_error("[dispatch_stages::dispatch] " + err_msg);
}
};

@@ -394,7 +395,8 @@ void dispatch_gemm_config(const T* A,
default:
std::string err_msg = "dispatch_gemm_config does not support stages " +
std::to_string(gemm_config.stages);
PADDLE_FATAL("[MoE][dispatch_gemm_config] " + err_msg);
throw std::runtime_error(
"[MoE][dispatch_gemm_config] " + err_msg);
break;
}
}
@@ -452,15 +454,16 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
dispatch_gemm_config_macro(64, 128, 64, 32, 64, 64);
dispatch_gemm_config_macro(128, 128, 64, 64, 32, 64);
case CutlassTileConfig::Undefined:
PADDLE_FATAL("[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
throw std::runtime_error(
"[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
break;
case CutlassTileConfig::ChooseWithHeuristic:
PADDLE_FATAL(
throw std::runtime_error(
"[dispatch_moe_gemm_to_cutlass] gemm config should have "
"already been set by heuristic.");
break;
default:
PADDLE_FATAL(
throw std::runtime_error(
"[dispatch_moe_gemm_to_cutlass] Config is invalid for same "
"type MoE tensorop GEMM.");
break;
@@ -497,38 +500,44 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
dispatch_gemm_config_macro(32, 128, 64, 32, 32, 64);
dispatch_gemm_config_macro(64, 128, 64, 64, 64, 64);
case CutlassTileConfig::Undefined:
PADDLE_FATAL("[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
throw std::runtime_error(
"[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
break;
case CutlassTileConfig::ChooseWithHeuristic:
PADDLE_FATAL(
throw std::runtime_error(
"[dispatch_moe_gemm_to_cutlass] gemm config should have "
"already been set by heuristic.");
break;
default:
PADDLE_FATAL(
throw std::runtime_error(
"[dispatch_moe_gemm_to_cutlass] Config is invalid for "
"mixed type tensorop GEMM.");
break;
}
} else {
switch (gemm_config.tile_config) {
dispatch_gemm_config_macro(16, 128, 64, 16, 32, 64);
dispatch_gemm_config_macro(16, 256, 64, 16, 64, 64);
dispatch_gemm_config_macro(64, 64, 64, 32, 32, 64);
dispatch_gemm_config_macro(32, 128, 64, 32, 32, 64);
dispatch_gemm_config_macro(128, 64, 64, 64, 32, 64);
dispatch_gemm_config_macro(64, 128, 64, 64, 64, 64);
dispatch_gemm_config_macro(128, 128, 64, 64, 64, 64);
dispatch_gemm_config_macro(128, 128, 64, 128, 32, 64);
dispatch_gemm_config_macro(128, 256, 64, 64, 64, 64);
dispatch_gemm_config_macro(64, 128, 64, 64, 32, 64);
dispatch_gemm_config_macro(256, 128, 64, 64, 64, 64);
case CutlassTileConfig::Undefined:
PADDLE_FATAL("[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
throw std::runtime_error(
"[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
break;
case CutlassTileConfig::ChooseWithHeuristic:
PADDLE_FATAL(
throw std::runtime_error(
"[dispatch_moe_gemm_to_cutlass] gemm config should have "
"already been set by heuristic.");
break;
default:
PADDLE_FATAL(
throw std::runtime_error(
"[dispatch_moe_gemm_to_cutlass] Config is invalid for "
"mixed type tensorop GEMM.");
break;
@@ -561,17 +570,17 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
switch (gemm_config.tile_config) {
dispatch_gemm_config_macro(128, 128, 8, 64, 64, 8);
case CutlassTileConfig::Undefined:
PADDLE_FATAL(
throw std::runtime_error(
"[dispatch_moe_gemm_to_cutlass][SIMT] gemm config "
"undefined.");
break;
case CutlassTileConfig::ChooseWithHeuristic:
PADDLE_FATAL(
throw std::runtime_error(
"[dispatch_moe_gemm_to_cutlass][SIMT] gemm config should "
"have already been set by heuristic.");
break;
default:
PADDLE_FATAL(
throw std::runtime_error(
"[dispatch_moe_gemm_to_cutlass][SIMT] Unsupported config "
"for float MoE gemm.");
break;
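For context, each dispatch_moe_gemm_to_cutlass overload is a switch over gemm_config.tile_config: every supported tile shape gets a dispatch_gemm_config_macro case, while Undefined, ChooseWithHeuristic, and the default branch report an error, which after this change is a catchable std::runtime_error. A stripped-down sketch of that pattern follows; the enum values and launcher are simplified placeholders, not the real CUTLASS template machinery or the macro expansion used in Paddle:

#include <stdexcept>

// Simplified stand-ins for the real CUTLASS tile-config enum and launcher.
enum class CutlassTileConfig {
  Undefined,
  ChooseWithHeuristic,
  CtaShape64x128x64,
  CtaShape128x128x64,
};

struct GemmConfig {
  CutlassTileConfig tile_config;
};

void launch_grouped_gemm(int cta_m, int cta_n, int cta_k) {
  // Placeholder: the real code instantiates a grouped GEMM kernel
  // specialized for this tile shape.
  (void)cta_m; (void)cta_n; (void)cta_k;
}

void dispatch_moe_gemm_to_cutlass(const GemmConfig& gemm_config) {
  switch (gemm_config.tile_config) {
    // One case per supported tile shape; in the real code these cases are
    // generated by dispatch_gemm_config_macro.
    case CutlassTileConfig::CtaShape64x128x64:
      launch_grouped_gemm(64, 128, 64);
      break;
    case CutlassTileConfig::CtaShape128x128x64:
      launch_grouped_gemm(128, 128, 64);
      break;
    case CutlassTileConfig::Undefined:
      throw std::runtime_error(
          "[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
    case CutlassTileConfig::ChooseWithHeuristic:
      throw std::runtime_error(
          "[dispatch_moe_gemm_to_cutlass] gemm config should have "
          "already been set by heuristic.");
    default:
      // Any tile shape without a case above lands here and is reported as a
      // catchable error rather than a fatal abort.
      throw std::runtime_error(
          "[dispatch_moe_gemm_to_cutlass] Config is invalid for same "
          "type MoE tensorop GEMM.");
  }
}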