Skip to content

Commit efd0c8b

Browse files
committed
Fix the bug where some GEMM configs were not handled
1 parent 6f80ea1 commit efd0c8b

File tree

1 file changed

+35
-28
lines changed

1 file changed

+35
-28
lines changed

csrc/gpu/moe/fused_moe/cutlass_kernels/moe_gemm/fused_moe_gemm_kernels_template.h

Lines changed: 35 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,8 @@ void generic_moe_gemm_kernelLauncher(const T* A,
6969
cudaStream_t stream,
7070
int* kernel_occupancy = nullptr) {
7171
if (gemm_config.split_k_style != SplitKStyle::NO_SPLIT_K) {
72-
PADDLE_FATAL("[MoeGemm] Grouped gemm does not support split-k");
72+
PADDLE_THROW(
73+
phi::errors::Fatal("[MoeGemm] Grouped gemm does not support split-k"));
7374
}
7475

7576
#ifdef PADDLE_CUDA_BF16
@@ -169,9 +170,9 @@ void generic_moe_gemm_kernelLauncher(const T* A,
169170
int occupancy = std::min(2, GemmGrouped::maximum_active_blocks());
170171

171172
if (occupancy == 0) {
172-
PADDLE_FATAL(
173+
PADDLE_THROW(phi::errors::Fatal(
173174
"[MoE Runner] GPU lacks the shared memory resources to run "
174-
"GroupedGEMM kernel");
175+
"GroupedGEMM kernel"));
175176
}
176177
const int threadblock_count = multi_processor_count * occupancy;
177178

@@ -197,7 +198,7 @@ void generic_moe_gemm_kernelLauncher(const T* A,
197198
if (can_implement != cutlass::Status::kSuccess) {
198199
std::string err_msg = "MoEFC kernel will fail for params. Error: " +
199200
std::string(cutlassGetStatusString(can_implement));
200-
PADDLE_FATAL("[MoE Runner] " + err_msg);
201+
PADDLE_THROW(phi::errors::Fatal("[MoE Runner] " + err_msg));
201202
}
202203

203204
auto init_status = gemm.initialize(args);
@@ -243,7 +244,7 @@ struct dispatch_stages {
243244
std::string err_msg = "Cutlass fpA_intB gemm. Not instantiates for arch " +
244245
std::to_string(arch::kMinComputeCapability) +
245246
" with stages set to " + std::to_string(Stages);
246-
PADDLE_FATAL("[dispatch_stages::dispatch] " + err_msg);
247+
PADDLE_THROW(phi::errors::Fatal("[dispatch_stages::dispatch] " + err_msg));
247248
}
248249
};
249250

@@ -394,7 +395,8 @@ void dispatch_gemm_config(const T* A,
394395
default:
395396
std::string err_msg = "dispatch_gemm_config does not support stages " +
396397
std::to_string(gemm_config.stages);
397-
PADDLE_FATAL("[MoE][dispatch_gemm_config] " + err_msg);
398+
PADDLE_THROW(
399+
phi::errors::Fatal("[MoE][dispatch_gemm_config] " + err_msg));
398400
break;
399401
}
400402
}
@@ -452,17 +454,18 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
452454
dispatch_gemm_config_macro(64, 128, 64, 32, 64, 64);
453455
dispatch_gemm_config_macro(128, 128, 64, 64, 32, 64);
454456
case CutlassTileConfig::Undefined:
455-
PADDLE_FATAL("[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
457+
PADDLE_THROW(common::errors::InvalidArgument(
458+
"[dispatch_moe_gemm_to_cutlass] gemm config undefined."));
456459
break;
457460
case CutlassTileConfig::ChooseWithHeuristic:
458-
PADDLE_FATAL(
461+
PADDLE_THROW(common::errors::InvalidArgument(
459462
"[dispatch_moe_gemm_to_cutlass] gemm config should have "
460-
"already been set by heuristic.");
463+
"already been set by heuristic."));
461464
break;
462465
default:
463-
PADDLE_FATAL(
466+
PADDLE_THROW(common::errors::InvalidArgument(
464467
"[dispatch_moe_gemm_to_cutlass] Config is invalid for same "
465-
"type MoE tensorop GEMM.");
468+
"type MoE tensorop GEMM."));
466469
break;
467470
}
468471
}
@@ -497,40 +500,44 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
497500
dispatch_gemm_config_macro(32, 128, 64, 32, 32, 64);
498501
dispatch_gemm_config_macro(64, 128, 64, 64, 64, 64);
499502
case CutlassTileConfig::Undefined:
500-
PADDLE_FATAL("[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
503+
PADDLE_THROW(common::errors::InvalidArgument(
504+
"[dispatch_moe_gemm_to_cutlass] gemm config undefined."));
501505
break;
502506
case CutlassTileConfig::ChooseWithHeuristic:
503-
PADDLE_FATAL(
507+
PADDLE_THROW(common::errors::InvalidArgument(
504508
"[dispatch_moe_gemm_to_cutlass] gemm config should have "
505-
"already been set by heuristic.");
509+
"already been set by heuristic."));
506510
break;
507511
default:
508-
PADDLE_FATAL(
512+
PADDLE_THROW(common::errors::InvalidArgument(
509513
"[dispatch_moe_gemm_to_cutlass] Config is invalid for "
510-
"mixed type tensorop GEMM.");
514+
"mixed type tensorop GEMM."));
511515
break;
512516
}
513517
} else {
514518
switch (gemm_config.tile_config) {
515519
dispatch_gemm_config_macro(16, 128, 64, 16, 32, 64);
520+
dispatch_gemm_config_macro(16, 256, 64, 16, 64, 64);
521+
dispatch_gemm_config_macro(64, 64, 64, 32, 32, 64);
516522
dispatch_gemm_config_macro(32, 128, 64, 32, 32, 64);
523+
dispatch_gemm_config_macro(128, 64, 64, 64, 32, 64);
517524
dispatch_gemm_config_macro(64, 128, 64, 64, 64, 64);
518525
dispatch_gemm_config_macro(128, 128, 64, 64, 64, 64);
519526
dispatch_gemm_config_macro(128, 128, 64, 128, 32, 64);
520527
dispatch_gemm_config_macro(128, 256, 64, 64, 64, 64);
521528
dispatch_gemm_config_macro(64, 128, 64, 64, 32, 64);
522529
case CutlassTileConfig::Undefined:
523-
PADDLE_FATAL("[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
530+
PADDLE_THROW(common::errors::InvalidArgument(
531+
"[dispatch_moe_gemm_to_cutlass] gemm config undefined."));
524532
break;
525533
case CutlassTileConfig::ChooseWithHeuristic:
526-
PADDLE_FATAL(
534+
PADDLE_THROW(common::errors::InvalidArgument(
527535
"[dispatch_moe_gemm_to_cutlass] gemm config should have "
528-
"already been set by heuristic.");
536+
"already been set by heuristic."));
529537
break;
530538
default:
531-
PADDLE_FATAL(
532-
"[dispatch_moe_gemm_to_cutlass] Config is invalid for "
533-
"mixed type tensorop GEMM.");
539+
PADDLE_THROW(common::errors::InvalidArgument(
540+
"[dispatch_moe_gemm_to_cutlass] gemm config undefined."));
534541
break;
535542
}
536543
}
@@ -561,19 +568,19 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
561568
switch (gemm_config.tile_config) {
562569
dispatch_gemm_config_macro(128, 128, 8, 64, 64, 8);
563570
case CutlassTileConfig::Undefined:
564-
PADDLE_FATAL(
571+
PADDLE_THROW(common::errors::InvalidArgument(
565572
"[dispatch_moe_gemm_to_cutlass][SIMT] gemm config "
566-
"undefined.");
573+
"undefined."));
567574
break;
568575
case CutlassTileConfig::ChooseWithHeuristic:
569-
PADDLE_FATAL(
576+
PADDLE_THROW(common::errors::InvalidArgument(
570577
"[dispatch_moe_gemm_to_cutlass][SIMT] gemm config should "
571-
"have already been set by heuristic.");
578+
"have already been set by heuristic."));
572579
break;
573580
default:
574-
PADDLE_FATAL(
581+
PADDLE_THROW(common::errors::InvalidArgument(
575582
"[dispatch_moe_gemm_to_cutlass][SIMT] Unsupported config "
576-
"for float MoE gemm.");
583+
"for float MoE gemm."));
577584
break;
578585
}
579586
}

0 commit comments

Comments
 (0)