Skip to content

Commit 93298de

Browse files
Revert "Fix the memory verification issue within intranode dispatch (#83)" (#88)
This reverts commit 50903a9.
1 parent 0e59a90 commit 93298de

File tree

3 files changed

14 additions and 28 deletions

3 files changed

14 additions and 28 deletions

csrc/deepep/ops/op_host/cam_moe_combine_normal_tiling.cc

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -19,11 +19,9 @@
1919
#include "graph/utils/type_utils.h"
2020
#include "register/op_def_registry.h"
2121
#include "../op_kernel/cam_moe_combine_normal_tiling.h"
22-
#include "tiling_args.h"
2322

2423
using namespace AscendC;
2524
using namespace ge;
26-
using namespace Moe;
2725

2826
namespace {
2927
class Mc2TilingUtils
@@ -85,6 +83,7 @@ constexpr uint64_t MB_SIZE = 1024UL * 1024UL;
8583
constexpr uint64_t TRIPLE = 3;
8684
constexpr uint64_t WIN_ADDR_ALIGN = 512UL;
8785
constexpr uint64_t SCALE_RECV_IDX_BUFFER = 44UL; // scale32B + 3*4 src info
86+
constexpr uint64_t COMBINE_STATE_WIN_OFFSET = 3U * 1024UL * 1024UL;
8887
constexpr uint64_t DOUBLE_DATA_BUFFER = 2UL;
8988
constexpr uint64_t MAX_OUT_DTYPE_SIZE = 2UL;
9089
constexpr uint64_t UB_ALIGN = 32UL;
@@ -515,17 +514,20 @@ static ge::graphStatus CamMoeCombineNormalA3TilingFuncImpl(gert::TilingContext *
515514
uint64_t maxBs = static_cast<uint64_t>(tilingData->camMoeCombineNormalInfo.globalBs) / epWorldSize;
516515
// combine数据区 token首地址对齐512
517516
uint64_t tokenNeedSizeCombine = ((h * MAX_OUT_DTYPE_SIZE + WIN_ADDR_ALIGN - 1UL) / WIN_ADDR_ALIGN) * WIN_ADDR_ALIGN;
517+
// dispatch数据区 token首对齐512,有效token长度h_align_32b + scale(32b) + 三元组(3*4b)
518+
uint64_t tokenActualLen = ((h * MAX_OUT_DTYPE_SIZE + UB_ALIGN - 1UL) / UB_ALIGN) * UB_ALIGN + SCALE_RECV_IDX_BUFFER;
519+
uint64_t tokenNeedSizeDispatch = ((tokenActualLen + WIN_ADDR_ALIGN - 1UL) / WIN_ADDR_ALIGN) * WIN_ADDR_ALIGN;
518520
uint64_t actualSize =
519-
(maxBs * k * tokenNeedSizeCombine + COMBINE_STATE_WIN_OFFSET + NOTIFY_DISPATCH_WIN_OFFSET) * DOUBLE_DATA_BUFFER;
521+
(maxBs * k * (tokenNeedSizeCombine + tokenNeedSizeDispatch) + COMBINE_STATE_WIN_OFFSET) * DOUBLE_DATA_BUFFER;
520522
OP_TILING_CHECK(
521523
(actualSize > maxWindowSize),
522524
OP_LOGE(nodeName,
523525
"HCCL_BUFFSIZE is too SMALL, maxBs = %lu, h = %lu, epWorldSize = %lu, localMoeExpertNum = %u,"
524-
" tokenNeedSizeCombine = %lu, k = %lu, NEEDED_HCCL_BUFFSIZE("
525-
"((maxBs * k * tokenNeedSizeCombine)) + 3MB + 204MB) * 2) = %luMB, "
526+
" tokenNeedSizeDispatch = %lu, tokenNeedSizeCombine = %lu, k = %lu, NEEDED_HCCL_BUFFSIZE("
527+
"((maxBs * tokenNeedSizeDispatch) + (maxBs * tokenNeedSizeCombine * k) + 3MB) * 2) = %luMB, "
526528
"HCCL_BUFFSIZE=%luMB.",
527-
maxBs, h, epWorldSize, localMoeExpertNum, tokenNeedSizeCombine, k, actualSize / MB_SIZE + 1UL,
528-
maxWindowSize / MB_SIZE),
529+
maxBs, h, epWorldSize, localMoeExpertNum, tokenNeedSizeDispatch, tokenNeedSizeCombine, k,
530+
actualSize / MB_SIZE + 1UL, maxWindowSize / MB_SIZE),
529531
return ge::GRAPH_FAILED);
530532
tilingData->camMoeCombineNormalInfo.totalWinSize = maxWindowSize;
531533

csrc/deepep/ops/op_host/cam_moe_dispatch_normal_tiling.cc

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,9 @@
1818
#include "graph/utils/type_utils.h"
1919
#include "register/op_def_registry.h"
2020
#include "../op_kernel/cam_moe_dispatch_normal_tiling.h"
21-
#include "tiling_args.h"
2221

2322
using namespace AscendC;
2423
using namespace ge;
25-
using namespace Moe;
26-
2724
namespace {
2825
class Mc2TilingUtils
2926
{
@@ -94,7 +91,6 @@ constexpr uint32_t WORKSPACE_ELEMENT_OFFSET = 512;
9491
constexpr int64_t H_MIN = 1024;
9592
constexpr int64_t H_MAX = 7168;
9693
constexpr uint64_t MB_SIZE = 1024UL * 1024UL;
97-
9894
constexpr uint64_t TRIPLE = 3;
9995
constexpr uint64_t WIN_ADDR_ALIGN = 512UL;
10096
constexpr uint64_t SCALE_EXPAND_IDX_BUFFER = 44UL; // scale32B + 3*4expandIdx
@@ -543,18 +539,15 @@ static ge::graphStatus CamMoeDispatchNormalA3TilingFuncImpl(gert::TilingContext
543539
uint64_t tokenActualLen =
544540
((h * MAX_OUT_DTYPE_SIZE + UB_ALIGN - 1UL) / UB_ALIGN) * UB_ALIGN + SCALE_EXPAND_IDX_BUFFER;
545541
uint64_t tokenNeedSizeDispatch = ((tokenActualLen + WIN_ADDR_ALIGN - 1UL) / WIN_ADDR_ALIGN) * WIN_ADDR_ALIGN;
546-
uint64_t tokenNeedSizeCombine = ((h * MAX_OUT_DTYPE_SIZE + WIN_ADDR_ALIGN - 1UL) / WIN_ADDR_ALIGN) * WIN_ADDR_ALIGN;
547542
// 未考虑双流时大小
548-
uint64_t actualSize = (maxBs * k * (tokenNeedSizeCombine + tokenNeedSizeDispatch) + COMBINE_STATE_WIN_OFFSET +
549-
NOTIFY_DISPATCH_WIN_OFFSET) *
550-
DOUBLE_DATA_BUFFER;
543+
uint64_t actualSize = maxBs * k * tokenNeedSizeDispatch * DOUBLE_DATA_BUFFER;
551544
OP_TILING_CHECK((actualSize > maxWindowSize),
552545
OP_LOGE(nodeName,
553546
"HCCL_BUFFSIZE is too SMALL, maxBs = %lu, h = %lu, epWorldSize = %lu,"
554-
" localMoeExpertNum = %u, tokenNeedSizeDispatch = %lu, tokenNeedSizeCombine = %lu,"
555-
" k = %lu, NEEDED_HCCL_BUFFSIZE((maxBs * k * (tokenNeedSizeDispatch"
556-
" + tokenNeedSizeCombine) + 3MB + 204MB) * 2) = %luMB, HCCL_BUFFSIZE=%luMB.",
557-
maxBs, h, epWorldSize, localMoeExpertNum, tokenNeedSizeDispatch, tokenNeedSizeCombine, k,
547+
" localMoeExpertNum = %u, tokenNeedSizeDispatch = %lu,"
548+
" k = %lu, NEEDED_HCCL_BUFFSIZE(maxBs * k * tokenNeedSizeDispatch) = %luMB,"
549+
" HCCL_BUFFSIZE=%luMB.",
550+
maxBs, h, epWorldSize, localMoeExpertNum, tokenNeedSizeDispatch, k,
558551
actualSize / MB_SIZE + 1UL, maxWindowSize / MB_SIZE),
559552
return ge::GRAPH_FAILED);
560553
tilingData->camMoeDispatchNormalInfo.totalWinSize = maxWindowSize;

csrc/deepep/ops/op_host/tiling_args.h

Lines changed: 0 additions & 9 deletions
This file was deleted.

0 commit comments

Comments
 (0)