3 files changed, +3 -7 lines changed
csrc/deepep/ops/op_kernel: 3 files changed, +3 -7 lines changed
Original file line number | Diff line number | Diff line change
55#include " kernel_tiling/kernel_tiling.h"
66#include " moe_distribute_base.h"
77#include " cam_moe_combine_normal_tiling.h"
8- #include " comm_args.h"
98
109namespace CamMoeCombineNormalImpl {
1110constexpr uint32_t RANK_ID_OFFSET_IN_SRC_INFO = 0U ;
@@ -64,7 +63,7 @@ class CamMoeCombineNormal
6463 } else {
6564 bufferAddr = (GM_ADDR)((HcclRankRelationResV2 *)epWinContext_->remoteRes [rankId].nextDevicePtr )->windowsIn ;
6665 }
67- return (GM_ADDR)(bufferAddr + winDataSizeOffset_ + Moe::NOTIFY_DISPATCH_BUFF_OFFSET );
66+ return (GM_ADDR)(bufferAddr + winDataSizeOffset_);
6867 }
6968
7069 __aicore__ GM_ADDR GetBufferAddrByRankId (const int32_t rankId)
Original file line number | Diff line number | Diff line change
55#include " kernel_tiling/kernel_tiling.h"
66#include " moe_distribute_base.h"
77#include " cam_moe_dispatch_normal_tiling.h"
8- #include " comm_args.h"
98
109namespace CamMoeDispatchNormalImpl {
1110constexpr uint8_t BUFFER_NUM = 2 ;
@@ -60,11 +59,10 @@ class CamMoeDispatchNormal
6059 {
6160 uint32_t curRankId = ((ctxIdx == COMM_EP_IDX) ? epRankId : tpRankId);
6261 if (curRankId == rankId) {
63- return (GM_ADDR)(winContext_[ctxIdx]->localWindowsIn ) + winDataSizeOffset + COMBINE_STATE_WIN_OFFSET +
64- Moe::NOTIFY_DISPATCH_BUFF_OFFSET;
62+ return (GM_ADDR)(winContext_[ctxIdx]->localWindowsIn ) + winDataSizeOffset + COMBINE_STATE_WIN_OFFSET;
6563 }
6664 return (GM_ADDR)(((HcclRankRelationResV2 *)(winContext_[ctxIdx]->remoteRes [rankId].nextDevicePtr ))->windowsIn ) +
67- winDataSizeOffset + COMBINE_STATE_WIN_OFFSET + Moe::NOTIFY_DISPATCH_BUFF_OFFSET ;
65+ winDataSizeOffset + COMBINE_STATE_WIN_OFFSET;
6866 }
6967
7068 __aicore__ inline GM_ADDR GetWindStateAddrByRankId (uint8_t ctxIdx, const int32_t rankId)
Original file line number | Diff line number | Diff line change
88namespace Moe {
99constexpr int CAM_MAX_RANK_SIZE = 384 ; // Maximum number of NPU cards supported by the communication library
1010
11- constexpr int64_t NOTIFY_DISPATCH_BUFF_OFFSET = 204UL * 1024UL * 1024UL ;
1211constexpr int64_t IPC_BUFF_MAX_SIZE = 100 * 1024 * 1024 ;
1312constexpr int64_t IPC_DATA_OFFSET = 2 * 1024 * 1024 ; // First 2MB as flag, then 100MB as data storage
1413constexpr int64_t PING_PONG_SIZE = 2 ;
You can’t perform that action at this time.
0 commit comments