Commit 4113c5d

Fix bug in expert init (#10347)
1 parent 1433482 commit 4113c5d

File tree: 1 file changed (+7 −0 lines)


paddlenlp/transformers/deepseek_v2/modeling.py

Lines changed: 7 additions & 0 deletions
@@ -2238,6 +2238,13 @@ def _init_weights(self, layer):
         if isinstance(layer, MoEGate):
             kaiming_uniform_(layer.weight, a=math.sqrt(5))
 
+        moe_grad_group = fleet.get_hybrid_communicate_group().expert_grad_comm_group
+        if moe_grad_group is not None and moe_grad_group.nranks > 1:
+            for p in layer.parameters():
+                if hasattr(p, "color") and "color" in p.color:
+                    if p.color["color"] == "moe_expert":
+                        paddle.distributed.broadcast(p, src=moe_grad_group.ranks[0], group=moe_grad_group)
+
     def step_flex_token(self, cur_step):
         set_global_step(cur_step)
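
Why the broadcast is needed: _init_weights seeds each process independently, but the ranks in expert_grad_comm_group presumably synchronize gradients for the same experts, so their expert weights must match before training starts. Broadcasting from the group's first rank (moe_grad_group.ranks[0]) makes rank 0's random initialization authoritative. The sketch below illustrates the same pattern outside PaddleNLP; it is a minimal example under stated assumptions, not the repository's code. The two-rank group, the sync_expert_params helper, the sketch.py file name, and the manual p.color tagging are illustrative stand-ins for the hybrid communicate group and PaddleNLP's parameter-coloring convention.

# Minimal sketch of the broadcast-after-init pattern from this commit.
# Assumptions (not from the original file): a 2-rank process group stands in
# for expert_grad_comm_group, and parameters are tagged by hand with the same
# p.color convention the fixed code checks for.
#
# Launch with: python -m paddle.distributed.launch --nproc_per_node=2 sketch.py
import paddle
import paddle.distributed as dist


def sync_expert_params(layer, moe_grad_group):
    """Broadcast every parameter tagged "moe_expert" from the group's first rank."""
    if moe_grad_group is None or moe_grad_group.nranks <= 1:
        return
    for p in layer.parameters():
        # Same check as the commit: a `color` dict attribute whose "color"
        # entry marks the parameter as an expert weight.
        if hasattr(p, "color") and "color" in p.color and p.color["color"] == "moe_expert":
            dist.broadcast(p, src=moe_grad_group.ranks[0], group=moe_grad_group)


if __name__ == "__main__":
    dist.init_parallel_env()
    group = dist.new_group(ranks=[0, 1])  # stand-in for expert_grad_comm_group

    expert = paddle.nn.Linear(8, 8)  # stand-in for one expert's FFN
    for p in expert.parameters():
        p.color = {"color": "moe_expert"}  # hypothetical manual tagging

    # Each rank initialized `expert` independently; after the broadcast both
    # ranks hold rank 0's weights, so later gradient synchronization starts
    # from a consistent state.
    sync_expert_params(expert, group)
    print(dist.get_rank(), float(expert.weight.abs().sum()))

Doing the synchronization once at initialization costs one broadcast per expert parameter; letting the replicas diverge instead would silently corrupt whatever gradient communication the group performs on every training step.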
