We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 1433482 · commit 4113c5d — Copy full SHA for 4113c5d
paddlenlp/transformers/deepseek_v2/modeling.py
@@ -2238,6 +2238,13 @@ def _init_weights(self, layer):
2238
if isinstance(layer, MoEGate):
2239
kaiming_uniform_(layer.weight, a=math.sqrt(5))
2240
2241
+ moe_grad_group = fleet.get_hybrid_communicate_group().expert_grad_comm_group
2242
+ if moe_grad_group is not None and moe_grad_group.nranks > 1:
2243
+ for p in layer.parameters():
2244
+ if hasattr(p, "color") and "color" in p.color:
2245
+ if p.color["color"] == "moe_expert":
2246
+ paddle.distributed.broadcast(p, src=moe_grad_group.ranks[0], group=moe_grad_group)
2247
+
2248
def step_flex_token(self, cur_step):
    """Publish the current training step for flex-token scheduling.

    Args:
        cur_step: the current global training step index.
    """
    # Delegates to set_global_step (defined elsewhere in this module) —
    # presumably stores cur_step in a module-level counter that MoE
    # flex-token logic reads; verify against its definition.
    set_global_step(cur_step)
2250
0 commit comments