We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 8a8dd82, commit 9d825eb (Copy full SHA for 9d825eb)
moe_benchmarks/megablocks_yamoe/torch_profile.md
@@ -1026,7 +1026,7 @@ def build_megablocks_model(device: torch.device):
1026
# Attach loaded expert weights to the experts container
1027
e = model.experts
1028
e.alpha = 1.702
1029
- e.capacity_factor = 64
+ e.capacity_factor = 32
1030
e.gate_up_proj = torch.nn.Parameter(gate_up_proj.clone().to(device))
1031
e.gate_up_proj_bias = torch.nn.Parameter(gate_up_proj_bias.clone().to(device))
1032
e.down_proj = torch.nn.Parameter(down_proj.clone().to(device))
0 commit comments