We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 8184c95, commit 7a12ae8. Copy full SHA for 7a12ae8
sota-implementations/llm/config/grpo.yaml
@@ -37,9 +37,9 @@ inference_model:
37
# Reference model configuration
38
ref_model:
39
quantization:
40
- enabled: true # Enable quantization for memory efficiency
+ enabled: false # Quantization disabled; set to true to enable it for memory efficiency
41
gradient_checkpointing: false # Not needed for reference model
42
- attn_implementation: flex_attention
+ attn_implementation:
43
torch_dtype: bfloat16
44
45
# Policy configuration
0 commit comments