Commit de8a625

make e2e tests a bit faster by reducing test split size (axolotl-ai-cloud#2522) [skip ci]
* [ci] make e2e tests a bit faster by reducing test split size
* use 10% split of alpaca dataset to speed up dataset loading/tokenization
* reduce gas 4->2 for most e2e tests
* increase val set size for packing
1 parent 51267de commit de8a625
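
For context, every change in this commit is a tweak to the dict-style configs these e2e tests build before training. A minimal sketch of the resulting pattern (illustrative only, not an actual test from the repo; the keys and values mirror the diffs below):

    # Illustrative config fragment; keys and values mirror the diffs below.
    cfg = {
        "datasets": [
            {
                "path": "tatsu-lab/alpaca",
                "type": "alpaca",
                # load/tokenize only the first 10% of the train split
                "split": "train[:10%]",
            },
        ],
        "micro_batch_size": 2,
        # reduced from 4 to 2 for most e2e tests
        "gradient_accumulation_steps": 2,
        # smaller eval split for unpacked tests; packed tests move to 0.05
        "val_set_size": 0.02,
    }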

24 files changed (+54 −44 lines)

tests/e2e/integrations/test_cut_cross_entropy.py

Lines changed: 2 additions & 2 deletions
@@ -25,7 +25,7 @@ def min_cfg(temp_dir):
         ],
         "cut_cross_entropy": True,
         "sequence_len": 1024,
-        "val_set_size": 0.1,
+        "val_set_size": 0.02,
         "special_tokens": {
             "pad_token": "<|endoftext|>",
         },
@@ -79,7 +79,7 @@ def test_qwen2_w_cce(self, temp_dir):
         ],
         "cut_cross_entropy": True,
         "sequence_len": 1024,
-        "val_set_size": 0.1,
+        "val_set_size": 0.02,
         "special_tokens": {
             "pad_token": "<|endoftext|>",
         },

tests/e2e/multigpu/solo/test_flex.py

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ def test_loss_llama(self, temp_dir):
         ],
         "num_epochs": 1,
         "micro_batch_size": 2,
-        "gradient_accumulation_steps": 4,
+        "gradient_accumulation_steps": 2,
         "output_dir": temp_dir,
         "learning_rate": 0.00001,
         "optimizer": "adamw_torch_fused",

tests/e2e/multigpu/test_llama.py

Lines changed: 13 additions & 6 deletions
@@ -58,12 +58,13 @@ def test_lora_ddp(self, temp_dir):
             {
                 "path": "tatsu-lab/alpaca",
                 "type": "alpaca",
+                "split": "train[:10%]",
             },
         ],
         "num_epochs": 1,
         "max_steps": 2,
         "micro_batch_size": 1,
-        "gradient_accumulation_steps": 4,
+        "gradient_accumulation_steps": 2,
         # "gradient_checkpointing": True,
         "output_dir": temp_dir,
         "learning_rate": 0.00001,
@@ -201,7 +202,7 @@ def test_dpo_lora_ddp(self, temp_dir):
         "num_epochs": 1,
         "max_steps": 2,
         "micro_batch_size": 2,
-        "gradient_accumulation_steps": 4,
+        "gradient_accumulation_steps": 2,
         # "gradient_checkpointing": True,
         "output_dir": temp_dir,
         "warmup_steps": 0,
@@ -279,7 +280,7 @@ def test_dpo_qlora_ddp(self, temp_dir):
         "num_epochs": 1,
         "max_steps": 2,
         "micro_batch_size": 2,
-        "gradient_accumulation_steps": 4,
+        "gradient_accumulation_steps": 2,
         # "gradient_checkpointing": True,
         "output_dir": temp_dir,
         "warmup_steps": 0,
@@ -335,6 +336,7 @@ def test_fsdp(self, temp_dir, gradient_accumulation_steps):
             {
                 "path": "tatsu-lab/alpaca",
                 "type": "alpaca",
+                "split": "train[:10%]",
             },
         ],
         "num_epochs": 1,
@@ -398,14 +400,15 @@ def test_fsdp_packed(self, temp_dir, fsdp_state_dict_type):
         "sample_packing": True,
         "pad_to_sequence_len": True,
         "sequence_len": 1024,
-        "val_set_size": 0.01,
+        "val_set_size": 0.05,
         "special_tokens": {
             "pad_token": "<|endoftext|>",
         },
         "datasets": [
             {
                 "path": "tatsu-lab/alpaca",
                 "type": "alpaca",
+                "split": "train[:10%]",
             },
         ],
         "num_epochs": 1,
@@ -484,6 +487,7 @@ def test_fsdp2_packed(
             {
                 "path": "tatsu-lab/alpaca",
                 "type": "alpaca",
+                "split": "train[:10%]",
             },
         ],
         "num_epochs": 1,
@@ -565,7 +569,7 @@ def test_fsdp_qlora_prequant_packed(self, temp_dir):
             {
                 "path": "tatsu-lab/alpaca",
                 "type": "alpaca",
-                "split": "train[:25%]",
+                "split": "train[:10%]",
             },
         ],
         "num_epochs": 1,
@@ -660,14 +664,15 @@ def test_ds_zero3_packed(
         "sample_packing": True,
         "pad_to_sequence_len": True,
         "sequence_len": 1024,
-        "val_set_size": 0.01,
+        "val_set_size": 0.05,
         "special_tokens": {
             "pad_token": "<|endoftext|>",
         },
         "datasets": [
             {
                 "path": "tatsu-lab/alpaca",
                 "type": "alpaca",
+                "split": "train[:10%]",
             },
         ],
         "num_epochs": 1,
@@ -741,6 +746,7 @@ def test_ds_zero2_packed(self, temp_dir, gradient_accumulation_steps, qlora):
             {
                 "path": "tatsu-lab/alpaca",
                 "type": "alpaca",
+                "split": "train[:10%]",
             },
         ],
         "num_epochs": 1,
@@ -814,6 +820,7 @@ def test_ds_zero1_packed(self, temp_dir, gradient_accumulation_steps, qlora):
             {
                 "path": "tatsu-lab/alpaca",
                 "type": "alpaca",
+                "split": "train[:10%]",
             },
         ],
         "num_epochs": 1,

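The gradient_accumulation_steps reduction above halves the work per optimizer step, and the packed FSDP/DeepSpeed tests raise val_set_size from 0.01 to 0.05, presumably so the now-smaller 10% Alpaca slice still yields a usable packed eval set. A rough sketch of the effective-batch arithmetic (the two-GPU world size is an assumption for illustration):

    # Hypothetical numbers to show the effective-batch arithmetic; world_size=2 is assumed.
    world_size = 2
    micro_batch_size = 2
    for gas in (4, 2):  # before -> after this commit
        effective_batch = micro_batch_size * gas * world_size
        print(f"gradient_accumulation_steps={gas} -> effective batch {effective_batch}")
    # gradient_accumulation_steps=4 -> effective batch 16
    # gradient_accumulation_steps=2 -> effective batch 8
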
tests/e2e/multigpu/test_ray.py

Lines changed: 2 additions & 0 deletions
@@ -45,6 +45,7 @@ def test_lora_ddp(self, temp_dir):
             {
                 "path": "tatsu-lab/alpaca",
                 "type": "alpaca",
+                "split": "train[:10%]",
             },
         ],
         "num_epochs": 1,
@@ -103,6 +104,7 @@ def test_ds_zero2_packed(self, temp_dir, gradient_accumulation_steps):
             {
                 "path": "tatsu-lab/alpaca",
                 "type": "alpaca",
+                "split": "train[:10%]",
             },
         ],
         "num_epochs": 1,

tests/e2e/multigpu/test_sp.py

Lines changed: 1 addition & 0 deletions
@@ -40,6 +40,7 @@ def test_sequence_parallel_training(self, temp_dir):
             {
                 "path": "tatsu-lab/alpaca",
                 "type": "alpaca",
+                "split": "train[:10%]",
             },
         ],
         "num_epochs": 1,

tests/e2e/patched/test_llama_s2_attention.py

Lines changed: 2 additions & 2 deletions
@@ -43,7 +43,7 @@ def test_lora_s2_attn(self, temp_dir):
         "lora_alpha": 16,
         "lora_dropout": 0.05,
         "lora_target_linear": True,
-        "val_set_size": 0.1,
+        "val_set_size": 0.02,
         "special_tokens": {},
         "datasets": [
             {
@@ -83,7 +83,7 @@ def test_fft_s2_attn(self, temp_dir):
         "sample_packing": False,
         "flash_attention": True,
         "s2_attention": True,
-        "val_set_size": 0.1,
+        "val_set_size": 0.02,
         "special_tokens": {},
         "datasets": [
             {

tests/e2e/patched/test_model_patches.py

Lines changed: 2 additions & 2 deletions
@@ -27,7 +27,7 @@ def test_mixtral_multipack(self, temp_dir):
         "flash_attention": True,
         "sample_packing": True,
         "sequence_len": 2048,
-        "val_set_size": 0.1,
+        "val_set_size": 0.02,
         "special_tokens": {},
         "datasets": [
             {
@@ -59,7 +59,7 @@ def test_mistral_multipack(self, temp_dir):
         "flash_attention": True,
         "sample_packing": True,
         "sequence_len": 2048,
-        "val_set_size": 0.1,
+        "val_set_size": 0.02,
         "special_tokens": {},
         "datasets": [
             {

tests/e2e/patched/test_phi_multipack.py

Lines changed: 1 addition & 1 deletion
@@ -88,7 +88,7 @@ def test_qlora_packed(self, temp_dir):
         "lora_alpha": 32,
         "lora_dropout": 0.05,
         "lora_target_linear": True,
-        "val_set_size": 0.1,
+        "val_set_size": 0.02,
         "special_tokens": {
             "pad_token": "<|endoftext|>",
         },

tests/e2e/solo/test_flex.py

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ def test_loss_llama(self, temp_dir):
         ],
         "num_epochs": 1,
         "micro_batch_size": 2,
-        "gradient_accumulation_steps": 4,
+        "gradient_accumulation_steps": 2,
         "output_dir": temp_dir,
         "learning_rate": 0.00001,
         "optimizer": "adamw_torch_fused",

tests/e2e/test_deepseekv3.py

Lines changed: 2 additions & 2 deletions
@@ -65,7 +65,7 @@ def test_lora_deepseekv3(self, temp_dir, sample_packing):
         "chat_template": "deepseek_v3",
         "num_epochs": 1,
         "micro_batch_size": 1,
-        "gradient_accumulation_steps": 4,
+        "gradient_accumulation_steps": 2,
         "output_dir": temp_dir,
         "learning_rate": 0.00001,
         "optimizer": "adamw_bnb_8bit",
@@ -115,7 +115,7 @@ def test_fft_deepseekv3(self, temp_dir, sample_packing):
         },
         "num_epochs": 1,
         "micro_batch_size": 1,
-        "gradient_accumulation_steps": 4,
+        "gradient_accumulation_steps": 2,
         "output_dir": temp_dir,
         "learning_rate": 0.00001,
         "optimizer": "adamw_bnb_8bit",

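The "split": "train[:10%]" values added throughout use Hugging Face datasets slice notation, so each test loads and tokenizes only about a tenth of Alpaca instead of the full train split. A standalone sketch of what that split spec resolves to (row counts are approximate):

    # Minimal sketch of the slice syntax the new "split" values rely on.
    from datasets import load_dataset

    subset = load_dataset("tatsu-lab/alpaca", split="train[:10%]")
    print(len(subset))  # roughly 5.2k rows instead of the full ~52k-row train split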