
Commit 4b051fe

Merge pull request #35 from foundation-model-stack/model_consistency_testing
Fixed a bug in test_model_expectations compilation and updated the models to test
2 parents 2f07090 + cef617b commit 4b051fe
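
The compilation bug referenced above is visible in the diff below: the old fixture called torch.compile(aiu_model, backend="sendnn") and discarded the return value, so the tests kept exercising the uncompiled module; the fix moves compilation into a dedicated model fixture that calls compile() on the module itself, which compiles it in place. A minimal sketch of the difference, using a stand-in nn.Linear and PyTorch's default backend rather than the AIU "sendnn" backend used by the tests:

    import torch
    from torch import nn

    model = nn.Linear(8, 8)  # stand-in for the FMS model returned by get_model()

    # Buggy pattern: torch.compile returns a new OptimizedModule and leaves
    # `model` itself untouched, so discarding the return value means later
    # calls still go through the uncompiled module.
    compiled = torch.compile(model)

    # Fixed pattern: nn.Module.compile() (PyTorch >= 2.2) compiles the module
    # in place, so subsequent calls to `model(...)` use the compiled path.
    model.compile()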

15 files changed (+47, -29 lines)

tests/models/test_model_expectations.py

Lines changed: 41 additions & 19 deletions
@@ -1,5 +1,8 @@
 from fms.models import get_model
+from fms.utils.generation import pad_input_ids
+from fms.utils.tokenizers import get_tokenizer
 import pytest
+from aiu_fms_testing_utils.utils import sample_squad_v2_qa_requests
 import torch
 
 from fms.testing._internal.model_test_suite import (
@@ -8,71 +11,90 @@
 )
 import os
 
+os.environ["COMPILATION_MODE"] = "offline"
+
 if "HF_HOME" not in os.environ:
     os.environ["HF_HOME"] = "/tmp/models/hf_cache"
 
 model_dir = os.environ.get("FMS_TESTING_MODEL_DIR", "/tmp/models")
-LLAMA_194M = f"{model_dir}/llama-194m"
-GRANITE_7B_BASE = f"{model_dir}/granite-7b-base"
-GRANITE_8B_CODE_BASE = f"{model_dir}/granite-8b-code-base"
-GRANITE_3_8B_CODE_BASE = f"{model_dir}/granite-3-8b-base"
+LLAMA_3p1_8B_INSTRUCT = "meta-llama/Llama-3.1-8B-Instruct"
+GRANITE_3p2_8B_INSTRUCT = "ibm-granite/granite-3.2-8b-instruct"
+ROBERTA_SQUAD_v2 = "deepset/roberta-base-squad2"
+torch.manual_seed(42)
 
-models = [LLAMA_194M, GRANITE_7B_BASE, GRANITE_8B_CODE_BASE, GRANITE_3_8B_CODE_BASE]
-mini_models = {LLAMA_194M, GRANITE_7B_BASE, GRANITE_8B_CODE_BASE, GRANITE_3_8B_CODE_BASE}
+micro_models = {LLAMA_3p1_8B_INSTRUCT, GRANITE_3p2_8B_INSTRUCT}
 
-class AIUModelFixtureMixin(ModelFixtureMixin):
 
+class AIUModelFixtureMixin(ModelFixtureMixin):
     @pytest.fixture(scope="class", autouse=True)
     def uninitialized_model(self, model_id):
-        if model_id in mini_models:
+        if model_id in micro_models:
             get_model_kwargs = {"architecture": "hf_configured", "nlayers": 3}
         else:
             get_model_kwargs = {"architecture": "hf_pretrained"}
 
         aiu_model = get_model(
             variant=model_id,
             device_type="cpu",
-            unfuse_weights=True,
-            **get_model_kwargs
+            fused_weights=False,
+            data_type=torch.float16,
+            **get_model_kwargs,
         )
-        torch.compile(aiu_model, backend="sendnn")
+
         return aiu_model
 
-class TestAIUModels(
+    @pytest.fixture(scope="class", autouse=True)
+    def model(self, uninitialized_model):
+        # we want to use reset parameter initialization here rather than the default random initialization
+        uninitialized_model.eval()
+        torch.set_grad_enabled(False)
+        uninitialized_model.compile(backend="sendnn")
+        return uninitialized_model
+
+
+decoder_models = [LLAMA_3p1_8B_INSTRUCT, GRANITE_3p2_8B_INSTRUCT]
+
+
+class TestAIUDecoderModels(
     ModelConsistencyTestSuite,
     AIUModelFixtureMixin,
 ):
-
     # x is the main parameter for this model which is the input tensor
     _get_signature_params = ["x"]
+    _get_signature_input_ids, _get_signature_optional_params = pad_input_ids(
+        [torch.arange(start=5, end=65, dtype=torch.int64)], min_pad_length=64
+    )
 
-    @pytest.fixture(scope="class", autouse=True, params=models)
+    @pytest.fixture(scope="class", autouse=True, params=decoder_models)
     def model_id(self, request):
         return request.param
 
     def test_model_unfused(self, model, signature):
         pytest.skip("All AIU models are already unfused")
 
 
-ROBERTA_SQUAD_v2 = "deepset/roberta-base-squad2"
 tuple_output_models = [ROBERTA_SQUAD_v2]
 
+
 class TestAIUModelsTupleOutput(
     ModelConsistencyTestSuite,
     AIUModelFixtureMixin,
 ):
-
     # x is the main parameter for this model which is the input tensor
     _get_signature_params = ["x"]
+    _get_signature_input_ids, _get_signature_optional_params = pad_input_ids(
+        [torch.arange(start=5, end=65, dtype=torch.int64)],
+        min_pad_length=64,
+        is_causal_mask=False,
+    )
 
     @pytest.fixture(scope="class", autouse=True, params=tuple_output_models)
     def model_id(self, request):
         return request.param
-
+
     @staticmethod
     def _get_signature_logits_getter_fn(f_out) -> torch.Tensor:
         return torch.cat([f_out[0], f_out[1]], dim=-1)
-
+
     def test_model_unfused(self, model, signature):
         pytest.skip("All AIU models are already unfused")
-
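
The new _get_signature_input_ids / _get_signature_optional_params attributes are built with pad_input_ids from fms.utils.generation, which takes a list of 1-D token-id tensors and returns a batched, padded id tensor together with a dict of extra per-call kwargs for the model. A small usage sketch of the same call as in the diff (the printed shapes and keys are illustrative; exact contents depend on the installed fms version):

    import torch
    from fms.utils.generation import pad_input_ids

    # One 60-token prompt (ids 5..64), padded out to at least 64 positions.
    input_ids, extra_kwargs = pad_input_ids(
        [torch.arange(start=5, end=65, dtype=torch.int64)], min_pad_length=64
    )
    print(input_ids.shape)       # batch of 1, padded to >= 64 token positions
    print(sorted(extra_kwargs))  # padding-related kwargs (e.g. mask / position ids)

The tuple-output test class additionally passes is_causal_mask=False, since the RoBERTa model it covers is an encoder and does not use a causal attention mask.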
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+12.65625,12.65625,12.59375,12.625,8.78125,37.96875,14.5625,16.90625,5.0,13.4375,14.71875,20.6875,11.03125,26.15625,39.53125,8.1875,7.0625,35.03125,6.28125,5.1875,13.25,5.15625,12.96875,8.65625,6.96875,19.375,7.21875,15.78125,14.53125,29.40625,8.5625,9.0625,8.5,1.375,16.21875,18.90625,20.34375,13.8125,8.53125,7.75,16.375,17.96875,7.1875,10.65625,11.625,56.15625,11.96875,5.3125,12.21875,4.1875,7.0625,0.0,10.34375,17.3125,32.84375,40.65625,40.78125,12.84375,8.4375,10.53125,8.5,9.125,8.625,14.34375

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+dec_norm.weight,layers.0.attn.dense.weight,layers.0.attn.in_proj.key.weight,layers.0.attn.in_proj.query.weight,layers.0.attn.in_proj.value.weight,layers.0.ff_ln.weight,layers.0.ff_sub_layer.w1.weight,layers.0.ff_sub_layer.w2.weight,layers.0.ff_sub_layer.wg.weight,layers.0.ln.weight,layers.1.attn.dense.weight,layers.1.attn.in_proj.key.weight,layers.1.attn.in_proj.query.weight,layers.1.attn.in_proj.value.weight,layers.1.ff_ln.weight,layers.1.ff_sub_layer.w1.weight,layers.1.ff_sub_layer.w2.weight,layers.1.ff_sub_layer.wg.weight,layers.1.ln.weight,layers.2.attn.dense.weight,layers.2.attn.in_proj.key.weight,layers.2.attn.in_proj.query.weight,layers.2.attn.in_proj.value.weight,layers.2.ff_ln.weight,layers.2.ff_sub_layer.w1.weight,layers.2.ff_sub_layer.w2.weight,layers.2.ff_sub_layer.wg.weight,layers.2.ln.weight,shared.emb.weight,shared.head.weight

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+0.010009765625,0.010009765625,0.009521484375,0.009521484375,0.0234375,0.024658203125,0.014892578125,0.032958984375,0.01611328125,0.00732421875,0.054443359375,0.011474609375,0.013427734375,0.03173828125,0.016357421875,0.015869140625,0.022705078125,0.0205078125,0.025390625,0.017333984375,0.017333984375,0.005615234375,0.012451171875,0.002685546875,0.04296875,0.011962890625,0.017822265625,0.032470703125,0.00244140625,0.025390625,0.013671875,0.07177734375,0.035888671875,0.026611328125,0.0263671875,0.021240234375,0.0263671875,0.007080078125,0.02978515625,0.033203125,0.028564453125,0.031982421875,0.01318359375,0.0263671875,0.0166015625,0.00927734375,0.04345703125,0.028564453125,0.01416015625,0.041748046875,0.0185546875,0.01611328125,0.0166015625,0.0341796875,0.01220703125,0.0,0.01611328125,0.017578125,0.0146484375,0.031005859375,0.021484375,0.02978515625,0.006103515625,0.032470703125

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+base_model.dec_norm.weight,base_model.embedding.weight,base_model.layers.0.attn.dense.weight,base_model.layers.0.attn.in_proj.key.weight,base_model.layers.0.attn.in_proj.query.weight,base_model.layers.0.attn.in_proj.value.weight,base_model.layers.0.ff_ln.weight,base_model.layers.0.ff_sub_layer.w1.weight,base_model.layers.0.ff_sub_layer.w2.weight,base_model.layers.0.ff_sub_layer.wg.weight,base_model.layers.0.ln.weight,base_model.layers.1.attn.dense.weight,base_model.layers.1.attn.in_proj.key.weight,base_model.layers.1.attn.in_proj.query.weight,base_model.layers.1.attn.in_proj.value.weight,base_model.layers.1.ff_ln.weight,base_model.layers.1.ff_sub_layer.w1.weight,base_model.layers.1.ff_sub_layer.w2.weight,base_model.layers.1.ff_sub_layer.wg.weight,base_model.layers.1.ln.weight,base_model.layers.2.attn.dense.weight,base_model.layers.2.attn.in_proj.key.weight,base_model.layers.2.attn.in_proj.query.weight,base_model.layers.2.attn.in_proj.value.weight,base_model.layers.2.ff_ln.weight,base_model.layers.2.ff_sub_layer.w1.weight,base_model.layers.2.ff_sub_layer.w2.weight,base_model.layers.2.ff_sub_layer.wg.weight,base_model.layers.2.ln.weight,head.weight

tests/resources/expectations/models.test_model_expectations.TestAIUModels.granite-3-8b-base.test_model_output

Lines changed: 0 additions & 1 deletion
This file was deleted.

tests/resources/expectations/models.test_model_expectations.TestAIUModels.granite-3-8b-base.test_model_weight_keys

Lines changed: 0 additions & 1 deletion
This file was deleted.

tests/resources/expectations/models.test_model_expectations.TestAIUModels.granite-7b-base.test_model_output

Lines changed: 0 additions & 1 deletion
This file was deleted.

tests/resources/expectations/models.test_model_expectations.TestAIUModels.granite-7b-base.test_model_weight_keys

Lines changed: 0 additions & 1 deletion
This file was deleted.

tests/resources/expectations/models.test_model_expectations.TestAIUModels.granite-8b-code-base.test_model_output

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments
