Skip to content

Commit 73bcb07

Browse files
authored
Merge pull request #44 from foundation-model-stack/roberta_expectation_test_token_type_ids
updated roberta squad v2 expectation test
2 parents 942e9dd + 8d8af58 commit 73bcb07

2 files changed

+2
-2
lines changed
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
7.92578125,7.92578125,7.92578125,7.92578125,7.1953125,0.0,1.265625,0.109375,0.421875,1.359375,2.03125,0.40625,0.8671875,0.65625,1.265625,0.734375,1.7578125,0.46875,0.8203125,2.5546875,1.375,1.484375,0.9140625,0.671875,1.1640625,3.1328125,2.46875,3.171875,0.7578125,1.4765625,2.6171875,2.96875,1.3515625,1.3046875,1.0625,2.84375,3.375,1.828125,2.484375,1.703125,1.0703125,1.8359375,1.4375,2.84375,3.9453125,2.6015625,3.65625,1.5546875,2.9375,1.0859375,1.765625,1.8515625,0.90625,2.0234375,2.015625,1.390625,0.7890625,1.375,1.6015625,2.171875,0.9453125,0.7421875,1.3671875,2.1640625,9.0,9.0,9.0,9.0,7.5859375,1.78125,1.0546875,1.453125,0.9765625,1.453125,0.8125,2.8046875,1.3359375,1.4453125,1.4453125,1.3984375,3.1796875,3.3984375,1.359375,2.6171875,1.3515625,2.859375,0.9296875,2.109375,1.6015625,1.3046875,3.90625,0.9140625,2.875,1.90625,1.6953125,2.5390625,2.5625,3.109375,3.046875,2.4296875,2.171875,1.8671875,1.2578125,1.8046875,1.671875,3.1796875,3.328125,3.328125,1.9921875,4.0703125,1.015625,3.21875,1.9921875,0.984375,1.65625,2.578125,1.5390625,0.9453125,1.8359375,0.9140625,2.3046875,1.046875,3.0859375,4.5,1.0625,0.9375,1.3125,4.6328125
1+
7.796875,7.796875,7.796875,7.796875,6.953125,0.0,1.234375,0.03125,0.375,1.3515625,1.9375,0.390625,0.8125,0.6015625,1.2109375,0.6875,1.6953125,0.453125,0.7734375,2.421875,1.3359375,1.4609375,0.84375,0.640625,1.1328125,3.0,2.3828125,3.0234375,0.734375,1.4453125,2.53125,2.8203125,1.3046875,1.203125,1.0234375,2.75,3.234375,1.734375,2.359375,1.6171875,1.0546875,1.75,1.390625,2.734375,3.796875,2.546875,3.4921875,1.53125,2.8125,1.0234375,1.7265625,1.8046875,0.8828125,1.921875,1.9140625,1.34375,0.78125,1.3203125,1.5234375,2.0546875,0.890625,0.6640625,1.28125,2.0546875,8.875,8.875,8.875,8.875,7.359375,1.6953125,1.015625,1.3828125,0.9609375,1.40625,0.7734375,2.6875,1.2578125,1.375,1.3828125,1.2890625,3.0703125,3.296875,1.2890625,2.53125,1.359375,2.78125,0.921875,2.0390625,1.546875,1.265625,3.796875,0.8515625,2.7890625,1.8515625,1.6640625,2.4296875,2.5,2.9140625,2.90625,2.3515625,2.1171875,1.8046875,1.203125,1.7734375,1.59375,3.0859375,3.21875,3.1875,1.8984375,3.9453125,0.9375,3.140625,1.9765625,0.9296875,1.65625,2.515625,1.484375,0.921875,1.796875,0.9140625,2.25,1.0234375,2.9921875,4.34375,1.0234375,0.875,1.21875,4.4296875
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
base_model.embedding.weight,base_model.enc_norm.bias,base_model.enc_norm.weight,base_model.layers.0.attn.dense.bias,base_model.layers.0.attn.dense.weight,base_model.layers.0.attn.in_proj.key.bias,base_model.layers.0.attn.in_proj.key.weight,base_model.layers.0.attn.in_proj.query.bias,base_model.layers.0.attn.in_proj.query.weight,base_model.layers.0.attn.in_proj.value.bias,base_model.layers.0.attn.in_proj.value.weight,base_model.layers.0.ff_ln.bias,base_model.layers.0.ff_ln.weight,base_model.layers.0.ff_sub_layer.w1.bias,base_model.layers.0.ff_sub_layer.w1.weight,base_model.layers.0.ff_sub_layer.w2.bias,base_model.layers.0.ff_sub_layer.w2.weight,base_model.layers.0.ln.bias,base_model.layers.0.ln.weight,base_model.layers.1.attn.dense.bias,base_model.layers.1.attn.dense.weight,base_model.layers.1.attn.in_proj.key.bias,base_model.layers.1.attn.in_proj.key.weight,base_model.layers.1.attn.in_proj.query.bias,base_model.layers.1.attn.in_proj.query.weight,base_model.layers.1.attn.in_proj.value.bias,base_model.layers.1.attn.in_proj.value.weight,base_model.layers.1.ff_ln.bias,base_model.layers.1.ff_ln.weight,base_model.layers.1.ff_sub_layer.w1.bias,base_model.layers.1.ff_sub_layer.w1.weight,base_model.layers.1.ff_sub_layer.w2.bias,base_model.layers.1.ff_sub_layer.w2.weight,base_model.layers.1.ln.bias,base_model.layers.1.ln.weight,base_model.layers.10.attn.dense.bias,base_model.layers.10.attn.dense.weight,base_model.layers.10.attn.in_proj.key.bias,base_model.layers.10.attn.in_proj.key.weight,base_model.layers.10.attn.in_proj.query.bias,base_model.layers.10.attn.in_proj.query.weight,base_model.layers.10.attn.in_proj.value.bias,base_model.layers.10.attn.in_proj.value.weight,base_model.layers.10.ff_ln.bias,base_model.layers.10.ff_ln.weight,base_model.layers.10.ff_sub_layer.w1.bias,base_model.layers.10.ff_sub_layer.w1.weight,base_model.layers.10.ff_sub_layer.w2.bias,base_model.layers.10.ff_sub_layer.w2.weight,base_model.layers.10.ln.bias,base_model.layers.10.ln.weight,base_model.layers.11.attn.dense.bias,base_model.layers.11.attn.dense.weight,base_model.layers.11.attn.in_proj.key.bias,base_model.layers.11.attn.in_proj.key.weight,base_model.layers.11.attn.in_proj.query.bias,base_model.layers.11.attn.in_proj.query.weight,base_model.layers.11.attn.in_proj.value.bias,base_model.layers.11.attn.in_proj.value.weight,base_model.layers.11.ff_ln.bias,base_model.layers.11.ff_ln.weight,base_model.layers.11.ff_sub_layer.w1.bias,base_model.layers.11.ff_sub_layer.w1.weight,base_model.layers.11.ff_sub_layer.w2.bias,base_model.layers.11.ff_sub_layer.w2.weight,base_model.layers.11.ln.bias,base_model.layers.11.ln.weight,base_model.layers.2.attn.dense.bias,base_model.layers.2.attn.dense.weight,base_model.layers.2.attn.in_proj.key.bias,base_model.layers.2.attn.in_proj.key.weight,base_model.layers.2.attn.in_proj.query.bias,base_model.layers.2.attn.in_proj.query.weight,base_model.layers.2.attn.in_proj.value.bias,base_model.layers.2.attn.in_proj.value.weight,base_model.layers.2.ff_ln.bias,base_model.layers.2.ff_ln.weight,base_model.layers.2.ff_sub_layer.w1.bias,base_model.layers.2.ff_sub_layer.w1.weight,base_model.layers.2.ff_sub_layer.w2.bias,base_model.layers.2.ff_sub_layer.w2.weight,base_model.layers.2.ln.bias,base_model.layers.2.ln.weight,base_model.layers.3.attn.dense.bias,base_model.layers.3.attn.dense.weight,base_model.layers.3.attn.in_proj.key.bias,base_model.layers.3.attn.in_proj.key.weight,base_model.layers.3.attn.in_proj.query.bias,base_model.layers.3.attn.in_proj.query.weight,base_model.layers.3.attn.in_proj.value.bias,base_model.layers.3.attn.in_proj.value.weight,base_model.layers.3.ff_ln.bias,base_model.layers.3.ff_ln.weight,base_model.layers.3.ff_sub_layer.w1.bias,base_model.layers.3.ff_sub_layer.w1.weight,base_model.layers.3.ff_sub_layer.w2.bias,base_model.layers.3.ff_sub_layer.w2.weight,base_model.layers.3.ln.bias,base_model.layers.3.ln.weight,base_model.layers.4.attn.dense.bias,base_model.layers.4.attn.dense.weight,base_model.layers.4.attn.in_proj.key.bias,base_model.layers.4.attn.in_proj.key.weight,base_model.layers.4.attn.in_proj.query.bias,base_model.layers.4.attn.in_proj.query.weight,base_model.layers.4.attn.in_proj.value.bias,base_model.layers.4.attn.in_proj.value.weight,base_model.layers.4.ff_ln.bias,base_model.layers.4.ff_ln.weight,base_model.layers.4.ff_sub_layer.w1.bias,base_model.layers.4.ff_sub_layer.w1.weight,base_model.layers.4.ff_sub_layer.w2.bias,base_model.layers.4.ff_sub_layer.w2.weight,base_model.layers.4.ln.bias,base_model.layers.4.ln.weight,base_model.layers.5.attn.dense.bias,base_model.layers.5.attn.dense.weight,base_model.layers.5.attn.in_proj.key.bias,base_model.layers.5.attn.in_proj.key.weight,base_model.layers.5.attn.in_proj.query.bias,base_model.layers.5.attn.in_proj.query.weight,base_model.layers.5.attn.in_proj.value.bias,base_model.layers.5.attn.in_proj.value.weight,base_model.layers.5.ff_ln.bias,base_model.layers.5.ff_ln.weight,base_model.layers.5.ff_sub_layer.w1.bias,base_model.layers.5.ff_sub_layer.w1.weight,base_model.layers.5.ff_sub_layer.w2.bias,base_model.layers.5.ff_sub_layer.w2.weight,base_model.layers.5.ln.bias,base_model.layers.5.ln.weight,base_model.layers.6.attn.dense.bias,base_model.layers.6.attn.dense.weight,base_model.layers.6.attn.in_proj.key.bias,base_model.layers.6.attn.in_proj.key.weight,base_model.layers.6.attn.in_proj.query.bias,base_model.layers.6.attn.in_proj.query.weight,base_model.layers.6.attn.in_proj.value.bias,base_model.layers.6.attn.in_proj.value.weight,base_model.layers.6.ff_ln.bias,base_model.layers.6.ff_ln.weight,base_model.layers.6.ff_sub_layer.w1.bias,base_model.layers.6.ff_sub_layer.w1.weight,base_model.layers.6.ff_sub_layer.w2.bias,base_model.layers.6.ff_sub_layer.w2.weight,base_model.layers.6.ln.bias,base_model.layers.6.ln.weight,base_model.layers.7.attn.dense.bias,base_model.layers.7.attn.dense.weight,base_model.layers.7.attn.in_proj.key.bias,base_model.layers.7.attn.in_proj.key.weight,base_model.layers.7.attn.in_proj.query.bias,base_model.layers.7.attn.in_proj.query.weight,base_model.layers.7.attn.in_proj.value.bias,base_model.layers.7.attn.in_proj.value.weight,base_model.layers.7.ff_ln.bias,base_model.layers.7.ff_ln.weight,base_model.layers.7.ff_sub_layer.w1.bias,base_model.layers.7.ff_sub_layer.w1.weight,base_model.layers.7.ff_sub_layer.w2.bias,base_model.layers.7.ff_sub_layer.w2.weight,base_model.layers.7.ln.bias,base_model.layers.7.ln.weight,base_model.layers.8.attn.dense.bias,base_model.layers.8.attn.dense.weight,base_model.layers.8.attn.in_proj.key.bias,base_model.layers.8.attn.in_proj.key.weight,base_model.layers.8.attn.in_proj.query.bias,base_model.layers.8.attn.in_proj.query.weight,base_model.layers.8.attn.in_proj.value.bias,base_model.layers.8.attn.in_proj.value.weight,base_model.layers.8.ff_ln.bias,base_model.layers.8.ff_ln.weight,base_model.layers.8.ff_sub_layer.w1.bias,base_model.layers.8.ff_sub_layer.w1.weight,base_model.layers.8.ff_sub_layer.w2.bias,base_model.layers.8.ff_sub_layer.w2.weight,base_model.layers.8.ln.bias,base_model.layers.8.ln.weight,base_model.layers.9.attn.dense.bias,base_model.layers.9.attn.dense.weight,base_model.layers.9.attn.in_proj.key.bias,base_model.layers.9.attn.in_proj.key.weight,base_model.layers.9.attn.in_proj.query.bias,base_model.layers.9.attn.in_proj.query.weight,base_model.layers.9.attn.in_proj.value.bias,base_model.layers.9.attn.in_proj.value.weight,base_model.layers.9.ff_ln.bias,base_model.layers.9.ff_ln.weight,base_model.layers.9.ff_sub_layer.w1.bias,base_model.layers.9.ff_sub_layer.w1.weight,base_model.layers.9.ff_sub_layer.w2.bias,base_model.layers.9.ff_sub_layer.w2.weight,base_model.layers.9.ln.bias,base_model.layers.9.ln.weight,base_model.position_embedding.weight,qa_head.bias,qa_head.weight
1+
base_model.embedding.weight,base_model.enc_norm.bias,base_model.enc_norm.weight,base_model.layers.0.attn.dense.bias,base_model.layers.0.attn.dense.weight,base_model.layers.0.attn.in_proj.key.bias,base_model.layers.0.attn.in_proj.key.weight,base_model.layers.0.attn.in_proj.query.bias,base_model.layers.0.attn.in_proj.query.weight,base_model.layers.0.attn.in_proj.value.bias,base_model.layers.0.attn.in_proj.value.weight,base_model.layers.0.ff_ln.bias,base_model.layers.0.ff_ln.weight,base_model.layers.0.ff_sub_layer.w1.bias,base_model.layers.0.ff_sub_layer.w1.weight,base_model.layers.0.ff_sub_layer.w2.bias,base_model.layers.0.ff_sub_layer.w2.weight,base_model.layers.0.ln.bias,base_model.layers.0.ln.weight,base_model.layers.1.attn.dense.bias,base_model.layers.1.attn.dense.weight,base_model.layers.1.attn.in_proj.key.bias,base_model.layers.1.attn.in_proj.key.weight,base_model.layers.1.attn.in_proj.query.bias,base_model.layers.1.attn.in_proj.query.weight,base_model.layers.1.attn.in_proj.value.bias,base_model.layers.1.attn.in_proj.value.weight,base_model.layers.1.ff_ln.bias,base_model.layers.1.ff_ln.weight,base_model.layers.1.ff_sub_layer.w1.bias,base_model.layers.1.ff_sub_layer.w1.weight,base_model.layers.1.ff_sub_layer.w2.bias,base_model.layers.1.ff_sub_layer.w2.weight,base_model.layers.1.ln.bias,base_model.layers.1.ln.weight,base_model.layers.10.attn.dense.bias,base_model.layers.10.attn.dense.weight,base_model.layers.10.attn.in_proj.key.bias,base_model.layers.10.attn.in_proj.key.weight,base_model.layers.10.attn.in_proj.query.bias,base_model.layers.10.attn.in_proj.query.weight,base_model.layers.10.attn.in_proj.value.bias,base_model.layers.10.attn.in_proj.value.weight,base_model.layers.10.ff_ln.bias,base_model.layers.10.ff_ln.weight,base_model.layers.10.ff_sub_layer.w1.bias,base_model.layers.10.ff_sub_layer.w1.weight,base_model.layers.10.ff_sub_layer.w2.bias,base_model.layers.10.ff_sub_layer.w2.weight,base_model.layers.10.ln.bias,base_model.layers.10.ln.weight,base_model.layers.11.attn.dense.bias,base_model.layers.11.attn.dense.weight,base_model.layers.11.attn.in_proj.key.bias,base_model.layers.11.attn.in_proj.key.weight,base_model.layers.11.attn.in_proj.query.bias,base_model.layers.11.attn.in_proj.query.weight,base_model.layers.11.attn.in_proj.value.bias,base_model.layers.11.attn.in_proj.value.weight,base_model.layers.11.ff_ln.bias,base_model.layers.11.ff_ln.weight,base_model.layers.11.ff_sub_layer.w1.bias,base_model.layers.11.ff_sub_layer.w1.weight,base_model.layers.11.ff_sub_layer.w2.bias,base_model.layers.11.ff_sub_layer.w2.weight,base_model.layers.11.ln.bias,base_model.layers.11.ln.weight,base_model.layers.2.attn.dense.bias,base_model.layers.2.attn.dense.weight,base_model.layers.2.attn.in_proj.key.bias,base_model.layers.2.attn.in_proj.key.weight,base_model.layers.2.attn.in_proj.query.bias,base_model.layers.2.attn.in_proj.query.weight,base_model.layers.2.attn.in_proj.value.bias,base_model.layers.2.attn.in_proj.value.weight,base_model.layers.2.ff_ln.bias,base_model.layers.2.ff_ln.weight,base_model.layers.2.ff_sub_layer.w1.bias,base_model.layers.2.ff_sub_layer.w1.weight,base_model.layers.2.ff_sub_layer.w2.bias,base_model.layers.2.ff_sub_layer.w2.weight,base_model.layers.2.ln.bias,base_model.layers.2.ln.weight,base_model.layers.3.attn.dense.bias,base_model.layers.3.attn.dense.weight,base_model.layers.3.attn.in_proj.key.bias,base_model.layers.3.attn.in_proj.key.weight,base_model.layers.3.attn.in_proj.query.bias,base_model.layers.3.attn.in_proj.query.weight,base_model.layers.3.attn.in_proj.value.bias,base_model.layers.3.attn.in_proj.value.weight,base_model.layers.3.ff_ln.bias,base_model.layers.3.ff_ln.weight,base_model.layers.3.ff_sub_layer.w1.bias,base_model.layers.3.ff_sub_layer.w1.weight,base_model.layers.3.ff_sub_layer.w2.bias,base_model.layers.3.ff_sub_layer.w2.weight,base_model.layers.3.ln.bias,base_model.layers.3.ln.weight,base_model.layers.4.attn.dense.bias,base_model.layers.4.attn.dense.weight,base_model.layers.4.attn.in_proj.key.bias,base_model.layers.4.attn.in_proj.key.weight,base_model.layers.4.attn.in_proj.query.bias,base_model.layers.4.attn.in_proj.query.weight,base_model.layers.4.attn.in_proj.value.bias,base_model.layers.4.attn.in_proj.value.weight,base_model.layers.4.ff_ln.bias,base_model.layers.4.ff_ln.weight,base_model.layers.4.ff_sub_layer.w1.bias,base_model.layers.4.ff_sub_layer.w1.weight,base_model.layers.4.ff_sub_layer.w2.bias,base_model.layers.4.ff_sub_layer.w2.weight,base_model.layers.4.ln.bias,base_model.layers.4.ln.weight,base_model.layers.5.attn.dense.bias,base_model.layers.5.attn.dense.weight,base_model.layers.5.attn.in_proj.key.bias,base_model.layers.5.attn.in_proj.key.weight,base_model.layers.5.attn.in_proj.query.bias,base_model.layers.5.attn.in_proj.query.weight,base_model.layers.5.attn.in_proj.value.bias,base_model.layers.5.attn.in_proj.value.weight,base_model.layers.5.ff_ln.bias,base_model.layers.5.ff_ln.weight,base_model.layers.5.ff_sub_layer.w1.bias,base_model.layers.5.ff_sub_layer.w1.weight,base_model.layers.5.ff_sub_layer.w2.bias,base_model.layers.5.ff_sub_layer.w2.weight,base_model.layers.5.ln.bias,base_model.layers.5.ln.weight,base_model.layers.6.attn.dense.bias,base_model.layers.6.attn.dense.weight,base_model.layers.6.attn.in_proj.key.bias,base_model.layers.6.attn.in_proj.key.weight,base_model.layers.6.attn.in_proj.query.bias,base_model.layers.6.attn.in_proj.query.weight,base_model.layers.6.attn.in_proj.value.bias,base_model.layers.6.attn.in_proj.value.weight,base_model.layers.6.ff_ln.bias,base_model.layers.6.ff_ln.weight,base_model.layers.6.ff_sub_layer.w1.bias,base_model.layers.6.ff_sub_layer.w1.weight,base_model.layers.6.ff_sub_layer.w2.bias,base_model.layers.6.ff_sub_layer.w2.weight,base_model.layers.6.ln.bias,base_model.layers.6.ln.weight,base_model.layers.7.attn.dense.bias,base_model.layers.7.attn.dense.weight,base_model.layers.7.attn.in_proj.key.bias,base_model.layers.7.attn.in_proj.key.weight,base_model.layers.7.attn.in_proj.query.bias,base_model.layers.7.attn.in_proj.query.weight,base_model.layers.7.attn.in_proj.value.bias,base_model.layers.7.attn.in_proj.value.weight,base_model.layers.7.ff_ln.bias,base_model.layers.7.ff_ln.weight,base_model.layers.7.ff_sub_layer.w1.bias,base_model.layers.7.ff_sub_layer.w1.weight,base_model.layers.7.ff_sub_layer.w2.bias,base_model.layers.7.ff_sub_layer.w2.weight,base_model.layers.7.ln.bias,base_model.layers.7.ln.weight,base_model.layers.8.attn.dense.bias,base_model.layers.8.attn.dense.weight,base_model.layers.8.attn.in_proj.key.bias,base_model.layers.8.attn.in_proj.key.weight,base_model.layers.8.attn.in_proj.query.bias,base_model.layers.8.attn.in_proj.query.weight,base_model.layers.8.attn.in_proj.value.bias,base_model.layers.8.attn.in_proj.value.weight,base_model.layers.8.ff_ln.bias,base_model.layers.8.ff_ln.weight,base_model.layers.8.ff_sub_layer.w1.bias,base_model.layers.8.ff_sub_layer.w1.weight,base_model.layers.8.ff_sub_layer.w2.bias,base_model.layers.8.ff_sub_layer.w2.weight,base_model.layers.8.ln.bias,base_model.layers.8.ln.weight,base_model.layers.9.attn.dense.bias,base_model.layers.9.attn.dense.weight,base_model.layers.9.attn.in_proj.key.bias,base_model.layers.9.attn.in_proj.key.weight,base_model.layers.9.attn.in_proj.query.bias,base_model.layers.9.attn.in_proj.query.weight,base_model.layers.9.attn.in_proj.value.bias,base_model.layers.9.attn.in_proj.value.weight,base_model.layers.9.ff_ln.bias,base_model.layers.9.ff_ln.weight,base_model.layers.9.ff_sub_layer.w1.bias,base_model.layers.9.ff_sub_layer.w1.weight,base_model.layers.9.ff_sub_layer.w2.bias,base_model.layers.9.ff_sub_layer.w2.weight,base_model.layers.9.ln.bias,base_model.layers.9.ln.weight,base_model.position_embedding.weight,base_model.token_type_embeddings.weight,qa_head.bias,qa_head.weight

0 commit comments

Comments
 (0)