Skip to content

Commit 7e8d857

Browse files
authored
Add ONNX export support for granite models (#2043)
* feat(exporters/onnx): Add GraniteOnnxConfig and task support list
* feat: Add granite's normalized config for inference
* feat(onnx opt): Add onnx optimization support for granite
* fix(onnx/granite): Use LlamaOnnxConfig as the base for GraniteOnnxConfig
* fix(onnxruntime): Add "granite" to list of model types with grouped attention
* fix: Add granite to the list of models that require position_ids
* fix(granite): Add MIN_TORCH_VERSION for recently fixed torch bug (#2043 comment)
* test(granite): Add tiny random granite test for onnx exporter
* tests(onnxruntime): Add granite to onnxruntime tests

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
1 parent 6802a0c commit 7e8d857

File tree

9 files changed

+19
-1
lines changed

9 files changed

+19
-1
lines changed

optimum/exporters/onnx/model_configs.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -298,6 +298,11 @@ class GemmaOnnxConfig(LlamaOnnxConfig):
298298
pass
299299

300300

301+
class GraniteOnnxConfig(LlamaOnnxConfig):
302+
MIN_TRANSFORMERS_VERSION = version.parse("4.45.0")
303+
MIN_TORCH_VERSION = version.parse("2.5.0")
304+
305+
301306
class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
302307
DEFAULT_ONNX_OPSET = 14 # Phi now uses F.scaled_dot_product_attention by default for torch>=2.1.1.
303308
NORMALIZED_CONFIG_CLASS = NormalizedTextConfig

optimum/exporters/onnx/utils.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,7 @@
8686
"phi",
8787
"phi3",
8888
"qwen2",
89+
"granite",
8990
}
9091

9192

optimum/exporters/tasks.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -915,6 +915,13 @@ class TasksManager:
915915
"text-classification",
916916
onnx="LlamaOnnxConfig",
917917
),
918+
"granite": supported_tasks_mapping(
919+
"feature-extraction",
920+
"feature-extraction-with-past",
921+
"text-generation",
922+
"text-generation-with-past",
923+
onnx="GraniteOnnxConfig",
924+
),
918925
"pegasus": supported_tasks_mapping(
919926
"feature-extraction",
920927
"feature-extraction-with-past",

optimum/onnxruntime/modeling_decoder.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -340,7 +340,7 @@ def prepare_past_key_values(
340340
if self.model_type == "gemma":
341341
num_attention_heads = self.normalized_config.num_key_value_heads
342342
embed_size_per_head = self.normalized_config.head_dim
343-
elif self.model_type in {"mistral", "llama", "qwen2"}:
343+
elif self.model_type in {"mistral", "llama", "qwen2", "granite"}:
344344
num_attention_heads = self.normalized_config.num_key_value_heads
345345
else:
346346
num_attention_heads = self.normalized_config.num_attention_heads

optimum/onnxruntime/utils.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,7 @@ class ORTConfigManager:
128128
"gpt-neo": "gpt2",
129129
"gpt-neox": "gpt2",
130130
"gptj": "gpt2",
131+
"granite": "gpt2",
131132
# longt5 with O4 results in segmentation fault
132133
"longt5": "bert",
133134
"llama": "gpt2",

optimum/utils/normalized_config.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -281,6 +281,7 @@ class NormalizedConfigManager:
281281
"xlm-roberta": NormalizedTextConfig,
282282
"yolos": NormalizedVisionConfig,
283283
"qwen2": NormalizedTextConfig,
284+
"granite": NormalizedTextConfigWithGQA,
284285
}
285286

286287
@classmethod

tests/exporters/exporters_utils.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,7 @@
100100
"gpt-neo": "hf-internal-testing/tiny-random-GPTNeoModel",
101101
"gpt-neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
102102
"gptj": "hf-internal-testing/tiny-random-GPTJModel",
103+
"granite": "hf-internal-testing/tiny-random-GraniteForCausalLM",
103104
"groupvit": "hf-internal-testing/tiny-random-groupvit",
104105
"ibert": "hf-internal-testing/tiny-random-IBertModel",
105106
"imagegpt": "hf-internal-testing/tiny-random-ImageGPTModel",

tests/onnxruntime/test_modeling.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2324,6 +2324,7 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin):
23242324
"gpt_neo",
23252325
"gpt_neox",
23262326
"gptj",
2327+
"granite",
23272328
"llama",
23282329
"mistral",
23292330
"mpt",

tests/onnxruntime/utils_onnxruntime_tests.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,7 @@
104104
"gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
105105
"gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
106106
"gptj": "hf-internal-testing/tiny-random-GPTJForCausalLM",
107+
"granite": "hf-internal-testing/tiny-random-GraniteForCausalLM",
107108
"groupvit": "hf-internal-testing/tiny-random-groupvit",
108109
"hubert": "hf-internal-testing/tiny-random-HubertModel",
109110
"ibert": "hf-internal-testing/tiny-random-IBertModel",

0 commit comments

Comments (0)