Add support for PADV2 op for int8 and int16

marialyu · copybara-github · commit c57e0992d33c · 2025-05-14T16:53:30.000-07:00
PiperOrigin-RevId: 755936791
diff --git a/ai_edge_quantizer/algorithm_manager.py b/ai_edge_quantizer/algorithm_manager.py
@@ -108,6 +108,7 @@ class AlgorithmName(str, enum.Enum):
     ),
     _TFLOpName.STABLEHLO_COMPOSITE: common_quantize.materialize_composite,
     _TFLOpName.PAD: common_quantize.materialize_pad,
+    _TFLOpName.PADV2: common_quantize.materialize_padv2,
 }
 for op_name, materialize_func in MIN_MAX_OP_NAME_MATERIALIZE_FUNC_DICT.items():
   register_quantized_op(
@@ -242,6 +243,7 @@ class AlgorithmName(str, enum.Enum):
     ),
     _TFLOpName.STABLEHLO_COMPOSITE: common_quantize.materialize_composite,
     _TFLOpName.PAD: common_quantize.materialize_pad,
+    _TFLOpName.PADV2: common_quantize.materialize_padv2,
 })
 
 for op_name, materialize_func in _OCTAV_OP_NAME_MATERIALIZE_FUNC_DICT.items():
diff --git a/ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py b/ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py
@@ -693,7 +693,24 @@ def materialize_pad(
       tensor_name_to_qsv,
       get_tensor_quant_params_fn,
       constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
-      inputs_to_ignore=[1],  # Padding value does not need to be quantized.
+      inputs_to_ignore=[1],  # Paddings tensor does not need to be quantized.
+  )
+
+
+def materialize_padv2(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.padv2."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+      inputs_to_ignore=[1],  # Paddings tensor does not need to be quantized.
   )
 
 
diff --git a/ai_edge_quantizer/algorithms/uniform_quantize/op_architecture_tests/padv2_test.py b/ai_edge_quantizer/algorithms/uniform_quantize/op_architecture_tests/padv2_test.py
@@ -0,0 +1,106 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python.platform import googletest
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.algorithms.uniform_quantize import common_quantize
+from ai_edge_quantizer.algorithms.uniform_quantize import naive_min_max_quantize
+from ai_edge_quantizer.algorithms.uniform_quantize import octav
+from ai_edge_quantizer.algorithms.uniform_quantize.op_architecture_tests import test_utils as op_test_utils
+from ai_edge_quantizer.utils import test_utils
+from ai_edge_quantizer.utils import tfl_flatbuffer_utils
+
+
+_TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile(
+    "../../../tests/models"
+)
+
+
+class PadV2Test(op_test_utils.BaseQuantizeTest):
+
+  def setUp(self):
+    super().setUp()
+    np.random.seed(666)
+    self._test_model_path = os.path.join(
+        _TEST_DATA_PREFIX_PATH, "single_padv2.tflite"
+    )
+    self._op_test_info = op_test_utils.OpTestInfo(
+        test_model=tfl_flatbuffer_utils.read_model(self._test_model_path),
+        op_tensor_names={},
+        input_range=(np.array([[-10]]), np.array([[10]])),
+        output_range=(np.array([[-10]]), np.array([[10]])),
+    )
+    # The test model has one subgraph for now.
+    self._graph_info = qtyping.GraphInfo(
+        subgraph_tensors=self._op_test_info.test_model.subgraphs[0].tensors,
+        buffers=self._op_test_info.test_model.buffers,
+    )
+
+  @parameterized.product(
+      get_tensor_quant_params_func=(
+          naive_min_max_quantize.get_tensor_quant_params,
+          octav.get_tensor_quant_params,
+      ),
+      activations_num_bits_and_symmetric=[
+          (8, False),
+          (8, True),
+          (16, True),
+      ],
+  )
+  def test_materialize_padv2_succeeds(
+      self, get_tensor_quant_params_func, activations_num_bits_and_symmetric
+  ):
+    activation_config = test_utils.get_static_activation_quant_setting(
+        *activations_num_bits_and_symmetric
+    )
+    op_quant_config = test_utils.get_static_op_quant_config(activation_config)
+
+    # Read from Model Explorer.
+    subgraph0 = self._op_test_info.test_model.subgraphs[0]
+    subgraph_op_id = 0
+    op = subgraph0.operators[subgraph_op_id]
+    op_info = qtyping.OpInfo(
+        op=op,
+        op_name=qtyping.TFLOperationName.PADV2,
+        subgraph_op_index=subgraph_op_id,
+        op_quant_config=op_quant_config,
+    )
+
+    # Test settings.
+    op_tensor_names = {}
+    op_tensor_names["input"] = "serving_default_input:0"
+    op_tensor_names["input2"] = "Const_1"
+    op_tensor_names["input3"] = "Const"
+    op_tensor_names["output"] = "PartitionedCall:0"
+    self._op_test_info.op_tensor_names = op_tensor_names
+    self._test_no_weights_op(
+        op_info,
+        self._graph_info,
+        self._op_test_info,
+        common_quantize.materialize_padv2,
+        get_tensor_quant_params_func,
+        same_input_output_params=True,
+        inputs_to_ignore=[1],  # Paddings tensor does not need to be quantized.
+        constant_inputs=[2],  # constant_values (padding value) is quantized.
+    )
+
+
+if __name__ == "__main__":
+  googletest.main()
diff --git a/ai_edge_quantizer/algorithms/uniform_quantize/op_architecture_tests/test_utils.py b/ai_edge_quantizer/algorithms/uniform_quantize/op_architecture_tests/test_utils.py
@@ -124,6 +124,7 @@ def _test_same_direction_tensors(
       num_tensors,
       indices_to_ignore,
       is_inbounding_tensor,
+      constant_inputs=None,
   ):
     """Tests all input or output tensors in provided quant params.
 
@@ -135,7 +136,13 @@ def _test_same_direction_tensors(
       num_tensors: Number of tensors to test.
       indices_to_ignore: Indices of tensors to ignore.
       is_inbounding_tensor: Whether to test all inbounding tensors.
+      constant_inputs: Indices of constant inputs.
     """
+    if not is_inbounding_tensor and constant_inputs is not None:
+      raise ValueError(
+          "Constant inputs should only be used for inbounding tensors."
+      )
+    constant_inputs = constant_inputs or []
     tensor_base_name = "input" if is_inbounding_tensor else "output"
     tensor_names = [tensor_base_name] + [
         f"{tensor_base_name}{i+2}" for i in range(num_tensors - 1)
@@ -148,9 +155,15 @@ def _test_same_direction_tensors(
           and i not in indices_to_ignore
       ):
         if is_inbounding_tensor:
-          transformations = [_QuantTransformation.ADD_QUANTIZE]
+          if i in constant_inputs:
+            transformations = [_QuantTransformation.QUANTIZE_TENSOR]
+          else:
+            transformations = [_QuantTransformation.ADD_QUANTIZE]
         else:
-          transformations = [_QuantTransformation.ADD_DEQUANTIZE]
+          if i in constant_inputs:
+            transformations = []
+          else:
+            transformations = [_QuantTransformation.ADD_DEQUANTIZE]
       else:
         transformations = [_QuantTransformation.NO_QUANTIZE]
       self._test_tensor_transformation_params(
@@ -172,6 +185,8 @@ def _test_same_input_output_params(
   ):
     """Tests input and output tensor transformation parameters are the same.
 
+    Assumes that each non-ignored input tensor has exactly one consumer.
+
     Args:
       tensor_quant_params: Tensor transformation parameters.
       num_inputs: Number of inputs in materialization function result.
@@ -189,11 +204,17 @@ def _test_same_input_output_params(
     for i in range(num_inputs):
       if i not in inputs_to_ignore:
         if expected_params is None:
-          expected_params = tensor_quant_params[i].consumers[0].parameters  # pytype: disable=attribute-error
+          # Inputs can be constants and therefore have different quantized
+          # data. Ignoring `quantized_data` in comparison.
+          expected_params = dataclasses.replace(
+              tensor_quant_params[i].consumers[0].parameters,  # pytype: disable=attribute-error
+              quantized_data=None,
+          )
         else:
-          input_tensor_quant_params = (
-              tensor_quant_params[i].consumers[0].parameters
-          )  # pytype: disable=attribute-error
+          input_tensor_quant_params = dataclasses.replace(
+              tensor_quant_params[i].consumers[0].parameters,  # pytype: disable=attribute-error
+              quantized_data=None,
+          )
           self.assertEqual(input_tensor_quant_params, expected_params)
 
     # Test outputs.
@@ -215,6 +236,7 @@ def _test_no_weights_op(
       same_input_output_params=False,
       inputs_to_ignore=None,
       outputs_to_ignore=None,
+      constant_inputs=None,
   ):
     """Test an op without weights and bias.
 
@@ -232,6 +254,7 @@ def _test_no_weights_op(
         transformation parameters are the same.
       inputs_to_ignore: Inputs to ignore.
       outputs_to_ignore: Outputs to ignore.
+      constant_inputs: Indices of constant inputs.
     """
     num_inputs = len(op_info.op.inputs)
     num_outputs = len(op_info.op.outputs)
@@ -261,6 +284,7 @@ def _test_no_weights_op(
         num_inputs,
         inputs_to_ignore,
         is_inbounding_tensor=True,
+        constant_inputs=constant_inputs,
     )
     # Test output tensor settings.
     outputs_to_ignore = outputs_to_ignore or []
diff --git a/ai_edge_quantizer/algorithms/utils/common_utils.py b/ai_edge_quantizer/algorithms/utils/common_utils.py
@@ -351,18 +351,52 @@ def _materialize_standard_op_with_same_as_output_scale(
   # Use output quantization params for all input tensors.
   if output_tensor_params.producer is None:
     quant_params = None
+    _materialize_op_tensors(
+        op_tensor_params,
+        input_tensors,
+        is_inbounding_tensor=True,
+        op_info=op_info,
+        graph_info=graph_info,
+        tensor_name_to_qsv=tensor_name_to_qsv,
+        get_tensor_quant_params_fn=get_tensor_quant_params_fn,
+        quant_params=quant_params,
+    )
   else:
-    quant_params = output_tensor_params.producer.parameters
-  _materialize_op_tensors(
-      op_tensor_params,
-      input_tensors,
-      is_inbounding_tensor=True,
-      op_info=op_info,
-      graph_info=graph_info,
-      tensor_name_to_qsv=tensor_name_to_qsv,
-      get_tensor_quant_params_fn=get_tensor_quant_params_fn,
-      quant_params=quant_params,
-  )
+    output_quant_params = output_tensor_params.producer.parameters
+    if not isinstance(output_quant_params, qtyping.UniformQuantParams):
+      raise ValueError(
+          "_materialize_standard_op_with_same_as_output_scale only supports"
+          f" UniformQuantParams. For tensor {output_tensor_params.tensor_name},"
+          f" got {type(output_quant_params)}"
+      )
+    # Materialize each of the input tensors separately in case there are
+    # constants among them, requiring updating `quantized_data` first.
+    for input_tensor in input_tensors:
+      input_tensor_data = tfl_flatbuffer_utils.get_tensor_data(
+          input_tensor, graph_info.buffers
+      )
+      # Quantize constant inputs' data with the output quantization params.
+      if input_tensor_data is None:
+        quant_params = output_quant_params
+      else:
+        quantized_data = uniform_quantize_tensor.uniform_quantize(
+            input_tensor_data, output_quant_params
+        )
+        quant_params = dataclasses.replace(
+            output_quant_params,
+            quantized_data=quantized_data,
+        )
+      _materialize_op_tensors(
+          op_tensor_params,
+          [input_tensor],
+          is_inbounding_tensor=True,
+          op_info=op_info,
+          graph_info=graph_info,
+          tensor_name_to_qsv=tensor_name_to_qsv,
+          get_tensor_quant_params_fn=get_tensor_quant_params_fn,
+          quant_params=quant_params,
+      )
+
   op_tensor_params.append(output_tensor_params)
 
   return op_tensor_params
diff --git a/ai_edge_quantizer/default_policy.py b/ai_edge_quantizer/default_policy.py
@@ -184,7 +184,8 @@
       "DYNAMIC_UPDATE_SLICE",
       "SELECT_V2",
       "STABLEHLO_COMPOSITE",
-      "PAD"
+      "PAD",
+      "PADV2"
     ],
     "static_wi8_ai8": [
       "ADD",
@@ -216,7 +217,8 @@
       "DYNAMIC_UPDATE_SLICE",
       "SELECT_V2",
       "STABLEHLO_COMPOSITE",
-      "PAD"
+      "PAD",
+      "PADV2"
     ],
     "static_wi4_ai8": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT", "EMBEDDING_LOOKUP"],
     "static_wi4_ai16": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT", "EMBEDDING_LOOKUP"],
diff --git a/ai_edge_quantizer/qtyping.py b/ai_edge_quantizer/qtyping.py
@@ -63,6 +63,7 @@ class TFLOperationName(str, enum.Enum):
   DYNAMIC_UPDATE_SLICE = 'DYNAMIC_UPDATE_SLICE'
   STABLEHLO_COMPOSITE = 'STABLEHLO_COMPOSITE'
   PAD = 'PAD'
+  PADV2 = 'PADV2'
 
 
 class QuantizeMode(enum.Enum):
diff --git a/ai_edge_quantizer/tests/end_to_end_tests/padv2_test.py b/ai_edge_quantizer/tests/end_to_end_tests/padv2_test.py
diff --git a/ai_edge_quantizer/tests/models/single_padv2.tflite b/ai_edge_quantizer/tests/models/single_padv2.tflite
diff --git a/ai_edge_quantizer/utils/tfl_flatbuffer_utils.py b/ai_edge_quantizer/utils/tfl_flatbuffer_utils.py