pytorch
diff --git a/backends/arm/_passes/annotate_channels_last_dim_order_pass.py b/backends/arm/_passes/annotate_channels_last_dim_order_pass.py
Lines changed: 2 additions & 24 deletions
diff --git a/backends/arm/_passes/annotate_decomposed_matmul.py b/backends/arm/_passes/annotate_decomposed_matmul.py
Lines changed: 9 additions & 14 deletions
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
Lines changed: 3 additions & 3 deletions
diff --git a/backends/arm/_passes/cast_int64_pass.py b/backends/arm/_passes/cast_int64_pass.py
Lines changed: 2 additions & 0 deletions
diff --git a/backends/arm/_passes/decompose_linear_pass.py b/backends/arm/_passes/decompose_linear_pass.py
Lines changed: 5 additions & 21 deletions
diff --git a/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py b/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py
Lines changed: 36 additions & 16 deletions
@@ -5,15 +5,12 @@
 
 # pyre-unsafe
 
-from typing import cast
 
 import torch
 from executorch.backends.arm._passes.arm_pass_utils import (
     create_node,
     get_first_fake_tensor,
-    insert_q_dq_pair,
 )
-from executorch.backends.arm.tosa_quant_utils import dq_op, q_op
 from executorch.backends.arm.tosa_utils import is_consumer_node_depthwise_conv2d
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
@@ -59,20 +56,10 @@ class AnnotateChannelsLastDimOrder(ExportPass):
 
     def is_weight_node_for_depthwise_conv2d(self, node: torch.fx.Node):
         """
-        returns True for dq and w in the following sequences;
+        returns True for w in the following sequence;
         w -> depthwise_conv2d -> ...
-        w -> dq -> depthwise_conv2d -> ...
         """
-        if node.op == "call_function":
-            if node.target != dq_op:
-                return False
-            prev_node = node.args[0]
-            if cast(torch.fx.Node, prev_node).op != "placeholder":
-                return False
-            if is_consumer_node_depthwise_conv2d(node):
-                consumer_node = list(node.users)[0]
-                return consumer_node.args[1] == node
-        elif node.op == "placeholder":
+        if node.op == "placeholder":
             # node is an input, weight or bias node
             consumer_node = list(node.users)[0]
             if self.is_weight_node_for_depthwise_conv2d(consumer_node):
@@ -129,8 +116,6 @@ def is_channel_reshape(input_shape, output_shape):
 
     @staticmethod
     def insert_input_transpose(node, input_node, graph_module):
-        quantize = input_node.target == dq_op
-        q_params = input_node.args[1:] if quantize else None
         with graph_module.graph.inserting_before(node):
             permute_node = create_node(
                 graph_module.graph,
@@ -143,8 +128,6 @@ def insert_input_transpose(node, input_node, graph_module):
                         else AnnotateChannelsLastDimOrder.NHWC_inverse_order
                     ),
                 ),
-                quantize=quantize,
-                q_params=q_params,
             )
             node.replace_input_with(input_node, permute_node)
 
@@ -185,11 +168,6 @@ def insert_output_transpose(node, graph_module):
             for user in users:
                 user.replace_input_with(node, permute_node)
 
-            quantize = node.args[0] == q_op
-            if quantize:
-                q_params = node.args[0].args[1:]
-                insert_q_dq_pair(graph_module.graph, node, q_params)
-
     @staticmethod
     def _insert_view_transpose(
         input_shape, output_shape, node, input_node, graph_module
 
@@ -7,13 +7,14 @@
 
 import itertools
 import operator
-from typing import List
+from typing import cast, List
 
 import torch
 from executorch.backends.arm._passes.arm_pass_utils import create_node
 
-from executorch.backends.arm.tosa_quant_utils import dq_op, q_op, QuantArgs
+from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops
 from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.dialects.edge._ops import EdgeOpOverload
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx import GraphModule
 from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
@@ -61,7 +62,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
         }
         for partition in matmul_partitions:
             quantized_input = all(
-                input_node.target == dq_op for input_node in partition.input_nodes
+                input_node.target in dq_ops for input_node in partition.input_nodes
             )
             matmul_node = [
                 node for node in partition.nodes if node.target in matmul_targets
@@ -74,17 +75,14 @@ def call(self, graph_module: GraphModule) -> PassResult:
                     input_node = self._match_partition_to_node(
                         node, partition.input_nodes
                     )
-                    input_node_qargs = QuantArgs.from_operator(
-                        input_node.target, input_node.args
-                    )
                     # Insert new dq-node just before the mm/bmm with input_node's qparams
                     with graph_module.graph.inserting_before(matmul_node):
                         # Create new dq-node before matmul
                         dq_node = create_node(
                             graph=graph_module.graph,
-                            op_target=dq_op,
+                            op_target=cast(EdgeOpOverload, input_node.target),  # type: ignore[arg-type]
                         )
-                        dq_node.args = (node, *input_node_qargs)
+                        dq_node.args = (node, *input_node.args[1:])
                         matmul_node.replace_input_with(node, dq_node)
 
                 for partition_input in partition.input_nodes:
@@ -95,19 +93,16 @@ def call(self, graph_module: GraphModule) -> PassResult:
                     graph_module.graph.erase_node(partition_input)
 
             partition_output = list(partition.output_nodes[0].users)[0]
-            quantized_output = partition_output.target == q_op
+            quantized_output = partition_output.target in q_ops
             if quantized_output:
-                output_node_qargs = QuantArgs.from_operator(
-                    partition_output.target, partition_output.args
-                )
                 with graph_module.graph.inserting_after(matmul_node):
                     # Create q-node after matmul
                     q_node = create_node(
                         graph=graph_module.graph,
-                        op_target=q_op,
+                        op_target=cast(EdgeOpOverload, partition_output.target),  # type: ignore[arg-type]
                     )
                     matmul_node.replace_all_uses_with(q_node)
-                    q_node.args = (matmul_node, *output_node_qargs)
+                    q_node.args = (matmul_node, *partition_output.args[1:])
                 # Remove partition output q-node
                 partition_output.replace_all_uses_with(
                     partition_output.all_input_nodes[0]
 
@@ -93,7 +93,6 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(RemoveGetItemPass())
         self.add_pass(ConvertSplitToSlicePass())
         self.add_pass(ConvertMmToBmmPass())
-        self.add_pass(DecomposeLinearPass())
         self.add_pass(DecomposeLinearVectorNormPass())
         self.add_pass(
             DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
@@ -109,12 +108,13 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(ReplaceScalarWithTensorArgPassTOSABI())
         self.add_pass(AnnotateDecomposedMatmulPass())
         self.add_pass(QuantizeOperatorArguments())
-        self.add_pass(FoldAndAnnotateQParamsPass())  # type: ignore[call-arg]
+        self.add_pass(FoldAndAnnotateQParamsPass(exported_program))  # type: ignore[call-arg]
         self.add_pass(RetraceFoldedDtypesPass())
         self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
         self.add_pass(MatchArgRanksPass(exported_program))
         if self.tosa_spec.is_U55_subset:
             self.add_pass(BroadcastArgsPass())
+        self.add_pass(DecomposeLinearPass())
         self.add_pass(ComputeConstantOpsAOT(exported_program))
 
         self.add_pass(RemoveClonePass())
@@ -168,7 +168,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
 
         self.add_pass(AnnotateDecomposedMatmulPass())
         self.add_pass(QuantizeOperatorArguments())
-        self.add_pass(FoldAndAnnotateQParamsPass())  # type: ignore[call-arg]
+        self.add_pass(FoldAndAnnotateQParamsPass(exported_program))  # type: ignore[call-arg]
         self.add_pass(RetraceFoldedDtypesPass())
         self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
         self.add_pass(MatchArgRanksPass(exported_program))
 
@@ -35,6 +35,8 @@ def _assert_within_int32(self, tensor: torch.Tensor, node: torch.fx.Node):
 
     def _to_int32(self, graph_module: torch.fx.GraphModule):
         for node in graph_module.graph.nodes:
+            if len(node.users) == 0:
+                continue
             fake_tensor = node.meta["val"]
             if not isinstance(fake_tensor, torch._subclasses.fake_tensor.FakeTensor):
                 continue
 
@@ -1,30 +1,28 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
-# All rights reserved.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
 # pyre-unsafe
 
 import numpy as np
+from executorch.backends.arm._passes import ArmPass
 from executorch.backends.arm._passes.arm_pass_utils import (
     create_node,
     get_first_fake_tensor,
 )
-from executorch.backends.arm.tosa_quant_utils import dq_op, q_op
 from executorch.exir.dialects._ops import ops as exir_ops
-from executorch.exir.pass_base import ExportPass, PassResult
+from executorch.exir.pass_base import PassResult
 
 
-class DecomposeLinearPass(ExportPass):
+class DecomposeLinearPass(ArmPass):
     """
     This pass decomposes linear into a Conv2D with the required view operations.
     linear(x, weights, bias) becomes:
         x_reshaped       = view(x)
         weights_reshaped = view(weights)
         conv2d           = conv2d(x_reshaped, weights_reshaped, bias)
         output           = view(conv2d)
-    It also inserts q/dq pairs if the linear node was quantized.
     """
 
     def call(self, graph_module):
@@ -47,35 +45,22 @@ def call(self, graph_module):
             weights_reshaped_shape = [weights_shape[0], weights_shape[1], 1, 1]
 
             with graph_module.graph.inserting_before(node):
-                quantize = input.op == "call_function" and input.target == dq_op
-                q_params = input.args[1:] if quantize else None
                 # Reshape input to 4D with shape (N, Ci, 1, 1)
                 input_reshaped = create_node(
                     graph=graph_module.graph,
                     op_target=exir_ops.edge.aten.view_copy.default,
                     args=(input, input_reshaped_shape),
                     kwargs={},
-                    quantize=quantize,
-                    q_params=q_params,
                 )
 
-                quantize = weights.op == "call_function" and weights.target == dq_op
-                q_params = weights.args[1:] if quantize else None
                 # Reshape weights to 4D with shape (Co, Ci, 1, 1)
                 weights_reshaped = create_node(
                     graph=graph_module.graph,
                     op_target=exir_ops.edge.aten.view_copy.default,
                     args=(weights, weights_reshaped_shape),
                     kwargs={},
-                    quantize=quantize,
-                    q_params=q_params,
                 )
 
-                consumer_node = list(node.users)[0]
-                quantize = (
-                    consumer_node.op == "call_function" and consumer_node.target == q_op
-                )
-                q_params = consumer_node.args[1:] if quantize else None
                 conv = create_node(
                     graph=graph_module.graph,
                     op_target=exir_ops.edge.aten.convolution.default,
@@ -91,8 +76,7 @@ def call(self, graph_module):
                         1,  # groups
                     ),
                     kwargs={},
-                    quantize=quantize,
-                    q_params=q_params,
+                    from_node=node,
                 )
 
             with graph_module.graph.inserting_after(conv):
 
@@ -10,7 +10,13 @@
 
 from typing import cast, Dict, Set, Tuple
 
-from executorch.backends.arm.tosa_quant_utils import QuantArgs
+from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes.arm_pass_utils import (
+    get_param_tensor,
+    is_param_node,
+)
+
+from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops, QuantArgs
 
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload
@@ -24,9 +30,6 @@
 )
 from torch.fx import GraphModule, Node
 
-q_op: EdgeOpOverload = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
-dq_op: EdgeOpOverload = exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default
-
 
 def get_input_qparams(node: Node) -> dict[int, QuantArgs]:
     """
@@ -66,7 +69,7 @@ def get_output_qparams(node: Node) -> dict[int, QuantArgs]:
     return output_qparams
 
 
-class FoldAndAnnotateQParamsPass(ExportPass):
+class FoldAndAnnotateQParamsPass(ArmPass):
     """
     A pass that walks the graph and removes any DQ and Q nodes before and after the target
      node.
@@ -96,9 +99,6 @@ class FoldAndAnnotateQParamsPass(ExportPass):
 
     """
 
-    def __init__(self) -> None:
-        super().__init__()
-
     def fold_and_annotate_arg(
         self, graph_module: GraphModule, node: Node, arg_list: list[Node], i: int
     ) -> None:
@@ -109,8 +109,25 @@ def fold_and_annotate_arg(
                 return
 
             arg_quant_params = None
-            if arg.target == dq_op:
-                arg_quant_params = QuantArgs.from_operator(arg.target, arg.args)
+            if arg.target in dq_ops:
+                args = arg.args
+                scales = args[1]
+                if (
+                    isinstance(args[1], Node)
+                    and self.exported_program is not None
+                    and is_param_node(self.exported_program, args[1])
+                ):
+                    scales = get_param_tensor(self.exported_program, args[1])
+                zps = args[2]
+                if (
+                    isinstance(args[2], Node)
+                    and self.exported_program is not None
+                    and is_param_node(self.exported_program, args[2])
+                ):
+                    zps = get_param_tensor(self.exported_program, args[2])
+                arg_quant_params = QuantArgs.from_operator(
+                    arg.target, (args[0], scales, zps, *args[3:])
+                )
                 # add arg to nodes_to_remove to fold the dq-node
                 nodes_to_remove.add(arg)
             if input_qparams is not None and input_qparams != arg_quant_params:
@@ -120,10 +137,13 @@ def fold_and_annotate_arg(
         if input_qparams is not None:
             node.meta["input_qparams"][i] = input_qparams
             for n in nodes_to_remove:
-                if n.target != dq_op:
-                    raise RuntimeError(f"Expected {dq_op} dq_op, got {n.target}")
+                if n.target not in dq_ops:
+                    raise RuntimeError(
+                        f"Expected one of {dq_ops} dq_op, got {n.target}"
+                    )
 
-                n.replace_all_uses_with(n.args[0])  # type: ignore[arg-type]
+                if len(n.args) > 0:
+                    n.replace_all_uses_with(n.args[0])  # type: ignore[arg-type]
                 graph_module.graph.erase_node(n)
 
     def call(self, graph_module: GraphModule) -> PassResult:
@@ -134,7 +154,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
             if n.op != "call_function":
                 continue
             # Don't fold chains of quant-ops into each other.
-            if n.target in (q_op, dq_op):
+            if n.target in (*q_ops, *dq_ops):
                 continue
 
             # Make sure we haven't already set qparams meta information on the node
@@ -164,7 +184,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
             # Copy the users, since we are modifying it.
             users_copy = copy.copy(n.users)
             for i, user in enumerate(users_copy):
-                if user.target != q_op:
+                if user.target not in q_ops:
                     continue
 
                 # quantization node found here, store the quantization parameters in meta value
@@ -201,7 +221,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
 
             # Make sure we have a quantized operator
             user = list(n.users)[0]
-            if user.target != q_op:
+            if user.target not in q_ops:
                 continue
 
             qargs = QuantArgs.from_operator(user.target, user.args)