
Commit 5f789c0

Fix channels_last_tagged_reshape_pass to handle mixed memory format tuple outputs (#11647)
### Summary

This PR fixes `channels_last_tagged_reshape_pass.py` to properly handle tuple outputs with mixed memory formats. Previously, the pass only checked and converted the first element of a tuple output, which could leave the remaining elements with incorrect memory formats. The fix matters for models that return multiple outputs with different memory format requirements, such as a mix of convolution outputs (which should be in NHWC format) and linear outputs (which should stay in the standard contiguous format).

### Test plan

Added a new test class, `ThreeOutputsModel`, whose three outputs have different memory format requirements, and verified that it evaluates correctly with both NCHW and NHWC (channels-last) inputs. Also added a simpler class, `ConvAddConvOutput`, which returns two outputs with different dim orders.
1 parent 057558f commit 5f789c0
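For context, the pass decides between the two dim orders by calling `Tensor.is_contiguous()` on each output's `meta["val"]`. A quick eager-mode sketch of that check (illustration only, not part of this commit):

```python
import torch

conv = torch.nn.Conv2d(3, 16, 3)
x = torch.randn(1, 3, 8, 8).to(memory_format=torch.channels_last)
y = conv(x)  # eager convolution propagates the channels-last layout

print(y.is_contiguous())                                   # False: not standard NCHW
print(y.is_contiguous(memory_format=torch.channels_last))  # True: NHWC
```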

2 files changed: +62, −20 lines


backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py

Lines changed: 15 additions & 20 deletions
@@ -91,18 +91,10 @@ def is_nchw_node(self, node: torch.fx.Node) -> bool:
         return not self.is_nhwc_node(node)

     def requires_nhwc_input(self, node: torch.fx.Node) -> bool:
-        return (
-            node.target in self.memory_sensitive_ops_nhwc
-            or node.name == "output"
-            and not node.args[0][0].meta["val"].is_contiguous()
-        )
+        return node.target in self.memory_sensitive_ops_nhwc

     def requires_nchw_inputs(self, node: torch.fx.Node) -> bool:
-        return (
-            node.target in self.memory_sensitive_ops_nchw
-            or node.name == "output"
-            and node.args[0][0].meta["val"].is_contiguous()
-        )
+        return node.target in self.memory_sensitive_ops_nchw

     def can_be_converted_to_nhwc(self, node: torch.fx.Node) -> bool:
         # There are two conditions that must be met for a node to be able to
@@ -380,18 +372,21 @@ def call(self, graph_module: torch.fx.GraphModule): # noqa: C901
                 # This node has no inputs so we don't need to change anything
                 continue

-            if self.requires_nhwc_input(node):
+            # Need a special case for the output node because it can have multiple output dim orders, as we can output a tuple of multiple nodes
+            if node.op == "output":
+                out_tuple = node.args[0]
+                for out_node in out_tuple:
+                    if out_node.meta["val"].is_contiguous():
+                        self.input_to_nchw(graph_module, out_node, node)
+                    else:
+                        self.input_to_nhwc(graph_module, out_node, node)
+            elif self.requires_nhwc_input(node):
                 # Nodes which enter this branch are ones that require their
                 # first input to be nhwc. This makes this node's output nhwc too
-                # Currently, all nodes like this should have all of their other
-                # inputs as nchw, so fail if this is not true
-                if node.name == "output":
-                    self.input_to_nhwc(graph_module, node.args[0][0], node)
-                else:
-                    self.input_to_nhwc(graph_module, node.args[0], node)
-
-                for input_node in node.all_input_nodes[1:]:
-                    if self.is_nhwc_node(input_node):
+
+                self.input_to_nhwc(graph_module, node.args[0], node)
+                for input_node in node.all_input_nodes:
+                    if input_node.op == "placeholder" and self.is_nhwc_node(input_node):
                         raise AssertionError(
                             f"Expected {input_node} to be NCHW in channels last reshape pass"
                         )
backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py

Lines changed: 47 additions & 0 deletions
@@ -335,3 +335,50 @@ def test_dq_conv2d_channels_last_tagged_reshape_pass(self) -> None:
             )
             .run_method_and_compare_outputs()
         )
+
+    class ConvAddConvOutput(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv1 = torch.nn.Conv2d(3, 16, 3)
+            self.conv2 = torch.nn.Conv2d(16, 16, 3)
+
+        def forward(self, x):
+            y = self.conv1(x)
+            z = torch.add(y, 1.0)
+            out1 = self.conv2(z)
+            out2 = z
+            return out1, out2
+
+    ConvAddConvOutputModule = ConvAddConvOutput()
+
+    def test_conv_add_conv_output(self):
+        x = torch.randn(1, 3, 8, 8)
+
+        self.run_tester(self.ConvAddConvOutput().eval(), (x,))
+
+        x_cl = x.to(memory_format=torch.channels_last)
+        self.run_tester(self.ConvAddConvOutput().eval(), (x_cl,))
+
+    class ThreeOutputsModel(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv1 = torch.nn.Conv2d(3, 3, 3)
+            self.conv2 = torch.nn.Conv2d(3, 3, 3)
+            self.linear = torch.nn.Linear(6, 6)
+
+        def forward(self, x):
+            conv1_out = self.conv1(x)
+            conv2_out = self.conv2(x)
+            linear_out = self.linear(x)
+
+            return linear_out, conv1_out, conv2_out
+
+    ThreeOutputsModelModule = ThreeOutputsModel()
+
+    def test_three_outputs_model(self):
+        x = torch.randn(1, 3, 6, 6)
+
+        self.run_tester(self.ThreeOutputsModelModule.eval(), (x,))
+
+        x_cl = x.to(memory_format=torch.channels_last)
+        self.run_tester(self.ThreeOutputsModelModule.eval(), (x_cl,))
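Each new test runs the model with both a standard NCHW input and a channels-last copy, since results should not depend on the input layout. A rough eager-mode analogue of that expectation (illustrative only, not from the test suite):

```python
import torch

model = torch.nn.Conv2d(3, 16, 3).eval()
x = torch.randn(1, 3, 8, 8)
x_cl = x.to(memory_format=torch.channels_last)  # same values, NHWC strides

with torch.no_grad():
    y = model(x)
    y_cl = model(x_cl)

# Layouts differ, but values should agree up to float tolerance.
assert torch.allclose(y, y_cl, atol=1e-5)
```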
