150 changes: 150 additions & 0 deletions backends/transforms/fuse_clamp_with_binary_op.py
@@ -0,0 +1,150 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import sys

import executorch.backends.vulkan.custom_ops_lib # noqa

import torch

from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult


class FuseClampBinaryOpPass(ExportPass):
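    """
    Fuses a clamp-style activation (relu, hardtanh, clamp) whose output feeds a
    binary op (add, sub, mul, div) into a single Vulkan custom op such as
    et_vk.clamp_with_binary_add.
    """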

FUSEABLE_OPS = [
exir_ops.edge.aten.relu.default,
exir_ops.edge.aten.hardtanh.default,
exir_ops.edge.aten.clamp.default,
]
FUSEABLE_BINARY_OPS = [
exir_ops.edge.aten.add.Tensor,
exir_ops.edge.aten.sub.Tensor,
exir_ops.edge.aten.mul.Tensor,
exir_ops.edge.aten.div.Tensor,
]

def exists_before(self, graph_module, node_a, node_b):
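        """Return True if node_a appears before node_b in the graph's node order."""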
seen_a = False
for n in graph_module.graph.nodes:
if n is node_a:
seen_a = True
if n is node_b:
return seen_a
return False

def get_output_min_max_from_activation(self, activation_node):
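        """Return the (output_min, output_max) bounds implied by a relu, hardtanh, or clamp node."""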
if activation_node.target == exir_ops.edge.aten.relu.default:
output_min = 0.0
output_max = sys.float_info.max
elif activation_node.target == exir_ops.edge.aten.hardtanh.default:
output_min = -1.0
output_max = 1.0
            if len(activation_node.args) > 1:
                output_min = activation_node.args[1]
            if len(activation_node.args) > 2:
                output_max = activation_node.args[2]
elif activation_node.target == exir_ops.edge.aten.clamp.default:
output_min = None
output_max = None
if len(activation_node.args) >= 2:
output_min = activation_node.args[1]
if len(activation_node.args) >= 3:
output_max = activation_node.args[2]

return output_min, output_max

def fuse_clamp_with_binary_ops(self, graph_module: torch.fx.GraphModule, arg_idx):
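        """
        Find binary ops whose operand at arg_idx is a fuseable clamp-style node
        and replace the pair with the matching et_vk.clamp_with_binary_* op.
        Returns [fuseAdded, graph_module].
        """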

fuseAdded = False
for binary_op_node in graph_module.graph.nodes:
if binary_op_node.op == "call_function":
if binary_op_node.target in self.FUSEABLE_BINARY_OPS:
preceding_op = binary_op_node.args[arg_idx]

if (
preceding_op.op == "call_function"
and preceding_op.target in self.FUSEABLE_OPS
):
                        # Ensure the two operands have the same rank
if (
"val" not in binary_op_node.args[0].meta
or "val" not in binary_op_node.args[1].meta
):
continue
if len(binary_op_node.args[1].meta["val"].shape) != len(
binary_op_node.args[0].meta["val"].shape
):
continue

# Get the texture to do the binary op
texture = binary_op_node.args[(arg_idx + 1) % 2]

# Fuse only if the texture exists before the preceding op
if not self.exists_before(graph_module, texture, preceding_op):
continue

new_args = list(preceding_op.args)

# insert the min/max at indices 1 and 2
output_min_max = self.get_output_min_max_from_activation(
preceding_op
)
new_args.insert(1, output_min_max[0])
new_args.insert(2, output_min_max[1])

# put the other texture at idx 3
new_args.insert(3, texture)
new_args = new_args[0:4]

new_args = tuple(new_args)
binary_op_node.replace_all_uses_with(preceding_op)
graph_module.graph.erase_node(binary_op_node)

new_op = None
match binary_op_node.target:
case exir_ops.edge.aten.add.Tensor:
new_op = (
exir_ops.edge.et_vk.clamp_with_binary_add.default
)
case exir_ops.edge.aten.sub.Tensor:
new_op = (
exir_ops.edge.et_vk.clamp_with_binary_sub.default
)
case exir_ops.edge.aten.mul.Tensor:
new_op = (
exir_ops.edge.et_vk.clamp_with_binary_mul.default
)
case exir_ops.edge.aten.div.Tensor:
new_op = (
exir_ops.edge.et_vk.clamp_with_binary_div.default
)

# Create and insert node of custom op `clamp_with_binary_op`
with graph_module.graph.inserting_before(preceding_op):
clamp_binary_op_node = graph_module.graph.create_node(
"call_function",
new_op,
new_args,
)

preceding_op.replace_all_uses_with(clamp_binary_op_node)
graph_module.graph.erase_node(preceding_op)

fuseAdded = True

graph_module.recompile()
graph_module = super().call(graph_module).graph_module
return [fuseAdded, graph_module]

def call(self, graph_module: torch.fx.GraphModule):
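        """
        Try fusion with the clamp as either operand of the binary op, repeating
        until no further fusions are found.
        """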
fuseAdded = True
while fuseAdded:
fuseAdded0, graph_module = self.fuse_clamp_with_binary_ops(graph_module, 0)
fuseAdded1, graph_module = self.fuse_clamp_with_binary_ops(graph_module, 1)
fuseAdded = fuseAdded0 or fuseAdded1

return PassResult(graph_module, True)
105 changes: 105 additions & 0 deletions backends/transforms/fuse_clamps.py
@@ -0,0 +1,105 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import sys

import executorch.backends.vulkan.custom_ops_lib # noqa

import torch

from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult


class FuseClampsPass(ExportPass):
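    """
    Fuses two consecutive clamp-style activations (relu, hardtanh, clamp) into a
    single clamp whose range is the intersection of the two ranges.
    """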

FUSEABLE_CLAMPS = [
exir_ops.edge.aten.relu.default,
exir_ops.edge.aten.hardtanh.default,
exir_ops.edge.aten.clamp.default,
]

def get_output_min_max_from_activation(self, activation_node):
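        """Return the (output_min, output_max) bounds implied by a relu, hardtanh, or clamp node."""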
if activation_node.target == exir_ops.edge.aten.relu.default:
output_min = 0.0
output_max = sys.float_info.max
elif activation_node.target == exir_ops.edge.aten.hardtanh.default:
output_min = -1.0
output_max = 1.0
            if len(activation_node.args) > 1:
                output_min = activation_node.args[1]
            if len(activation_node.args) > 2:
                output_max = activation_node.args[2]
elif activation_node.target == exir_ops.edge.aten.clamp.default:
output_min = None
output_max = None
if len(activation_node.args) >= 2:
output_min = activation_node.args[1]
if len(activation_node.args) >= 3:
output_max = activation_node.args[2]

return output_min, output_max

def call(self, graph_module: torch.fx.GraphModule):
fuseAdded = True
while fuseAdded:
fuseAdded = False
for clamp_2_node in graph_module.graph.nodes:
if clamp_2_node.op == "call_function":
if clamp_2_node.target in self.FUSEABLE_CLAMPS:
preceding_op = clamp_2_node.args[0]
if (
preceding_op.op == "call_function"
and preceding_op.target in self.FUSEABLE_CLAMPS
):
                            # Ensure the input ranks match
if (
"val" not in clamp_2_node.args[0].meta
or "val" not in preceding_op.args[0].meta
):
continue
if len(clamp_2_node.args[0].meta["val"].shape) != len(
preceding_op.args[0].meta["val"].shape
):
continue

min_max1 = self.get_output_min_max_from_activation(
preceding_op
)
min_max2 = self.get_output_min_max_from_activation(
clamp_2_node
)

                            # The fused clamp must satisfy both clamps, so the
                            # combined range is the intersection of the two
                            # ranges. None means unbounded on that side.
                            min_max = [None, None]

                            if min_max1[0] is None:
                                min_max[0] = min_max2[0]
                            elif min_max2[0] is None:
                                min_max[0] = min_max1[0]
                            else:
                                min_max[0] = max(min_max1[0], min_max2[0])

                            if min_max1[1] is None:
                                min_max[1] = min_max2[1]
                            elif min_max2[1] is None:
                                min_max[1] = min_max1[1]
                            else:
                                min_max[1] = min(min_max1[1], min_max2[1])

new_args = list(preceding_op.args)

# Insert the new min/max at indices 1 and 2
new_args.insert(1, min_max[0])
new_args.insert(2, min_max[1])
new_args = new_args[0:3]
preceding_op.args = tuple(new_args)
clamp_2_node.replace_all_uses_with(preceding_op)
graph_module.graph.erase_node(clamp_2_node)
fuseAdded = True

graph_module.recompile()
graph_module = super().call(graph_module).graph_module

return PassResult(graph_module, True)
122 changes: 122 additions & 0 deletions backends/transforms/fuse_conv_with_binary_op.py
@@ -0,0 +1,122 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import executorch.backends.vulkan.custom_ops_lib # noqa

import torch

from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult


class FuseConvBinaryOpPass(ExportPass):
"""
Some activations like ReLU and hardtanh can be fused with certain operators (e.g. convolution) preceding it.
"""

FUSEABLE_OPS = [
exir_ops.edge.aten.convolution.default,
]
FUSEABLE_BINARY_OPS = [
exir_ops.edge.aten.add.Tensor,
exir_ops.edge.aten.sub.Tensor,
exir_ops.edge.aten.mul.Tensor,
exir_ops.edge.aten.div.Tensor,
]

def exists_before(self, graph_module, node_a, node_b):
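        """Return True if node_a appears before node_b in the graph's node order."""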
seen_a = False
for n in graph_module.graph.nodes:
if n is node_a:
seen_a = True
if n is node_b:
return seen_a
return False

def fuse_conv_with_binary_ops(self, graph_module: torch.fx.GraphModule, arg_idx):
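        """
        Find binary ops whose operand at arg_idx is a fuseable convolution and
        replace the pair with the matching et_vk.conv_with_binary_* op.
        Returns [fuseAdded, graph_module].
        """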

fuseAdded = False
for binary_op_node in graph_module.graph.nodes:
if (
binary_op_node.op == "call_function"
and binary_op_node.target in self.FUSEABLE_BINARY_OPS
):
preceding_op = binary_op_node.args[arg_idx]
if (
preceding_op.op == "call_function"
and preceding_op.target in self.FUSEABLE_OPS
):

                    # For now only pointwise conv2d with stride=1 and padding=0
                    # is supported; only stride and padding are checked here.
if not (
len(preceding_op.args[3]) == 2
and preceding_op.args[3][0] == 1
and preceding_op.args[3][1] == 1
and preceding_op.args[4][0] == 0
and preceding_op.args[4][1] == 0
):
continue

                    # Ensure the two operands have the same rank
if (
"val" not in binary_op_node.args[0].meta
or "val" not in binary_op_node.args[1].meta
):
continue
if len(binary_op_node.args[0].meta["val"].shape) != len(
binary_op_node.args[1].meta["val"].shape
):
continue

# Get the texture to do the binary op
texture = binary_op_node.args[(arg_idx + 1) % 2]

# Fuse only if the texture exists before the preceding op
if not self.exists_before(graph_module, texture, preceding_op):
continue

new_args = list(preceding_op.args)
new_args.append(texture)
new_args = tuple(new_args)
binary_op_node.replace_all_uses_with(preceding_op)
graph_module.graph.erase_node(binary_op_node)

new_op = None
if binary_op_node.target == exir_ops.edge.aten.add.Tensor:
new_op = exir_ops.edge.et_vk.conv_with_binary_add.default
if binary_op_node.target == exir_ops.edge.aten.sub.Tensor:
new_op = exir_ops.edge.et_vk.conv_with_binary_sub.default
if binary_op_node.target == exir_ops.edge.aten.mul.Tensor:
new_op = exir_ops.edge.et_vk.conv_with_binary_mul.default
if binary_op_node.target == exir_ops.edge.aten.div.Tensor:
new_op = exir_ops.edge.et_vk.conv_with_binary_div.default

# Create and insert node of custom op `conv_with_binary_op`
with graph_module.graph.inserting_before(preceding_op):
conv_binary_op_node = graph_module.graph.create_node(
"call_function",
new_op,
new_args,
)

preceding_op.replace_all_uses_with(conv_binary_op_node)
graph_module.graph.erase_node(preceding_op)

fuseAdded = True

graph_module.recompile()
graph_module = super().call(graph_module).graph_module
return [fuseAdded, graph_module]

def call(self, graph_module: torch.fx.GraphModule):

fuseAdded = True
while fuseAdded:
fuseAdded0, graph_module = self.fuse_conv_with_binary_ops(graph_module, 0)
fuseAdded1, graph_module = self.fuse_conv_with_binary_ops(graph_module, 1)
fuseAdded = fuseAdded0 or fuseAdded1

return PassResult(graph_module, True)
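
A minimal sketch of how these three passes might be applied together on an edge-dialect graph module. The import paths (executorch.backends.transforms.*) are inferred from the file locations above, and the toy model, input shapes, and export flow are illustrative assumptions rather than part of this change.

# Sketch only: module paths, toy model, and inputs are assumptions.
import torch

from executorch.backends.transforms.fuse_clamp_with_binary_op import FuseClampBinaryOpPass
from executorch.backends.transforms.fuse_clamps import FuseClampsPass
from executorch.backends.transforms.fuse_conv_with_binary_op import FuseConvBinaryOpPass
from executorch.exir import to_edge


class ToyModel(torch.nn.Module):
    def forward(self, x, y):
        # clamp -> relu collapses under FuseClampsPass; the surviving clamp
        # feeding the add is then a candidate for FuseClampBinaryOpPass.
        return torch.relu(torch.clamp(x, -2.0, 2.0)) + y


example_inputs = (torch.randn(1, 8, 16, 16), torch.randn(1, 8, 16, 16))
edge = to_edge(torch.export.export(ToyModel(), example_inputs))
gm = edge.exported_program().graph_module

# Each pass returns a PassResult; thread the (possibly rewritten) module through.
# FuseConvBinaryOpPass is a no-op on this toy graph but would rewrite a
# pointwise conv (stride 1, padding 0) feeding an add/sub/mul/div.
for fusion_pass in (FuseClampsPass(), FuseClampBinaryOpPass(), FuseConvBinaryOpPass()):
    gm = fusion_pass(gm).graph_module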