
[Transforms] Transform Registry Support #274

Open · wants to merge 6 commits into base: dsikka/transform_support
Changes from 4 commits
73 changes: 46 additions & 27 deletions src/compressed_tensors/transforms/base.py
@@ -17,6 +17,7 @@
import torch
from compressed_tensors.registry.registry import RegistryMixin
from compressed_tensors.transforms.utils import apply_matrix_transform
from compressed_tensors.utils import register_offload_parameter, update_parameter_data


__all__ = ["Transforms"]
@@ -27,18 +28,16 @@
# first or second matrix in torch.matmul depending on dimensions, can be inferred
# by the layer time likely.

MATIRX_TRANSFORMS = ["matrix-mul", "hadamard", "random-hadamard"]


class Transforms(RegistryMixin):
def __new__(
cls,
def __init__(
self,
transform: torch.Tensor,
learnable: Optional[bool] = True,
device: Optional[Union[str, torch.device]] = "cuda",
dtype: Optional[torch.dtype] = torch.bfloat16,
*args,
**kwargs,
):
self.learnable = learnable
"""
Base class for setting up transforms. The registry creates transforms
as parameters which can be attached to modules.
@@ -48,38 +47,58 @@ def __new__(
size = 1024
dtype = torch.bfloat16
module = torch.nn.Linear(size, size)
name = "weight_transform"

hadamard_transform = Transforms.load_from_registry(
"random_hadamard", size=size, dtype=dtype
)
hadamard_apply = Transforms.fetch_apply("random_hadamard")
module.weight_transform = hadamard_transform

transformed_output = hadamard_apply(input_tensor=module.weight,
transform=moduel.weight_transform)
hadamard_transform.register_to_module(name, module)
module.transform_data = {name: {"call_args": dict, "class": hadamard_transform}}

hadamard_inverse = Transforms.fetch_inverse_apply("random_hadamard")
original_weight = hadamard_inverse(input_tensor=transformed_output,
transform=model.weight_trainsform,
transpose=True)
transformed_output = hadamard_transform.apply(input_tensor=module.weight)
original_weight = hadamard_transform.inverse_apply(
input_tensor=transformed_output)

:param transform: transform (e.g. torch.Tensor, scalar) to be applied
"""
return torch.nn.Parameter(transform.to(device).to(dtype), requires_grad=False)

@classmethod
def fetch_apply(cls, name: str):
if name in MATIRX_TRANSFORMS:
return apply_matrix_transform
raise NotImplementedError("Only matrix transforms are supported")

@classmethod
def fetch_inverse_apply(cls, name: str):
return cls.get_value_from_registry(name=name).inverse_apply
if self.learnable:
self.transform = torch.nn.Parameter(transform.to(dtype).to(device))
else:
self.transform = torch.nn.Buffer(transform.to(dtype).to(device))

# register to module for easy offloading, serialization, deserialization
def register_to_module(self, name: str, module: torch.nn.Module):
if self.learnable:
register_offload_parameter(module, name, self.transform)
else:
# TODO: have to verify serialization/offloading
module.register_buffer(name, self.transform)

def update_transform(
self,
data: torch.Tensor,
module: Optional[torch.nn.Module] = None,
name: Optional[str] = None,
):
if module is None:
self.transform.data.copy_(data)
else:
# If updating the module parameter data, assumes this is also the transform
# data
if name is None:
raise ValueError("Name and module are required to update parma data")
Review comment (Member): param data?

update_parameter_data(module, data, name)

def apply(self, input_tensor: torch.Tensor, *args, **kwargs) -> torch.Tensor:
"""
Apply the transform to the module
"""
raise NotImplementedError()

@staticmethod
# TODO: potentially split into its own transform using the same shared set-up
def inverse_apply(
transform: torch.Tensor, input_tensor: torch.Tensor, *args, **kwargs
self, input_tensor: torch.Tensor, *args, **kwargs
) -> torch.Tensor:
"""
Apply the inverse of the operation performed by the apply method
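For orientation, here is a minimal usage sketch of the reworked base class, pieced together from the docstring and the new methods in this file. The registry name, keyword arguments, and `transform_data` layout are taken from this diff; `device="cpu"`, `torch.float32`, and the identity tensor passed to `update_transform` are assumptions made so the sketch runs without a GPU.

```python
import torch
from compressed_tensors.transforms import Transforms

size, dtype = 1024, torch.float32
module = torch.nn.Linear(size, size, dtype=dtype)
name = "weight_transform"

# Instantiate a registered transform; the tensor now lives on the instance
# as `self.transform` instead of the class returning a bare nn.Parameter.
hadamard_transform = Transforms.load_from_registry(
    "random-hadamard", size=size, dtype=dtype, device="cpu"
)

# Attach the parameter to the module and record how it should be called
# (empty call args here).
hadamard_transform.register_to_module(name, module)
module.transform_data = {name: {"call_args": {}, "class": hadamard_transform}}

# Apply / invert through the instance rather than the old fetch_apply helpers.
transformed = hadamard_transform.apply(input_tensor=module.weight)
restored = hadamard_transform.inverse_apply(input_tensor=transformed)

# Swap in new data (e.g. after deserialization); passing module and name
# routes through update_parameter_data so the registered parameter is updated.
hadamard_transform.update_transform(
    torch.eye(size, dtype=dtype), module=module, name=name
)
```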
28 changes: 21 additions & 7 deletions src/compressed_tensors/transforms/hadamard.py
@@ -22,12 +22,14 @@

@Transforms.register("hadamard")
class Hadamard(Transforms):
def __new__(
cls,
def __init__(
self,
size: int,
empty: Optional[bool] = False,
device: Optional[Union[str, torch.device]] = "cuda",
dtype: Optional[torch.dtype] = torch.bfloat16,
*args,
**kwargs,
):
"""
Produces a hadamard matrix with dims (size, size), with values
@@ -50,11 +52,23 @@ def __new__(
else:
transform = torch.empty((size, size))

return super().__new__(cls, transform=transform, device=device, dtype=dtype)
super().__init__(transform=transform, dtype=dtype, device=device)

def apply(
self,
input_tensor: torch.Tensor,
transpose: bool = False,
first: bool = True,
) -> torch.Tensor:
return apply_matrix_transform(
transform=self.transform,
input_tensor=input_tensor,
transpose=transpose,
first=first,
)

@staticmethod
def inverse_apply(
transform: torch.Tensor,
self,
input_tensor: torch.Tensor,
transpose: bool = False,
Review comment (@kylesayrs, Contributor, Apr 25, 2025): I think having first and call_args in general seems to be an extensible pattern, but maybe transpose should not be a call arg, as it seems to be determined by the specific Transform and whether you're applying the inverse, not something a caller should have control over.

Review comment (Contributor): Afaict transpose is only used when applying an inverse Hadamard, and therefore should not be controllable by the user, to avoid footgunning.

first: bool = True,
@@ -73,10 +87,10 @@ def inverse_apply(
# need to normalize before sending back
return (
apply_matrix_transform(
transform=transform,
transform=self.transform,
input_tensor=input_tensor,
transpose=transpose,
first=first,
)
/ transform.shape[0]
/ self.transform.shape[0]
)
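On the transpose discussion in the review comments above: a hypothetical sketch (not part of this PR) of one way to follow that suggestion, baking the transpose into inverse_apply so it is no longer a call arg. It assumes an unnormalized ±1 Hadamard matrix stored on the instance and the apply_matrix_transform semantics used throughout this diff.

```python
import torch
from compressed_tensors.transforms.utils import apply_matrix_transform


class HadamardLike:
    """Hypothetical variant where the inverse always transposes internally."""

    def __init__(self, transform: torch.Tensor):
        # expected to hold an unnormalized +/-1 Hadamard matrix
        self.transform = transform

    def apply(self, input_tensor: torch.Tensor, first: bool = True) -> torch.Tensor:
        return apply_matrix_transform(
            transform=self.transform,
            input_tensor=input_tensor,
            transpose=False,
            first=first,
        )

    def inverse_apply(self, input_tensor: torch.Tensor, first: bool = True) -> torch.Tensor:
        # transpose is fixed here rather than exposed to the caller;
        # dividing by size undoes the unnormalized H @ x product
        return (
            apply_matrix_transform(
                transform=self.transform,
                input_tensor=input_tensor,
                transpose=True,
                first=first,
            )
            / self.transform.shape[0]
        )


# round-trip with a 2x2 Hadamard: H.T @ (H @ x) / 2 == x
H2 = torch.tensor([[1.0, 1.0], [1.0, -1.0]])
x = torch.rand(2, 3)
t = HadamardLike(H2)
assert torch.allclose(t.inverse_apply(t.apply(x)), x)
```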
20 changes: 16 additions & 4 deletions src/compressed_tensors/transforms/matrix_multiply.py
@@ -14,14 +14,27 @@

import torch
from compressed_tensors.transforms import Transforms
from compressed_tensors.transforms.utils import apply_matrix_transform


# TODO: fix loading
@Transforms.register("matrix-mul")
class MatrixMultiply(Transforms):
@staticmethod
def apply(
self,
input_tensor: torch.Tensor,
transpose: bool = False,
first: bool = True,
) -> torch.Tensor:
return apply_matrix_transform(
transform=self.transform,
input_tensor=input_tensor,
transpose=transpose,
first=first,
)

def inverse_apply(
transform: torch.Tensor,
self,
input_tensor: torch.Tensor,
transpose: bool = False,
first: bool = True,
@@ -38,9 +51,8 @@ def inverse_apply(
"""

# Note: not implemented for lower precision than float32
transform = torch.linalg.inv(transform)
return apply_matrix_transform(
transform=transform,
transform=torch.linalg.inv(self.transform),
input_tensor=input_tensor,
transpose=transpose,
first=first,
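A usage sketch for the matrix-multiply transform, following the pattern in tests/test_transforms/test_transforms.py further down; the identity multiplier, size, and CPU device are illustrative choices. Per the note in the diff, the inverse path goes through torch.linalg.inv, so the transform needs float32 or higher precision.

```python
import torch
from compressed_tensors.transforms import Transforms

size, dtype = 64, torch.float32  # torch.linalg.inv is not implemented below float32
multiplier = torch.eye(size, dtype=dtype)

mm_transform = Transforms.load_from_registry(
    "matrix-mul", transform=multiplier, device="cpu", dtype=dtype
)

x = torch.rand((size, size), dtype=dtype)
y = mm_transform.apply(x)                             # multiplier @ x
x_back = mm_transform.inverse_apply(input_tensor=y)   # inv(multiplier) @ y

assert torch.equal(y, x)          # identity multiplier leaves x unchanged
assert torch.allclose(x_back, x)
```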
24 changes: 18 additions & 6 deletions src/compressed_tensors/transforms/random_hadamard.py
@@ -22,8 +22,8 @@

@Transforms.register("random-hadamard")
class RandomHadamard(Transforms):
def __new__(
cls,
def __init__(
self,
size: int,
empty: Optional[bool] = False,
device: Optional[Union[str, torch.device]] = "cuda",
@@ -58,11 +58,23 @@ def __new__(
else:
transform = torch.empty((size, size))

return super().__new__(cls, transform=transform, device=device, dtype=dtype)
super().__init__(transform=transform, device=device, dtype=dtype)

def apply(
self,
input_tensor: torch.Tensor,
transpose: bool = False,
first: bool = True,
) -> torch.Tensor:
return apply_matrix_transform(
transform=self.transform,
input_tensor=input_tensor,
transpose=transpose,
first=first,
)

@staticmethod
def inverse_apply(
transform: torch.Tensor,
self,
input_tensor: torch.Tensor,
transpose: bool = False,
first: bool = True,
@@ -80,7 +92,7 @@

transpose = not transpose
return apply_matrix_transform(
transform=transform,
transform=self.transform,
input_tensor=input_tensor,
transpose=transpose,
first=first,
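Because the random Hadamard matrix is generated already normalized (the random-hadamard test further down checks that sqrt(size) * transform contains only ±1), inverse_apply just flips transpose and needs no rescaling. A quick round-trip sketch, with CPU and float32 assumed:

```python
import torch
from compressed_tensors.transforms import Transforms

size, dtype = 1024, torch.float32
random_hadamard = Transforms.load_from_registry(
    "random-hadamard", size=size, dtype=dtype, device="cpu"
)

x = torch.rand((size, size), dtype=dtype)
y = random_hadamard.apply(input_tensor=x)               # Q @ x
x_back = random_hadamard.inverse_apply(input_tensor=y)  # Q.T @ y, since Q.T @ Q == I

# loose tolerance to absorb float32 accumulation error at this size
assert torch.allclose(x, x_back, atol=1e-3, rtol=1e-3)
```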
2 changes: 1 addition & 1 deletion tests/test_transforms/test_hadamards.py
@@ -51,7 +51,7 @@ def test_random_hadamard_matrix_compliant(size):

@pytest.mark.parametrize(
"size",
[1024, 2048],
[1024],
)
def test_deterministic_hadamard_compliant(size):
had_matrix = deterministic_hadamard_matrix(size)
21 changes: 9 additions & 12 deletions tests/test_transforms/test_transforms.py
@@ -44,19 +44,18 @@ def test_random_hadamard_transform(size: int, dtype: torch.dtype):
# check initialize
assert hadamard_transform is not None

val_1 = torch.round(hadamard_transform @ hadamard_transform.T)
val_1 = torch.round(hadamard_transform.transform @ hadamard_transform.transform.T)

# output will be normalized, multiply by sqrt(size) to ensure form
normalized = math.sqrt(size) * hadamard_transform
normalized = math.sqrt(size) * hadamard_transform.transform
# all values should be -1 or +1
assert torch.all(torch.isin(normalized, torch.Tensor([-1, +1])))
# check creation; HH.T == I
assert torch.equal(val_1, torch.eye(size))

# check apply
x = torch.rand((size, size), dtype=dtype)
apply = Transforms.fetch_apply("random-hadamard")
transformed_value = apply(input_tensor=x, transform=hadamard_transform)
transformed_value = hadamard_transform.apply(input_tensor=x)
# TODO: check to make sure the matrix was applied correctly?
assert transformed_value.shape == (size, size)

@@ -75,16 +74,15 @@ def test_deterministic_hadamard_transform(size: int, dtype: torch.dtype):

# check initialize
assert hadamard_transform is not None
assert torch.all(torch.isin(hadamard_transform, torch.Tensor([-1, +1])))
assert torch.all(torch.isin(hadamard_transform.transform, torch.Tensor([-1, +1])))

val_1 = hadamard_transform @ hadamard_transform.T
val_1 = hadamard_transform.transform @ hadamard_transform.transform.T
# check creation; HH.T == nI
assert torch.equal(val_1 / size, torch.eye(size))

# check apply
x = torch.rand((size, size), dtype=dtype)
apply = Transforms.fetch_apply("hadamard")
transformed_value = apply(input_tensor=x, transform=hadamard_transform)
transformed_value = hadamard_transform.apply(input_tensor=x)
# TODO: check to make sure the matrix was applied correctly?
assert transformed_value.shape == (size, size)

@@ -103,9 +101,8 @@ def test_multiplier_transform(size: int, dtype: torch.dtype):
"matrix-mul", transform=multiplier, device="cpu", dtype=dtype
)
assert multiplier_transform is not None
assert torch.equal(multiplier_transform, multiplier)
assert torch.equal(multiplier_transform.transform, multiplier)

x = torch.rand((size, size), dtype=dtype)
apply = Transforms.fetch_apply("matrix-mul")
transformed_value = apply(input_tensor=x, transform=multiplier_transform)
assert torch.equal(transformed_value, x)
transformed_output = multiplier_transform.apply(x)
assert torch.equal(transformed_output, x)