
Commit ed002e9

Fixing bug in matrix_multiply.py (#507)
* Fixing bug in matrix_multiply.py

  The generator is created on CPU by default, so trying to create a tensor on
  CUDA with a CPU generator would raise an error; we can just create the tensor
  on the generator's device and move it over instead. Added a test to
  demonstrate that this works and was broken before, and added random-matrix
  to the other correctness tests (test was generated with claude-code and
  verified by me).

* formatting

* handling set default device

* formatting

Signed-off-by: HDCharles <charlesdavidhernandez@gmail.com>
1 parent 20a63ea commit ed002e9
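
For context, a minimal sketch of the failure mode the commit message describes (illustrative only, not code from the repo): a `torch.Generator` lives on CPU by default, and PyTorch refuses to sample directly onto CUDA with it, so the tensor has to be created on the generator's own device and moved afterwards.

```python
import torch

gen = torch.Generator()  # a fresh generator lives on CPU by default

# Before the fix: sampling straight onto CUDA with a CPU generator
# raises a RuntimeError about the generator/device mismatch.
try:
    torch.rand((4, 4), generator=gen, device="cuda")
except RuntimeError as err:
    print(err)

# After the fix: sample on the generator's own device, then move the result.
data = torch.rand((4, 4), generator=gen, device=gen.device).to("cuda")
```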

File tree: 2 files changed, +38 -7 lines changed


src/compressed_tensors/transform/factory/matrix_multiply.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -68,8 +68,8 @@ def _create_weight(self, size: int, device: device, precision: dtype) -> Parameter
             (size, size),
             generator=self.generator,
             dtype=precision,
-            device=device,
-        )
+            device=self.generator.device,
+        ).to(device)
         return Parameter(data, requires_grad=self.scheme.requires_grad)

     def _create_inverse(self, weight: Parameter) -> Parameter:
```
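
A side note on why moving after sampling is safe (a sketch, assuming determinism is driven entirely by the generator's seed): the values are drawn from the CPU generator's stream either way, so a given seed yields the same weights regardless of the target device.

```python
import torch

g1 = torch.Generator().manual_seed(0)
g2 = torch.Generator().manual_seed(0)

on_cpu = torch.rand((4, 4), generator=g1)            # stays on CPU
moved = torch.rand((4, 4), generator=g2).to("cuda")  # sampled on CPU, then moved

# Same RNG stream, so identical values on both devices.
assert torch.allclose(on_cpu, moved.cpu())
```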

tests/test_transform/factory/test_correctness.py

Lines changed: 36 additions & 5 deletions

```diff
@@ -26,7 +26,7 @@
 from tests.testing_utils import requires_accelerate, requires_gpu


-@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
+@pytest.mark.parametrize("type", ("hadamard", "random-hadamard", "random-matrix"))
 @pytest.mark.parametrize("randomize", (True, False))
 @pytest.mark.parametrize("head_dim", (None, 2, 4))
 @pytest.mark.parametrize("input_batch_size", (1, 5, 17))
@@ -57,7 +57,7 @@ def test_correctness_linear(type, randomize, head_dim, input_batch_size):
     assert torch.allclose(true_output, output, atol=1e-5, rtol=0.0)


-@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
+@pytest.mark.parametrize("type", ("hadamard", "random-hadamard", "random-matrix"))
 @pytest.mark.parametrize("randomize", (True, False))
 @pytest.mark.parametrize("embed_loc", ("weight_output", "output"))
 @pytest.mark.parametrize("linear_loc", ("input", "weight_input"))
@@ -89,7 +89,7 @@ def test_correctness_embedding(type, randomize, embed_loc, linear_loc):
     assert torch.allclose(true_output, output, atol=1e-5, rtol=0.0)


-@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
+@pytest.mark.parametrize("type", ("hadamard", "random-hadamard", "random-matrix"))
 @pytest.mark.parametrize("randomize", (True, False))
 @pytest.mark.parametrize("input_batch_size", (1, 5, 17))
 def test_correctness_model(
@@ -121,14 +121,14 @@ def test_correctness_model(

 @requires_gpu
 @requires_accelerate()
-@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
+@pytest.mark.parametrize("type", ("hadamard", "random-hadamard", "random-matrix"))
 @pytest.mark.parametrize("randomize", (True, False))
 @pytest.mark.parametrize("input_batch_size", (1, 5, 17))
 def test_correctness_model_offload(type, randomize, input_batch_size, model_apply):
     test_correctness_model(type, randomize, input_batch_size, model_apply, offload=True)


-@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
+@pytest.mark.parametrize("type", ("hadamard", "random-hadamard", "random-matrix"))
 @pytest.mark.parametrize("randomize", (True, False))
 @pytest.mark.parametrize("head_dim", (4, 8))
 @pytest.mark.parametrize("input_batch_size", (1, 5, 17))
@@ -164,3 +164,34 @@ def test_correctness_attention_heads(type, randomize, head_dim, input_batch_size):

     output = attention(input)
     assert torch.allclose(true_output, output, atol=1e-5, rtol=0.0)
+
+
+@requires_gpu
+@pytest.mark.parametrize("cuda_default", (True, False))
+def test_random_matrix_device_handling(cuda_default):
+    """
+    Test that random-matrix transforms can be created
+    on CUDA.
+    """
+    seed = 0
+    size = (4, 8)
+
+    cur_default = torch.get_default_device()
+    if cuda_default:
+        torch.set_default_device("cuda")
+    module = torch.nn.Linear(*size, bias=False).cuda()
+    scheme = TransformScheme(type="random-matrix", randomize=True)
+    factory = TransformFactory.from_scheme(scheme, name="", seed=seed)
+
+    # Create transforms - this should work despite CPU generator and CUDA module
+    input_tfm = factory.create_transform(
+        module, TransformArgs(targets="Linear", location="input", inverse=True)
+    )
+
+    # Verify transforms work correctly on CUDA
+    input = torch.rand((5, 3, size[0])).cuda()
+    input_tfm(input)
+
+    # Verify that transforms were created on CUDA
+    assert input_tfm.weight.device.type == "cuda"
+    torch.set_default_device(cur_default)
```
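
The `cuda_default` parametrization exists because `torch.set_default_device("cuda")` changes where factory functions allocate: with a CUDA default, even a `torch.rand(...)` call that passes only a CPU generator hits the same mismatch, while passing the generator's device explicitly overrides the default. A rough sketch of that interaction (assumes a CUDA build of PyTorch; `torch.get_default_device` needs a recent PyTorch release):

```python
import torch

gen = torch.Generator()  # CPU generator
prev = torch.get_default_device()
torch.set_default_device("cuda")
try:
    # No explicit device: the CUDA default applies and mismatches the CPU generator.
    torch.rand((2, 2), generator=gen)
except RuntimeError as err:
    print(err)
finally:
    torch.set_default_device(prev)

# Passing the generator's device explicitly sidesteps the default, as in the fix.
x = torch.rand((2, 2), generator=gen, device=gen.device).to("cuda")
```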
