diff --git a/test/quantization/test_qat.py b/test/quantization/test_qat.py index 3c29028898..58911683ee 100644 --- a/test/quantization/test_qat.py +++ b/test/quantization/test_qat.py @@ -1133,62 +1133,6 @@ def embedding_forward_4w(x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: baseline_out = embedding_forward_4w(x2, fq_embedding.weight) torch.testing.assert_close(baseline_out, fq_out, atol=0, rtol=0) - @unittest.skipIf( - not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower" - ) - def test_qat_prototype_bc(self): - """ - Just to make sure we can import all the old prototype paths. - We will remove this test in the near future when we actually break BC. - """ - from torchao.quantization.prototype.qat import ( # noqa: F401, F811, I001 - disable_4w_fake_quant, - disable_8da4w_fake_quant, - enable_4w_fake_quant, - enable_8da4w_fake_quant, - ComposableQATQuantizer, - Int8DynActInt4WeightQATLinear, - Int4WeightOnlyEmbeddingQATQuantizer, - Int4WeightOnlyQATQuantizer, - Int8DynActInt4WeightQATQuantizer, - ) - from torchao.quantization.prototype.qat._module_swap_api import ( # noqa: F401, F811 - disable_4w_fake_quant_module_swap, - enable_4w_fake_quant_module_swap, - disable_8da4w_fake_quant_module_swap, - enable_8da4w_fake_quant_module_swap, - Int4WeightOnlyQATQuantizerModuleSwap, - Int8DynActInt4WeightQATQuantizerModuleSwap, - ) - from torchao.quantization.prototype.qat.affine_fake_quantized_tensor import ( # noqa: F401, F811 - AffineFakeQuantizedTensor, - to_affine_fake_quantized, - ) - from torchao.quantization.prototype.qat.api import ( # noqa: F401, F811 - ComposableQATQuantizer, - FakeQuantizeConfig, - ) - from torchao.quantization.prototype.qat.embedding import ( # noqa: F401, F811 - FakeQuantizedEmbedding, - Int4WeightOnlyEmbeddingQATQuantizer, - Int4WeightOnlyEmbedding, - Int4WeightOnlyQATEmbedding, - ) - from torchao.quantization.prototype.qat.fake_quantizer import ( # noqa: F401, F811 - FakeQuantizer, - ) - from torchao.quantization.prototype.qat.linear import ( # noqa: F401, F811 - disable_4w_fake_quant, - disable_8da4w_fake_quant, - enable_4w_fake_quant, - enable_8da4w_fake_quant, - FakeQuantizedLinear, - Int4WeightOnlyQATLinear, - Int4WeightOnlyQATQuantizer, - Int8DynActInt4WeightQATLinear, - Int8DynActInt4WeightQATQuantizer, - ) - @unittest.skipIf( not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower" ) diff --git a/torchao/quantization/prototype/__init__.py b/torchao/quantization/prototype/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/torchao/quantization/prototype/qat/README.md b/torchao/quantization/prototype/qat/README.md deleted file mode 100644 index dbce4d48e1..0000000000 --- a/torchao/quantization/prototype/qat/README.md +++ /dev/null @@ -1,3 +0,0 @@ -Note: QAT has been moved to torchao/quantization/qat. -This is a legacy folder only for backward compatibility -and will be removed in the near future. diff --git a/torchao/quantization/prototype/qat/__init__.py b/torchao/quantization/prototype/qat/__init__.py deleted file mode 100644 index aa0084c6ea..0000000000 --- a/torchao/quantization/prototype/qat/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -from torchao.quantization.qat import ( - ComposableQATQuantizer, - Int4WeightOnlyEmbeddingQATQuantizer, - Int4WeightOnlyQATQuantizer, - Int8DynActInt4WeightQATQuantizer, -) -from torchao.quantization.qat.linear import ( - Int8DynActInt4WeightQATLinear, - disable_4w_fake_quant, - disable_8da4w_fake_quant, - enable_4w_fake_quant, - enable_8da4w_fake_quant, -) - -__all__ = [ - "disable_4w_fake_quant", - "disable_8da4w_fake_quant", - "enable_4w_fake_quant", - "enable_8da4w_fake_quant", - "ComposableQATQuantizer", - "Int4WeightOnlyQATQuantizer", - "Int4WeightOnlyEmbeddingQATQuantizer", - "Int8DynActInt4WeightQATQuantizer", - "Int8DynActInt4WeightQATLinear", -] diff --git a/torchao/quantization/prototype/qat/_module_swap_api.py b/torchao/quantization/prototype/qat/_module_swap_api.py deleted file mode 100644 index a0fbd877b2..0000000000 --- a/torchao/quantization/prototype/qat/_module_swap_api.py +++ /dev/null @@ -1,30 +0,0 @@ -# For backward compatibility only -# These will be removed in the future - -from torchao.quantization.qat.linear import ( - Int4WeightOnlyQATQuantizer as Int4WeightOnlyQATQuantizerModuleSwap, -) -from torchao.quantization.qat.linear import ( - Int8DynActInt4WeightQATQuantizer as Int8DynActInt4WeightQATQuantizerModuleSwap, -) -from torchao.quantization.qat.linear import ( - disable_4w_fake_quant as disable_4w_fake_quant_module_swap, -) -from torchao.quantization.qat.linear import ( - disable_8da4w_fake_quant as disable_8da4w_fake_quant_module_swap, -) -from torchao.quantization.qat.linear import ( - enable_4w_fake_quant as enable_4w_fake_quant_module_swap, -) -from torchao.quantization.qat.linear import ( - enable_8da4w_fake_quant as enable_8da4w_fake_quant_module_swap, -) - -__all__ = [ - "Int8DynActInt4WeightQATQuantizerModuleSwap", - "Int4WeightOnlyQATQuantizerModuleSwap", - "enable_8da4w_fake_quant_module_swap", - "disable_8da4w_fake_quant_module_swap", - "enable_4w_fake_quant_module_swap", - "disable_4w_fake_quant_module_swap", -] diff --git a/torchao/quantization/prototype/qat/affine_fake_quantized_tensor.py b/torchao/quantization/prototype/qat/affine_fake_quantized_tensor.py deleted file mode 100644 index 20d51912f0..0000000000 --- a/torchao/quantization/prototype/qat/affine_fake_quantized_tensor.py +++ /dev/null @@ -1,9 +0,0 @@ -from torchao.quantization.qat.affine_fake_quantized_tensor import ( - AffineFakeQuantizedTensor, - to_affine_fake_quantized, -) - -__all__ = [ - "AffineFakeQuantizedTensor", - "to_affine_fake_quantized", -] diff --git a/torchao/quantization/prototype/qat/api.py b/torchao/quantization/prototype/qat/api.py deleted file mode 100644 index c2f1d6f8d7..0000000000 --- a/torchao/quantization/prototype/qat/api.py +++ /dev/null @@ -1,9 +0,0 @@ -from torchao.quantization.qat.api import ( - ComposableQATQuantizer, - FakeQuantizeConfig, -) - -__all__ = [ - "ComposableQATQuantizer", - "FakeQuantizeConfig", -] diff --git a/torchao/quantization/prototype/qat/embedding.py b/torchao/quantization/prototype/qat/embedding.py deleted file mode 100644 index 98b4d05720..0000000000 --- a/torchao/quantization/prototype/qat/embedding.py +++ /dev/null @@ -1,13 +0,0 @@ -from torchao.quantization.qat.embedding import ( - FakeQuantizedEmbedding, - Int4WeightOnlyEmbedding, - Int4WeightOnlyEmbeddingQATQuantizer, - Int4WeightOnlyQATEmbedding, -) - -__all__ = [ - "FakeQuantizedEmbedding", - "Int4WeightOnlyEmbeddingQATQuantizer", - "Int4WeightOnlyEmbedding", - "Int4WeightOnlyQATEmbedding", -] diff --git a/torchao/quantization/prototype/qat/fake_quantizer.py b/torchao/quantization/prototype/qat/fake_quantizer.py deleted file mode 100644 index 3bbe1fb704..0000000000 --- a/torchao/quantization/prototype/qat/fake_quantizer.py +++ /dev/null @@ -1,7 +0,0 @@ -from torchao.quantization.qat.fake_quantizer import ( - FakeQuantizer, -) - -__all__ = [ - "FakeQuantizer", -] diff --git a/torchao/quantization/prototype/qat/linear.py b/torchao/quantization/prototype/qat/linear.py deleted file mode 100644 index 366ab3565c..0000000000 --- a/torchao/quantization/prototype/qat/linear.py +++ /dev/null @@ -1,23 +0,0 @@ -from torchao.quantization.qat.linear import ( - FakeQuantizedLinear, - Int4WeightOnlyQATLinear, - Int4WeightOnlyQATQuantizer, - Int8DynActInt4WeightQATLinear, - Int8DynActInt4WeightQATQuantizer, - disable_4w_fake_quant, - disable_8da4w_fake_quant, - enable_4w_fake_quant, - enable_8da4w_fake_quant, -) - -__all__ = [ - "disable_4w_fake_quant", - "disable_8da4w_fake_quant", - "enable_4w_fake_quant", - "enable_8da4w_fake_quant", - "FakeQuantizedLinear", - "Int4WeightOnlyQATLinear", - "Int4WeightOnlyQATQuantizer", - "Int8DynActInt4WeightQATLinear", - "Int8DynActInt4WeightQATQuantizer", -]