diff --git a/test/quantization/test_qat.py b/test/quantization/test_qat.py
index 3c29028898..58911683ee 100644
--- a/test/quantization/test_qat.py
+++ b/test/quantization/test_qat.py
@@ -1133,62 +1133,6 @@ def embedding_forward_4w(x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
         baseline_out = embedding_forward_4w(x2, fq_embedding.weight)
         torch.testing.assert_close(baseline_out, fq_out, atol=0, rtol=0)
 
-    @unittest.skipIf(
-        not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower"
-    )
-    def test_qat_prototype_bc(self):
-        """
-        Just to make sure we can import all the old prototype paths.
-        We will remove this test in the near future when we actually break BC.
-        """
-        from torchao.quantization.prototype.qat import (  # noqa: F401, F811, I001
-            disable_4w_fake_quant,
-            disable_8da4w_fake_quant,
-            enable_4w_fake_quant,
-            enable_8da4w_fake_quant,
-            ComposableQATQuantizer,
-            Int8DynActInt4WeightQATLinear,
-            Int4WeightOnlyEmbeddingQATQuantizer,
-            Int4WeightOnlyQATQuantizer,
-            Int8DynActInt4WeightQATQuantizer,
-        )
-        from torchao.quantization.prototype.qat._module_swap_api import (  # noqa: F401, F811
-            disable_4w_fake_quant_module_swap,
-            enable_4w_fake_quant_module_swap,
-            disable_8da4w_fake_quant_module_swap,
-            enable_8da4w_fake_quant_module_swap,
-            Int4WeightOnlyQATQuantizerModuleSwap,
-            Int8DynActInt4WeightQATQuantizerModuleSwap,
-        )
-        from torchao.quantization.prototype.qat.affine_fake_quantized_tensor import (  # noqa: F401, F811
-            AffineFakeQuantizedTensor,
-            to_affine_fake_quantized,
-        )
-        from torchao.quantization.prototype.qat.api import (  # noqa: F401, F811
-            ComposableQATQuantizer,
-            FakeQuantizeConfig,
-        )
-        from torchao.quantization.prototype.qat.embedding import (  # noqa: F401, F811
-            FakeQuantizedEmbedding,
-            Int4WeightOnlyEmbeddingQATQuantizer,
-            Int4WeightOnlyEmbedding,
-            Int4WeightOnlyQATEmbedding,
-        )
-        from torchao.quantization.prototype.qat.fake_quantizer import (  # noqa: F401, F811
-            FakeQuantizer,
-        )
-        from torchao.quantization.prototype.qat.linear import (  # noqa: F401, F811
-            disable_4w_fake_quant,
-            disable_8da4w_fake_quant,
-            enable_4w_fake_quant,
-            enable_8da4w_fake_quant,
-            FakeQuantizedLinear,
-            Int4WeightOnlyQATLinear,
-            Int4WeightOnlyQATQuantizer,
-            Int8DynActInt4WeightQATLinear,
-            Int8DynActInt4WeightQATQuantizer,
-        )
-
     @unittest.skipIf(
         not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower"
     )
diff --git a/torchao/quantization/prototype/__init__.py b/torchao/quantization/prototype/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/torchao/quantization/prototype/qat/README.md b/torchao/quantization/prototype/qat/README.md
deleted file mode 100644
index dbce4d48e1..0000000000
--- a/torchao/quantization/prototype/qat/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-Note: QAT has been moved to torchao/quantization/qat.
-This is a legacy folder only for backward compatibility
-and will be removed in the near future.
diff --git a/torchao/quantization/prototype/qat/__init__.py b/torchao/quantization/prototype/qat/__init__.py
deleted file mode 100644
index aa0084c6ea..0000000000
--- a/torchao/quantization/prototype/qat/__init__.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from torchao.quantization.qat import (
-    ComposableQATQuantizer,
-    Int4WeightOnlyEmbeddingQATQuantizer,
-    Int4WeightOnlyQATQuantizer,
-    Int8DynActInt4WeightQATQuantizer,
-)
-from torchao.quantization.qat.linear import (
-    Int8DynActInt4WeightQATLinear,
-    disable_4w_fake_quant,
-    disable_8da4w_fake_quant,
-    enable_4w_fake_quant,
-    enable_8da4w_fake_quant,
-)
-
-__all__ = [
-    "disable_4w_fake_quant",
-    "disable_8da4w_fake_quant",
-    "enable_4w_fake_quant",
-    "enable_8da4w_fake_quant",
-    "ComposableQATQuantizer",
-    "Int4WeightOnlyQATQuantizer",
-    "Int4WeightOnlyEmbeddingQATQuantizer",
-    "Int8DynActInt4WeightQATQuantizer",
-    "Int8DynActInt4WeightQATLinear",
-]
diff --git a/torchao/quantization/prototype/qat/_module_swap_api.py b/torchao/quantization/prototype/qat/_module_swap_api.py
deleted file mode 100644
index a0fbd877b2..0000000000
--- a/torchao/quantization/prototype/qat/_module_swap_api.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# For backward compatibility only
-# These will be removed in the future
-
-from torchao.quantization.qat.linear import (
-    Int4WeightOnlyQATQuantizer as Int4WeightOnlyQATQuantizerModuleSwap,
-)
-from torchao.quantization.qat.linear import (
-    Int8DynActInt4WeightQATQuantizer as Int8DynActInt4WeightQATQuantizerModuleSwap,
-)
-from torchao.quantization.qat.linear import (
-    disable_4w_fake_quant as disable_4w_fake_quant_module_swap,
-)
-from torchao.quantization.qat.linear import (
-    disable_8da4w_fake_quant as disable_8da4w_fake_quant_module_swap,
-)
-from torchao.quantization.qat.linear import (
-    enable_4w_fake_quant as enable_4w_fake_quant_module_swap,
-)
-from torchao.quantization.qat.linear import (
-    enable_8da4w_fake_quant as enable_8da4w_fake_quant_module_swap,
-)
-
-__all__ = [
-    "Int8DynActInt4WeightQATQuantizerModuleSwap",
-    "Int4WeightOnlyQATQuantizerModuleSwap",
-    "enable_8da4w_fake_quant_module_swap",
-    "disable_8da4w_fake_quant_module_swap",
-    "enable_4w_fake_quant_module_swap",
-    "disable_4w_fake_quant_module_swap",
-]
diff --git a/torchao/quantization/prototype/qat/affine_fake_quantized_tensor.py b/torchao/quantization/prototype/qat/affine_fake_quantized_tensor.py
deleted file mode 100644
index 20d51912f0..0000000000
--- a/torchao/quantization/prototype/qat/affine_fake_quantized_tensor.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from torchao.quantization.qat.affine_fake_quantized_tensor import (
-    AffineFakeQuantizedTensor,
-    to_affine_fake_quantized,
-)
-
-__all__ = [
-    "AffineFakeQuantizedTensor",
-    "to_affine_fake_quantized",
-]
diff --git a/torchao/quantization/prototype/qat/api.py b/torchao/quantization/prototype/qat/api.py
deleted file mode 100644
index c2f1d6f8d7..0000000000
--- a/torchao/quantization/prototype/qat/api.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from torchao.quantization.qat.api import (
-    ComposableQATQuantizer,
-    FakeQuantizeConfig,
-)
-
-__all__ = [
-    "ComposableQATQuantizer",
-    "FakeQuantizeConfig",
-]
diff --git a/torchao/quantization/prototype/qat/embedding.py b/torchao/quantization/prototype/qat/embedding.py
deleted file mode 100644
index 98b4d05720..0000000000
--- a/torchao/quantization/prototype/qat/embedding.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from torchao.quantization.qat.embedding import (
-    FakeQuantizedEmbedding,
-    Int4WeightOnlyEmbedding,
-    Int4WeightOnlyEmbeddingQATQuantizer,
-    Int4WeightOnlyQATEmbedding,
-)
-
-__all__ = [
-    "FakeQuantizedEmbedding",
-    "Int4WeightOnlyEmbeddingQATQuantizer",
-    "Int4WeightOnlyEmbedding",
-    "Int4WeightOnlyQATEmbedding",
-]
diff --git a/torchao/quantization/prototype/qat/fake_quantizer.py b/torchao/quantization/prototype/qat/fake_quantizer.py
deleted file mode 100644
index 3bbe1fb704..0000000000
--- a/torchao/quantization/prototype/qat/fake_quantizer.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from torchao.quantization.qat.fake_quantizer import (
-    FakeQuantizer,
-)
-
-__all__ = [
-    "FakeQuantizer",
-]
diff --git a/torchao/quantization/prototype/qat/linear.py b/torchao/quantization/prototype/qat/linear.py
deleted file mode 100644
index 366ab3565c..0000000000
--- a/torchao/quantization/prototype/qat/linear.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from torchao.quantization.qat.linear import (
-    FakeQuantizedLinear,
-    Int4WeightOnlyQATLinear,
-    Int4WeightOnlyQATQuantizer,
-    Int8DynActInt4WeightQATLinear,
-    Int8DynActInt4WeightQATQuantizer,
-    disable_4w_fake_quant,
-    disable_8da4w_fake_quant,
-    enable_4w_fake_quant,
-    enable_8da4w_fake_quant,
-)
-
-__all__ = [
-    "disable_4w_fake_quant",
-    "disable_8da4w_fake_quant",
-    "enable_4w_fake_quant",
-    "enable_8da4w_fake_quant",
-    "FakeQuantizedLinear",
-    "Int4WeightOnlyQATLinear",
-    "Int4WeightOnlyQATQuantizer",
-    "Int8DynActInt4WeightQATLinear",
-    "Int8DynActInt4WeightQATQuantizer",
-]