Skip to content

Commit 0b34b6d

Browse files
Raahul Kalyaan Jakka authored and facebook-github-bot committed
Adding E2E unit tests for KVTensorMetaData class (#4298)
Summary: Pull Request resolved: #4298. X-link: facebookresearch/FBGEMM#1372. Context: in the Publish component, we have aligned on not using the conventional serialization and deserialization; instead, we need to create a KVTensorMetaData object to pass data to the Publish component. In this diff we add a unit test for KVTensorMetaData consistency, covering: (a) serialization of KVT data, (b) construction of the KVTensorMetaData object, (c) creation of the ReadOnlyEmbeddingKVDB object, and (d) narrow() data consistency between the PMT and KVTensorMetaData paths. Reviewed By: duduyi2013. Differential Revision: D76234751
1 parent 6041d30 commit 0b34b6d

File tree

1 file changed

+76
-0
lines changed

1 file changed

+76
-0
lines changed

fbgemm_gpu/test/tbe/ssd/kv_backend_test.py

Lines changed: 76 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -617,3 +617,79 @@ def test_rocksdb_se_de_testing(
617617
t1 = pmt.wrapped.narrow(0, 0, Es[i])
618618
t2 = lo.wrapped.narrow(0, 0, Es[i])
619619
assert torch.equal(t1, t2)
620+
621+
@given(
    T=st.integers(min_value=3, max_value=3),
    D=st.integers(min_value=1, max_value=1),
    log_E=st.integers(min_value=1, max_value=1),
    mixed=st.booleans(),
    weights_precision=st.sampled_from([SparseType.FP32, SparseType.FP16]),
)
@settings(**default_settings)
def test_rocksdb_kv_metadata_testing(
    self,
    T: int,
    D: int,
    log_E: int,
    mixed: bool,
    weights_precision: SparseType,
) -> None:
    """End-to-end consistency test for KVTensorMetaData.

    Writes known rows into an SSD-backed TBE, takes a rocksdb hard-link
    checkpoint, then for each KV-backed partially materialized tensor (PMT):
    builds a ReadOnlyEmbeddingKVDB from the PMT's KVTensorMetaData, reads the
    table's row range back from the checkpoint, and asserts the data matches
    what the PMT itself returns via narrow().

    Args:
        T: number of tables (pinned to 3 by the strategy above).
        D: embedding-dimension multiplier (pinned to 1).
        log_E: log of rows per table (pinned to 1).
        mixed: whether tables get mixed dimensions.
        weights_precision: FP32 or FP16 row storage.
    """
    # Generate a TBE with T tables; Es/Ds are per-table row counts and dims.
    emb, Es, Ds = self.generate_fbgemm_kv_tbe(T, D, log_E, weights_precision, mixed)

    total_E = sum(Es)
    # One row id per slot, in order. (The original draft first built a random
    # permutation with np.random.choice and immediately overwrote it with
    # torch.arange; that dead assignment is removed here.)
    indices = torch.arange(total_E, dtype=torch.int64)

    weights = torch.randn(
        total_E, emb.cache_row_dim, dtype=weights_precision.as_dtype()
    )
    count = torch.as_tensor([total_E])

    # Write the rows into the TBE backend and wait for the async fill to land.
    emb.ssd_db.set(indices, weights, count)
    emb.ssd_db.wait_util_filling_work_done()

    # Flush data from the TBE cache to SSD so the checkpoint sees everything.
    emb.ssd_db.flush()

    # Create a hard_link_snapshot (i.e., a rocksdb checkpoint) and fetch the
    # partially materialized tensors backed by it.
    emb.ssd_db.create_rocksdb_hard_link_snapshot(0)
    pmts = emb.split_embedding_weights(no_snapshot=False)

    # For each PMT: construct a ReadOnlyEmbeddingKVDB from its metadata —
    # the multi-process-safe alternative to pickle round-tripping — and
    # verify the checkpoint contents against the original weights.
    for i, pmt in enumerate(pmts[0]):
        if type(pmt) is torch.Tensor:
            # Plain dense tensor (not KV-backed): nothing to verify here.
            continue
        # NOTE(review): if generate_kvtensor_metadata is a method rather than
        # a property, this needs call parentheses — confirm against the PMT
        # API; as written it would bind the method object instead.
        kv_metadata = pmt.generate_kvtensor_metadata

        readonly_rdb = torch.classes.fbgemm.ReadOnlyEmbeddingKVDB(
            kv_metadata.checkpoint_paths,
            kv_metadata.tbe_uuid,
            kv_metadata.rdb_num_shards,
            kv_metadata.rdb_num_threads,
            kv_metadata.max_D,
        )

        # dtype code 5 denotes fp16 in the serialized metadata; anything
        # else in this test is fp32 (strategy only samples FP32/FP16).
        d_type = torch.float16 if kv_metadata.dtype == 5 else torch.float32

        # Read this table's Es[i] rows (at table_offset) from the checkpoint.
        t = torch.empty(Es[i], kv_metadata.max_D, dtype=d_type)
        readonly_rdb.get_range_from_rdb_checkpoint(
            t, kv_metadata.table_offset, Es[i], 0  # start row, count, offset
        )
        # Trim padding columns down to the table's true width, then compare
        # against the same rows read through the PMT.
        t1 = t.narrow(1, 0, kv_metadata.table_shape[1])
        t2 = pmt.wrapped.narrow(0, 0, Es[i])

        assert torch.equal(t1, t2)

0 commit comments

Comments
 (0)