Commit cf5b50e

✅ Update script for annotation store
1 parent 8a95948 commit cf5b50e

5 files changed: +206 additions, -14 deletions

tests/engines/test_semantic_segmentor.py

Lines changed: 7 additions & 0 deletions

@@ -85,3 +85,10 @@ def test_semantic_segmentor_patches(
     output = zarr.open(output, mode="r")
     assert 0.24 < np.mean(output["predictions"][:]) < 0.25
     assert "probabilities" not in output.keys()  # noqa: SIM118
+
+
+# def test_hovernet_dat() -> None:
+#     from tiatoolbox.utils.misc import store_from_dat
+#     from pathlib import Path
+#     path_to_file = Path.cwd().parent.parent / "output" / "0.dat"
+#     out = store_from_dat(path_to_file, scale_factor=(1.0, 1.0))
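For context, a rough sketch of how the commented-out check above might read once enabled. It assumes a HoVerNet `0.dat` output file exists at the path used in the comment and that `store_from_dat` returns a populated `SQLiteStore`; it is illustrative only and not part of this commit.

def test_hovernet_dat() -> None:
    """Hypothetical test: load a HoVerNet .dat output into an annotation store."""
    from pathlib import Path

    from tiatoolbox.annotation.storage import SQLiteStore
    from tiatoolbox.utils.misc import store_from_dat

    # Assumes a HoVerNet result file exists at this (illustrative) location.
    path_to_file = Path.cwd().parent.parent / "output" / "0.dat"
    store = store_from_dat(path_to_file, scale_factor=(1.0, 1.0))

    assert isinstance(store, SQLiteStore)
    assert len(store) > 0  # at least one annotation was loaded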

tests/test_utils.py

Lines changed: 9 additions & 9 deletions

@@ -1654,7 +1654,7 @@ def test_patch_pred_store() -> None:
         "other": "other",
     }
 
-    store = misc.dict_to_store(patch_output, (1.0, 1.0))
+    store = misc.dict_to_store_patch_predictions(patch_output, (1.0, 1.0))
 
     # Check that it is an SQLiteStore containing the expected annotations
     assert isinstance(store, SQLiteStore)
@@ -1667,15 +1667,15 @@ def test_patch_pred_store() -> None:
     patch_output.pop("coordinates")
     # check correct error is raised if coordinates are missing
     with pytest.raises(ValueError, match="coordinates"):
-        misc.dict_to_store(patch_output, (1.0, 1.0))
+        misc.dict_to_store_patch_predictions(patch_output, (1.0, 1.0))
 
     patch_output = {
         "predictions": [1, 0, 1],
         "coordinates": [(0, 0, 1, 1), (1, 1, 2, 2), (2, 2, 3, 3)],
         "other": "other",
     }
 
-    store = misc.dict_to_store(patch_output, (1.0, 1.0))
+    store = misc.dict_to_store_patch_predictions(patch_output, (1.0, 1.0))
 
     # Check that it is an SQLiteStore containing the expected annotations
     assert isinstance(store, SQLiteStore)
@@ -1692,7 +1692,7 @@ def test_patch_pred_store_cdict() -> None:
         "other": "other",
     }
     class_dict = {0: "class0", 1: "class1"}
-    store = misc.dict_to_store(patch_output, (1.0, 1.0), class_dict=class_dict)
+    store = misc.dict_to_store_patch_predictions(patch_output, (1.0, 1.0), class_dict=class_dict)
 
     # Check that it is an SQLiteStore containing the expected annotations
     assert isinstance(store, SQLiteStore)
@@ -1713,7 +1713,7 @@ def test_patch_pred_store_sf() -> None:
         "probabilities": [[0.1, 0.9], [0.9, 0.1], [0.4, 0.6]],
         "labels": [1, 0, 1],
     }
-    store = misc.dict_to_store(patch_output, (2.0, 2.0))
+    store = misc.dict_to_store_patch_predictions(patch_output, (2.0, 2.0))
 
     # Check that its an SQLiteStore containing the expected annotations
     assert isinstance(store, SQLiteStore)
@@ -1770,7 +1770,7 @@ def test_patch_pred_store_persist(tmp_path: pytest.TempPathFactory) -> None:
     }
     save_path = tmp_path / "patch_output" / "output.db"
 
-    store_path = misc.dict_to_store(patch_output, (1.0, 1.0), save_path=save_path)
+    store_path = misc.dict_to_store_patch_predictions(patch_output, (1.0, 1.0), save_path=save_path)
 
     print("Annotation store path: ", store_path)
     assert Path.exists(store_path), "Annotation Store output file does not exist"
@@ -1788,7 +1788,7 @@ def test_patch_pred_store_persist(tmp_path: pytest.TempPathFactory) -> None:
     patch_output.pop("coordinates")
     # check correct error is raised if coordinates are missing
     with pytest.raises(ValueError, match="coordinates"):
-        misc.dict_to_store(patch_output, (1.0, 1.0))
+        misc.dict_to_store_patch_predictions(patch_output, (1.0, 1.0))
 
 
 def test_patch_pred_store_persist_ext(tmp_path: pytest.TempPathFactory) -> None:
@@ -1804,7 +1804,7 @@ def test_patch_pred_store_persist_ext(tmp_path: pytest.TempPathFactory) -> None:
     # sends the path of a jpeg source image, expects .db file in the same directory
     save_path = tmp_path / "patch_output" / "output.jpeg"
 
-    store_path = misc.dict_to_store(patch_output, (1.0, 1.0), save_path=save_path)
+    store_path = misc.dict_to_store_patch_predictions(patch_output, (1.0, 1.0), save_path=save_path)
 
     print("Annotation store path: ", store_path)
     assert Path.exists(store_path), "Annotation Store output file does not exist"
@@ -1822,7 +1822,7 @@ def test_patch_pred_store_persist_ext(tmp_path: pytest.TempPathFactory) -> None:
     patch_output.pop("coordinates")
     # check correct error is raised if coordinates are missing
     with pytest.raises(ValueError, match="coordinates"):
-        misc.dict_to_store(patch_output, (1.0, 1.0))
+        misc.dict_to_store_patch_predictions(patch_output, (1.0, 1.0))
 
 
 def test_torch_compile_already_compiled() -> None:
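The rename is mechanical: every call site of `dict_to_store` now uses `dict_to_store_patch_predictions` with the same arguments. A minimal, self-contained sketch mirroring the dictionaries used in these tests (the values are arbitrary):

from tiatoolbox.annotation.storage import SQLiteStore
from tiatoolbox.utils import misc

patch_output = {
    "predictions": [1, 0, 1],
    "coordinates": [(0, 0, 1, 1), (1, 1, 2, 2), (2, 2, 3, 3)],
}
# Coordinates are multiplied by scale_factor, so (1.0, 1.0) keeps them at baseline.
store = misc.dict_to_store_patch_predictions(patch_output, (1.0, 1.0))
assert isinstance(store, SQLiteStore)  # an in-memory store when no save_path is given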

tiatoolbox/models/engine/engine_abc.py

Lines changed: 3 additions & 3 deletions

@@ -21,7 +21,7 @@
 from tiatoolbox.models.dataset.dataset_abc import PatchDataset, WSIPatchDataset
 from tiatoolbox.models.models_abc import load_torch_model
 from tiatoolbox.utils.misc import (
-    dict_to_store,
+    dict_to_store_patch_predictions,
     dict_to_zarr,
     write_to_zarr_in_cache_mode,
 )
@@ -633,7 +633,7 @@ def save_predictions(
         processed_predictions: dict | Path,
         output_type: str,
         save_dir: Path | None = None,
-        **kwargs: dict,
+        **kwargs: EngineABCRunParams,
     ) -> dict | AnnotationStore | Path:
         """Save model predictions.
 
@@ -679,7 +679,7 @@ def save_predictions(
                 processed_predictions_path = processed_predictions
                 processed_predictions = zarr.open(processed_predictions, mode="r")
 
-            out_file = dict_to_store(
+            out_file = dict_to_store_patch_predictions(
                 processed_predictions,
                 scale_factor,
                 class_dict,
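The only behavioural note here is that `**kwargs` is now typed as `EngineABCRunParams` rather than a bare `dict`. A small, hedged sketch of the run parameters this method reads; only the keys that this commit itself accesses (`output_file`, `scale_factor`, `class_dict`) are assumed, and the final call is commented out because it needs a constructed engine:

# Keys assumed from the kwargs.get(...) calls in this commit; other
# EngineABCRunParams fields are not shown here.
run_params = {
    "output_file": "output.db",                # destination file name
    "scale_factor": (2.0, 2.0),                # model_mpp / slide_mpp
    "class_dict": {0: "class0", 1: "class1"},  # index -> class name mapping
}
# engine.save_predictions(processed_predictions, "AnnotationStore", save_dir, **run_params)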

tiatoolbox/models/engine/semantic_segmentor_new.py

Lines changed: 75 additions & 0 deletions

@@ -2,11 +2,14 @@
 
 from __future__ import annotations
 
+import shutil
 from typing import TYPE_CHECKING
 
+import zarr
 from typing_extensions import Unpack
 
 from .patch_predictor import PatchPredictor, PredictorRunParams
+from ...utils.misc import dict_to_zarr, dict_to_store_semantic_segmentor
 
 if TYPE_CHECKING:  # pragma: no cover
     import os
@@ -297,6 +300,78 @@ def __init__(
             verbose=verbose,
         )
 
+    def save_predictions(
+        self: PatchPredictor,
+        processed_predictions: dict | Path,
+        output_type: str,
+        save_dir: Path | None = None,
+        **kwargs: SemanticSegmentorRunParams,
+    ) -> dict | AnnotationStore | Path:
+        """Save semantic segmentation predictions to disk.
+
+        Args:
+            processed_predictions (dict | Path):
+                A dictionary or path to zarr with model prediction information.
+            save_dir (Path):
+                Optional output path to directory to save the patch dataset output to a
+                `.zarr` or `.db` file, provided `patch_mode` is True. If the
+                `patch_mode` is False then `save_dir` is required.
+            output_type (str):
+                The desired output type for resulting patch dataset.
+            **kwargs (SemanticSegmentorRunParams):
+                Keyword Args required to save the output.
+
+        Returns:
+            dict or Path or :class:`AnnotationStore`:
+                If the `output_type` is "AnnotationStore", the function returns
+                the patch predictor output as an SQLiteStore containing Annotations
+                for each or the Path to a `.db` file depending on whether a
+                save_dir Path is provided. Otherwise, the function defaults to
+                returning patch predictor output, either as a dict or the Path to a
+                `.zarr` file depending on whether a save_dir Path is provided.
+
+        """
+        if (
+            self.cache_mode or not save_dir
+        ) and output_type.lower() != "annotationstore":
+            return processed_predictions
+
+        save_path = Path(kwargs.get("output_file", save_dir / "output.db"))
+
+        if output_type.lower() == "annotationstore":
+            # scale_factor set from kwargs
+            scale_factor = kwargs.get("scale_factor", (1.0, 1.0))
+            # class_dict set from kwargs
+            class_dict = kwargs.get("class_dict")
+
+            processed_predictions_path: str | Path | None = None
+
+            # Need to add support for zarr conversion.
+            if self.cache_mode:
+                processed_predictions_path = processed_predictions
+                processed_predictions = zarr.open(processed_predictions, mode="r")
+
+            out_file = dict_to_store_semantic_segmentor(
+                processed_predictions,
+                scale_factor,
+                class_dict,
+                save_path,
+            )
+            if processed_predictions_path is not None:
+                shutil.rmtree(processed_predictions_path)
+
+            return out_file
+
+        return (
+            dict_to_zarr(
+                processed_predictions,
+                save_path,
+                **kwargs,
+            )
+            if isinstance(processed_predictions, dict)
+            else processed_predictions
+        )
+
     def run(
         self: SemanticSegmentor,
         images: list[os | Path | WSIReader] | np.ndarray,
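The docstring above lists three possible outcomes for `save_predictions`. Below is a small, runnable mirror of that routing, written as a standalone sketch rather than the engine code itself; the `route_output` helper and its return strings are purely illustrative:

from __future__ import annotations

from pathlib import Path


def route_output(output_type: str, save_dir: Path | None, *, cache_mode: bool = False) -> str:
    """Report which branch the new save_predictions would take (sketch only)."""
    if (cache_mode or not save_dir) and output_type.lower() != "annotationstore":
        return "predictions returned unchanged (dict or .zarr path)"
    if output_type.lower() == "annotationstore":
        return "dict_to_store_semantic_segmentor -> SQLiteStore / .db"
    return "dict_to_zarr -> .zarr"


print(route_output("AnnotationStore", Path("results")))  # annotation store branch
print(route_output("zarr", Path("results")))             # zarr branch
print(route_output("dict", None))                        # returned unchanged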

tiatoolbox/utils/misc.py

Lines changed: 112 additions & 2 deletions

@@ -1228,6 +1228,116 @@ def patch_predictions_as_annotations(
     return annotations
 
 
+def dict_to_store_semantic_segmentor(
+    patch_output: dict | zarr.group,
+    scale_factor: tuple[float, float],
+    class_dict: dict | None = None,
+    save_path: Path | None = None,
+) -> AnnotationStore | Path:
+    """Converts output of TIAToolbox SemanticSegmentor engine to AnnotationStore.
+
+    Args:
+        patch_output (dict | zarr.Group):
+            A dictionary with "probabilities", "predictions", and "labels" keys.
+        scale_factor (tuple[float, float]):
+            The scale factor to use when loading the
+            annotations. All coordinates will be multiplied by this factor to allow
+            conversion of annotations saved at non-baseline resolution to baseline.
+            Should be model_mpp/slide_mpp.
+        class_dict (dict):
+            Optional dictionary mapping class indices to class names.
+        save_path (str or Path):
+            Optional Output directory to save the Annotation
+            Store results.
+
+    Returns:
+        (SQLiteStore or Path):
+            An SQLiteStore containing Annotations for each patch
+            or Path to file storing SQLiteStore containing Annotations
+            for each patch.
+
+    """
+    preds = patch_output["predictions"]
+    layer_list = np.unique(preds)
+    layer_list = np.delete(layer_list, np.where(layer_list == 0))
+    layer_info_dict = {}
+    count = 1
+
+    for type_class in layer_list:
+        layer = np.where(preds == type_class, 1, 0).astype("uint8")
+        contours, _ = cv2.findContours(
+            layer.astype("uint8"),
+            cv2.RETR_TREE,
+            cv2.CHAIN_APPROX_NONE,
+        )
+        for layer in contours:
+            coords = layer[:, 0, :]
+            layer_info_dict[count] = {
+                "contours": coords,
+                "type": class_dict[type_class],
+            }
+            count += 1
+
+    # return layer_info_dict
+
+    # if "coordinates" not in patch_output:
+    #     # we cant create annotations without coordinates
+    #     msg = "Patch output must contain coordinates."
+    #     raise ValueError(msg)
+    #
+    # # get relevant keys
+    # class_probs = get_zarr_array(patch_output.get("probabilities", []))
+    # preds = get_zarr_array(patch_output.get("predictions", []))
+    #
+    # patch_coords = np.array(patch_output.get("coordinates", []))
+    # if not np.all(np.array(scale_factor) == 1):
+    #     patch_coords = patch_coords * (np.tile(scale_factor, 2))  # to baseline mpp
+    # patch_coords = patch_coords.astype(float)
+    # labels = patch_output.get("labels", [])
+    # # get classes to consider
+    # if len(class_probs) == 0:
+    #     classes_predicted = np.unique(preds).tolist()
+    # else:
+    #     classes_predicted = range(len(class_probs[0]))
+    #
+    # if class_dict is None:
+    #     # if no class dict create a default one
+    #     if len(class_probs) == 0:
+    #         class_dict = {i: i for i in np.unique(np.append(preds, labels)).tolist()}
+    #     else:
+    #         class_dict = {i: i for i in range(len(class_probs[0]))}
+    #
+    # # find what keys we need to save
+    # keys = ["predictions"]
+    # keys = keys + [key for key in ["probabilities", "labels"] if key in patch_output]
+    #
+    # # put patch predictions into a store
+    # annotations = patch_predictions_as_annotations(
+    #     preds,
+    #     keys,
+    #     class_dict,
+    #     class_probs,
+    #     patch_coords,
+    #     classes_predicted,
+    #     labels,
+    # )
+    #
+    # store = SQLiteStore()
+    # _ = store.append_many(annotations, [str(i) for i in range(len(annotations))])
+    #
+    # # if a save director is provided, then dump store into a file
+    # if save_path:
+    #     # ensure parent directory exists
+    #     save_path.parent.absolute().mkdir(parents=True, exist_ok=True)
+    #     # ensure proper db extension
+    #     save_path = save_path.parent.absolute() / (save_path.stem + ".db")
+    #     store.dump(save_path)
+    #     return save_path
+    #
+    # return store
+
+
+
 def get_zarr_array(zarr_array: zarr.core.Array | np.ndarray | list) -> np.ndarray:
     """Converts a zarr array into a numpy array."""
     if isinstance(zarr_array, zarr.core.Array):
@@ -1236,13 +1346,13 @@ def get_zarr_array(zarr_array: zarr.core.Array | np.ndarray | list) -> np.ndarray:
     return np.array(zarr_array).astype(float)
 
 
-def dict_to_store(
+def dict_to_store_patch_predictions(
     patch_output: dict | zarr.group,
     scale_factor: tuple[float, float],
     class_dict: dict | None = None,
     save_path: Path | None = None,
 ) -> AnnotationStore | Path:
-    """Converts (and optionally saves) output of TIAToolbox engines as AnnotationStore.
+    """Converts output of TIAToolbox PatchPredictor engines to AnnotationStore.
 
     Args:
         patch_output (dict | zarr.Group):
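For orientation, here is a small standalone sketch of the contour-extraction loop that the active part of `dict_to_store_semantic_segmentor` performs on the `"predictions"` mask. The toy mask and class names below are illustrative; note that the committed function builds `layer_info_dict` but does not yet return it or convert it into an `AnnotationStore` (that part is still commented out).

import cv2
import numpy as np

# A toy 2-class prediction mask (0 = background), standing in for
# patch_output["predictions"] from the semantic segmentor.
preds = np.zeros((64, 64), dtype=np.uint8)
preds[8:24, 8:24] = 1
preds[40:56, 30:50] = 2
class_dict = {1: "tumour", 2: "stroma"}  # illustrative class names

layer_info_dict = {}
count = 1
for type_class in np.unique(preds):
    if type_class == 0:  # background is dropped, as in the committed code
        continue
    layer = np.where(preds == type_class, 1, 0).astype("uint8")
    contours, _ = cv2.findContours(layer, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    for contour in contours:
        layer_info_dict[count] = {
            "contours": contour[:, 0, :],  # (N, 2) boundary coordinates
            "type": class_dict[type_class],
        }
        count += 1

print({k: (v["type"], v["contours"].shape) for k, v in layer_info_dict.items()})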
