@@ -56,21 +56,28 @@ def resolve(
     geo_df_new = ShapesModel.parse(geo_df_new, transformations=transformations)
 
     log.info("Aggregating transcripts on merged cells")
-    table_conflicts = count_transcripts(sdata, gene_column, geo_df=geo_df_new, points_key=points_key)
+    table_conflicts = count_transcripts(
+        sdata, gene_column, geo_df=geo_df_new, points_key=points_key
+    )
     table_conflicts.obs_names = new_ids
     table_conflicts = [table_conflicts]
 
     valid_ids = set(list(geo_df.index))
     table = anndata.concat(
-        [adata[list(valid_ids & set(list(adata.obs_names)))] for adata in adatas] + table_conflicts,
+        [adata[list(valid_ids & set(list(adata.obs_names)))] for adata in adatas]
+        + table_conflicts,
         join="outer",
     )
     table.obs.dropna(axis="columns", inplace=True)
 
     geo_df = geo_df.loc[table.obs_names]
 
-    table.obsm["spatial"] = np.array([[centroid.x, centroid.y] for centroid in geo_df.centroid])
-    table.obs[SopaKeys.REGION_KEY] = pd.Series(key_added, index=table.obs_names, dtype="category")
+    table.obsm["spatial"] = np.array(
+        [[centroid.x, centroid.y] for centroid in geo_df.centroid]
+    )
+    table.obs[SopaKeys.REGION_KEY] = pd.Series(
+        key_added, index=table.obs_names, dtype="category"
+    )
     table.obs[SopaKeys.INSTANCE_KEY] = geo_df.index
 
     table = TableModel.parse(
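Aside: the `join="outer"` passed to `anndata.concat` above keeps the union of variables across all concatenated tables. A minimal sketch with toy AnnData objects (names are illustrative, not from this PR):

    import anndata
    import numpy as np

    # Two toy tables with overlapping but unequal gene sets.
    a = anndata.AnnData(np.ones((2, 2)))
    a.obs_names, a.var_names = ["cell_1", "cell_2"], ["gene_a", "gene_b"]
    b = anndata.AnnData(np.ones((3, 2)))
    b.obs_names, b.var_names = ["cell_3", "cell_4", "cell_5"], ["gene_b", "gene_c"]

    # join="outer" keeps the union of var_names; entries missing from one
    # table are padded (zeros for sparse matrices, NaN for dense arrays).
    merged = anndata.concat([a, b], join="outer")
    print(merged.var_names.tolist())  # ['gene_a', 'gene_b', 'gene_c']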
@@ -83,7 +90,9 @@ def resolve(
     add_spatial_element(sdata, key_added, geo_df)
     add_spatial_element(sdata, SopaKeys.TABLE, table)
 
-    log.info(f"Added sdata.tables['{SopaKeys.TABLE}'], and {len(geo_df)} cell boundaries to sdata['{key_added}']")
+    log.info(
+        f"Added sdata.tables['{SopaKeys.TABLE}'], and {len(geo_df)} cell boundaries to sdata['{key_added}']"
+    )
 
 
 def _read_one_segmented_patch(
@@ -94,13 +103,18 @@ def _read_one_segmented_patch(
 
     loom_file = directory / "segmentation_counts.loom"
     if loom_file.exists():
-        adata = anndata.io.read_loom(directory / "segmentation_counts.loom", obs_names="Name", var_names="Name")
+        adata = anndata.io.read_loom(
+            directory / "segmentation_counts.loom", obs_names="Name", var_names="Name"
+        )
     else:
         adata = anndata.io.read_h5ad(directory / "segmentation_counts.h5ad")
 
     adata.obs.rename(columns={"area": SopaKeys.ORIGINAL_AREA_OBS}, inplace=True)
 
-    cells_ids = pd.Series(adata.obs_names if id_as_string else adata.obs["CellID"].astype(int), index=adata.obs_names)
+    cells_ids = pd.Series(
+        adata.obs_names if id_as_string else adata.obs["CellID"].astype(int),
+        index=adata.obs_names,
+    )
     del adata.obs["CellID"]
 
     with open(polygon_file) as f:
@@ -114,12 +128,16 @@ def _keep_cell(ID: str | int):
 
     cells_ids = cells_ids[cells_ids.map(_keep_cell)]
 
-    geo_df = gpd.GeoDataFrame(index=cells_ids.index, geometry=[shape(polygons_dict[ID]) for ID in cells_ids])
+    geo_df = gpd.GeoDataFrame(
+        index=cells_ids.index, geometry=[shape(polygons_dict[ID]) for ID in cells_ids]
+    )
     geo_df = shapes.to_valid_polygons(geo_df)
 
     ratio_filtered = (geo_df.area <= min_area).mean()
     if ratio_filtered > 0.2:
-        log.warning(f"{ratio_filtered:.2%} of cells will be filtered due to {min_area=}")
+        log.warning(
+            f"{ratio_filtered:.2%} of cells will be filtered due to {min_area=}"
+        )
 
     geo_df = geo_df[geo_df.area > min_area]
 
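Aside: the warning above combines two f-string features. A minimal sketch (plain Python, nothing sopa-specific):

    # ":.2%" formats a ratio as a percentage; "{min_area=}" (Python 3.8+)
    # prints the variable name together with its value.
    ratio_filtered, min_area = 0.25, 10.0
    print(f"{ratio_filtered:.2%} of cells will be filtered due to {min_area=}")
    # -> 25.00% of cells will be filtered due to min_area=10.0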
@@ -131,7 +149,9 @@ def _find_polygon_file(directory: Path) -> tuple[bool, Path]:
     if old_baysor_path.exists():
         return False, old_baysor_path
     new_baysor_path = directory / "segmentation_polygons_2d.json"
-    assert new_baysor_path.exists(), f"Could not find the segmentation polygons file in {directory}"
+    assert new_baysor_path.exists(), (
+        f"Could not find the segmentation polygons file in {directory}"
+    )
     return True, new_baysor_path
 
 
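Aside: when wrapping a long assert like this (and like the ones in the hunks below), the parentheses must enclose only the message or only the condition, never the whole `condition, message` pair: a parenthesized pair is a non-empty tuple, which is always truthy. A minimal sketch (plain Python, not from this PR):

    x = 0
    # Safe: the parentheses only group a long message string.
    assert x == 0, (
        "parentheses around the message are fine"
    )
    # Broken: a (condition, message) tuple is always truthy, so this assert
    # could never fail; CPython emits a SyntaxWarning for it.
    # assert (x == 1, "this would silently pass")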
@@ -163,25 +183,34 @@ def _resolve_patches(
     """
     patch_ids = [adata.obs_names for adata in adatas]
 
-    patch_indices = np.arange(len(patches_cells)).repeat([len(cells) for cells in patches_cells])
+    patch_indices = np.arange(len(patches_cells)).repeat(
+        [len(cells) for cells in patches_cells]
+    )
     cells = [cell for cells in patches_cells for cell in cells]
     segmentation_ids = np.array([cell_id for ids in patch_ids for cell_id in ids])
 
-    cells_resolved, cells_indices = solve_conflicts(cells, patch_indices=patch_indices, return_indices=True)
+    cells_resolved, cells_indices = solve_conflicts(
+        cells, patch_indices=patch_indices, return_indices=True
+    )
 
     existing_ids = segmentation_ids[cells_indices[cells_indices >= 0]]
-    new_ids = np.char.add("merged_cell_", np.arange((cells_indices == -1).sum()).astype(str))
+    new_ids = np.char.add(
+        "merged_cell_", np.arange((cells_indices == -1).sum()).astype(str)
+    )
     cells_resolved.index = np.concatenate([existing_ids, new_ids])
 
     return cells_resolved, cells_indices, new_ids
 
 
 def _check_transcript_patches(sdata: SpatialData, with_prior: bool = False):
-    assert (
-        SopaKeys.TRANSCRIPTS_PATCHES in sdata.shapes
-    ), "Transcript patches not found in the SpatialData object. Run `sopa.make_transcript_patches(...)` first."
+    assert SopaKeys.TRANSCRIPTS_PATCHES in sdata.shapes, (
+        "Transcript patches not found in the SpatialData object. Run `sopa.make_transcript_patches(...)` first."
+    )
 
-    directories = [Path(path) for path in sdata[SopaKeys.TRANSCRIPTS_PATCHES][SopaKeys.CACHE_PATH_KEY]]
+    directories = [
+        Path(path)
+        for path in sdata[SopaKeys.TRANSCRIPTS_PATCHES][SopaKeys.CACHE_PATH_KEY]
+    ]
 
     assert all(directory.exists() for directory in directories), (
         "Some patch directories are missing. "
@@ -191,7 +220,9 @@ def _check_transcript_patches(sdata: SpatialData, with_prior: bool = False):
     )
 
     if with_prior:
-        assert SopaKeys.PRIOR_SHAPES_KEY in sdata[SopaKeys.TRANSCRIPTS_PATCHES].columns, (
+        assert (
+            SopaKeys.PRIOR_SHAPES_KEY in sdata[SopaKeys.TRANSCRIPTS_PATCHES].columns
+        ), (
             "You need to create the transcript patches with a `prior_shapes_key`. "
            "For that, you can run cellpose first, and then run again `sopa.make_transcript_patches` with `prior_shapes_key='cellpose_boundaries'`"
         )
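Aside: `_resolve_patches` builds ids for newly merged cells with `np.char.add`, which prepends a prefix to a string array element-wise. A minimal sketch with toy values (not from this PR):

    import numpy as np

    # -1 in cells_indices marks a cell created by merging conflicting cells.
    cells_indices = np.array([0, -1, 2, -1])
    n_new = (cells_indices == -1).sum()
    new_ids = np.char.add("merged_cell_", np.arange(n_new).astype(str))
    print(new_ids)  # ['merged_cell_0' 'merged_cell_1']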