Skip to content

Commit f152ab1

Browse files
Merge pull request #111 from gustaveroussy/dev
Dev
2 parents 18da9a2 + d440a10 commit f152ab1

File tree

11 files changed

+158
-23
lines changed

11 files changed

+158
-23
lines changed

CHANGELOG.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,26 @@
11
## [1.x.x] - 2024-xx-xx
22

3+
## [1.1.4] - 2024-08-21
4+
5+
### Hotfix
6+
- Fixed Baysor issue on MERSCOPE data with the Vizgen prior
7+
- Fixed patch-maker issue caused by the temporary implementation of the new API
8+
9+
10+
## [1.1.3] - 2024-08-18
11+
312
### Fix
413
- Fixed aggregation issue when gene names are `NaN` or `None` (#101)
514
- Fix Xenium reader for old Xenium data format (#105)
615

716
### Added
817
- Support multipolygons in ROI rasterization
918
- Added bins aggregation
10-
- Added Visium HD reader (tutorial comming soon)
19+
- Added Visium HD reader (tutorial coming soon)
1120

1221
### Changed
1322
- Import submodules in init (segmentation, io, utils)
14-
- API simplification in progress (new API + tutorial comming soon)
23+
- API simplification in progress (new API + tutorial coming soon)
1524

1625
## [1.1.2] - 2024-07-24
1726

docs/api/segmentation/aggregate.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
::: sopa.segmentation.aggregate.overlay_segmentation
1+
::: sopa.segmentation.aggregation.overlay_segmentation
22

3-
::: sopa.segmentation.aggregate.average_channels
3+
::: sopa.segmentation.aggregation.average_channels
44

5-
::: sopa.segmentation.aggregate._average_channels_aligned
5+
::: sopa.segmentation.aggregation._average_channels_aligned
66

7-
::: sopa.segmentation.aggregate.count_transcripts
7+
::: sopa.segmentation.aggregation.count_transcripts
88

9-
::: sopa.segmentation.aggregate._count_transcripts_aligned
9+
::: sopa.segmentation.aggregation._count_transcripts_aligned
1010

11-
::: sopa.segmentation.aggregate.Aggregator
11+
::: sopa.segmentation.aggregation.Aggregator

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "sopa"
3-
version = "1.1.3"
3+
version = "1.1.4"
44
description = "Spatial-omics pipeline and analysis"
55
documentation = "https://gustaveroussy.github.io/sopa"
66
homepage = "https://gustaveroussy.github.io/sopa"

sopa/_constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ class ROI:
5858

5959
class SopaFiles:
6060
SOPA_CACHE_DIR = ".sopa_cache"
61+
TRANSCRIPT_TEMP_DIR = "transcript_patches"
6162
PATCHES_FILE_IMAGE = "patches_file_image"
6263
PATCHES_DIRS_BAYSOR = "patches_file_baysor"
6364
PATCHES_DIRS_COMSEG = "patches_file_comseg"

sopa/_sdata.py

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,19 @@
11
from __future__ import annotations
22

33
import logging
4-
from typing import Iterator
4+
from pathlib import Path
5+
from typing import Any, Iterator
56

67
import geopandas as gpd
78
import pandas as pd
8-
import xarray as xr
9+
from anndata import AnnData
910
from datatree import DataTree
1011
from spatialdata import SpatialData
1112
from spatialdata.models import SpatialElement
1213
from spatialdata.transformations import Identity, get_transformation, set_transformation
1314
from xarray import DataArray
1415

15-
from ._constants import SopaAttrs, SopaKeys
16+
from ._constants import SopaAttrs, SopaFiles, SopaKeys
1617

1718
log = logging.getLogger(__name__)
1819

@@ -112,14 +113,14 @@ def get_intensities(sdata: SpatialData) -> pd.DataFrame | None:
112113
return adata.to_df()
113114

114115

115-
def iter_scales(image: DataTree) -> Iterator[xr.DataArray]:
116+
def iter_scales(image: DataTree) -> Iterator[DataArray]:
116117
"""Iterates through all the scales of a `DataTree`
117118
118119
Args:
119120
image: a `DataTree`
120121
121122
Yields:
122-
Each scale (as a `xr.DataArray`)
123+
Each scale (as a `DataArray`)
123124
"""
124125
assert isinstance(image, DataTree), f"Multiscale iteration is reserved for type DataTree. Found {type(image)}"
125126

@@ -154,22 +155,42 @@ def get_spatial_element(
154155
if len(element_dict) == 1:
155156
key = next(iter(element_dict.keys()))
156157

157-
assert valid_attr is None or element_dict[key].attrs.get(
158+
assert valid_attr is None or _get_spatialdata_attrs(element_dict[key]).get(
158159
valid_attr, True
159160
), f"Element {key} is not valid for the attribute {valid_attr}."
160161

161162
return _return_element(element_dict, key, return_key, as_spatial_image)
162163

163164
assert valid_attr is not None, "Multiple elements found. Provide an element key."
164165

165-
keys = [key for key, element in element_dict.items() if element.attrs.get(valid_attr)]
166+
keys = [key for key, element in element_dict.items() if _get_spatialdata_attrs(element).get(valid_attr)]
166167

167168
assert len(keys) > 0, f"No element with the attribute {valid_attr}. Provide an element key."
168169
assert len(keys) == 1, f"Multiple valid elements found: {keys}. Provide an element key."
169170

170171
return _return_element(element_dict, keys[0], return_key, as_spatial_image)
171172

172173

174+
def _get_spatialdata_attrs(element: SpatialElement) -> dict[str, Any]:
175+
if isinstance(element, DataTree):
176+
element = next(iter(element["scale0"].values()))
177+
return element.attrs.get("spatialdata_attrs", {})
178+
179+
180+
def _update_spatialdata_attrs(element: SpatialElement, attrs: dict):
181+
if isinstance(element, DataTree):
182+
for image_scale in iter_scales(element):
183+
_update_spatialdata_attrs(image_scale, attrs)
184+
return
185+
186+
old_attrs = element.uns if isinstance(element, AnnData) else element.attrs
187+
188+
if "spatialdata_attrs" not in old_attrs:
189+
old_attrs["spatialdata_attrs"] = {}
190+
191+
old_attrs["spatialdata_attrs"].update(attrs)
192+
193+
173194
def get_spatial_image(
174195
sdata: SpatialData,
175196
key: str | None = None,
@@ -205,3 +226,9 @@ def _return_element(
205226
element = next(iter(element["scale0"].values()))
206227

207228
return (key, element) if return_key else element
229+
230+
231+
def get_cache_dir(sdata: SpatialData) -> Path:
232+
assert sdata.is_backed(), "SpatialData not saved on-disk. Save the object, or provide a cache directory."
233+
234+
return sdata.path / SopaFiles.SOPA_CACHE_DIR

sopa/cli/patchify.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ def _patchify_transcripts(
202202
config or config_path is not None
203203
), "Provide '--config-path', the path to a Baysor config file (toml) or comseg file (jsons)"
204204

205-
df_key = get_spatial_element(sdata.points)
205+
df_key, _ = get_spatial_element(sdata.points, return_key=True)
206206
patches = Patches2D(sdata, df_key, patch_width_microns, patch_overlap_microns)
207207
valid_indices = patches.patchify_transcripts(
208208
temp_dir,

sopa/io/reader/visium_hd.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from spatialdata_io.readers.visium_hd import visium_hd as visium_hd_spatialdata_io
88

99
from ..._constants import SopaAttrs
10+
from ..._sdata import _update_spatialdata_attrs
1011
from ...utils import string_channel_names
1112
from .utils import _default_image_kwargs
1213

@@ -44,14 +45,19 @@ def visium_hd(
4445

4546
string_channel_names(sdata) # Ensure that channel names are strings
4647

48+
### Add Sopa attributes to detect the spatial elements
4749
for key, image in sdata.images.items():
4850
if key.endswith("_full_image"):
49-
image.attrs[SopaAttrs.CELL_SEGMENTATION] = True
51+
_update_spatialdata_attrs(image, {SopaAttrs.CELL_SEGMENTATION: True})
5052
elif key.endswith("_hires_image"):
51-
image.attrs[SopaAttrs.TISSUE_SEGMENTATION] = True
53+
_update_spatialdata_attrs(image, {SopaAttrs.TISSUE_SEGMENTATION: True})
5254

5355
for key, geo_df in sdata.shapes.items():
5456
if key.endswith("_002um"):
55-
geo_df.attrs[SopaAttrs.BINS_AGGREGATION] = True
57+
_update_spatialdata_attrs(geo_df, {SopaAttrs.BINS_AGGREGATION: True})
58+
59+
for key, table in sdata.tables.items():
60+
if key.endswith("_002um"):
61+
_update_spatialdata_attrs(table, {SopaAttrs.BINS_TABLE: True})
5662

5763
return sdata

sopa/io/reader/xenium.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from spatialdata_io.readers.xenium import xenium as xenium_spatialdata_io
88

99
from ..._constants import SopaAttrs
10+
from ..._sdata import _update_spatialdata_attrs
1011
from ...utils import string_channel_names
1112
from .utils import _default_image_kwargs
1213

@@ -56,6 +57,6 @@ def xenium(
5657

5758
for key, image in sdata.images.items():
5859
if key.startswith("morphology"):
59-
image.attrs[SopaAttrs.CELL_SEGMENTATION] = True
60+
_update_spatialdata_attrs(image, {SopaAttrs.CELL_SEGMENTATION: True})
6061

6162
return sdata

sopa/patches/patches.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from .._constants import EPS, ROI, SopaFiles, SopaKeys
2121
from .._sdata import (
2222
get_boundaries,
23+
get_cache_dir,
2324
get_spatial_element,
2425
get_spatial_image,
2526
to_intrinsic,
@@ -435,6 +436,33 @@ def _assign_prior(series: dd.Series, unassigned_value: int | str | None) -> pd.S
435436
if series.dtype == "int":
436437
if unassigned_value is None or unassigned_value == 0:
437438
return series
438-
return series.replace(unassigned_value, 0)
439+
return series.replace(int(unassigned_value), 0)
439440

440441
raise ValueError(f"Invalid dtype {series.dtype} for prior cell ids. Must be int or string.")
442+
443+
444+
def make_image_patches(
445+
sdata: SpatialData, patch_width: int = 2000, patch_overlap: int = 50, image_key: str | None = None
446+
):
447+
image_key, _ = get_spatial_image(sdata, key=image_key, return_key=True)
448+
patches = Patches2D(sdata, image_key, patch_width=patch_width, patch_overlap=patch_overlap)
449+
450+
patches.write()
451+
452+
453+
def make_transcript_patches(
454+
sdata: SpatialData,
455+
config: dict = {},
456+
patch_width: int = 2000,
457+
patch_overlap: int = 50,
458+
points_key: str | None = None,
459+
cache_dir: str | Path | None = None,
460+
) -> list[int]:
461+
points_key, _ = get_spatial_element(sdata, key=points_key, return_key=True)
462+
patches = Patches2D(sdata, points_key, patch_width=patch_width, patch_overlap=patch_overlap)
463+
464+
cache_dir = Path(cache_dir or get_cache_dir(sdata)) / SopaFiles.TRANSCRIPT_TEMP_DIR
465+
466+
valid_indices = patches.patchify_transcripts(cache_dir, config=config)
467+
468+
return valid_indices

sopa/utils/data.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def uniform(
3737
include_image: bool = True,
3838
apply_blur: bool = True,
3939
as_output: bool = False,
40+
transcript_cell_id_as_merscope: bool = False,
4041
) -> SpatialData:
4142
"""Generate a dummy dataset composed of cells generated uniformly in a square. It also has transcripts.
4243
@@ -149,7 +150,7 @@ def uniform(
149150
sdata = SpatialData(images=images, points=points, shapes=shapes)
150151

151152
_map_transcript_to_cell(sdata, "cell_id", sdata["transcripts"], sdata["cells"])
152-
sdata["transcripts"]["cell_id"] = sdata["transcripts"]["cell_id"].astype(int)
153+
sdata["transcripts"]["cell_id"] = sdata["transcripts"]["cell_id"].astype(int) - int(transcript_cell_id_as_merscope)
153154

154155
if as_output:
155156
_add_table(sdata)

0 commit comments

Comments
 (0)