Skip to content

Commit 5251df5

Browse files
Use symlink to Xenium output morphology/transcripts files to avoid duplicating data
1 parent e2a8473 commit 5251df5

File tree

4 files changed

+26
-2
lines changed

4 files changed

+26
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
### Added
77
- Run `module load baysor` in Snakemake pipeline if the module is available.
8+
- Use symlinks to the Xenium output morphology/transcripts files to avoid duplicating data.
89

910
## [2.0.2] - 2025-02-21
1011

sopa/_constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ class SopaAttrs:
5353
TRANSCRIPTS = "transcripts_dataframe"
5454
BOUNDARIES = "boundaries_shapes"
5555
UID = "sopa_uid"
56+
XENIUM_OUTPUT_PATH = "xenium_output_path"
5657

5758

5859
class SopaFiles:

sopa/io/explorer/converter.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def write(
150150
if len(sdata.points):
151151
df = get_spatial_element(sdata.points, key=points_key or sdata.attrs.get(SopaAttrs.TRANSCRIPTS))
152152

153-
if _should_save(mode, "t") and df is not None:
153+
if _should_save(mode, "t") and not _use_symlink(path, sdata, "transcripts*") and df is not None:
154154
gene_column = gene_column or get_feature_key(df)
155155
if gene_column is not None:
156156
df = to_intrinsic(sdata, df, image_key)
@@ -159,7 +159,7 @@ def write(
159159
log.warning("The argument 'gene_column' has to be provided to save the transcripts")
160160

161161
### Saving image
162-
if _should_save(mode, "i"):
162+
if _should_save(mode, "i") and not _use_symlink(path, sdata, "morphology*"):
163163
write_image(
164164
path,
165165
sdata[image_key],
@@ -176,6 +176,26 @@ def write(
176176
log.info(f"You can open the experiment with 'open {path / FileNames.METADATA}'")
177177

178178

179+
def _use_symlink(path: Path, sdata: SpatialData, pattern: str) -> bool:
    """Symlink matching Xenium output files into `path` instead of re-generating them.

    Args:
        path: Directory in which the symlinks are created.
        sdata: Object whose `attrs` may contain `SopaAttrs.XENIUM_OUTPUT_PATH`.
        pattern: Glob pattern matched against the content of the Xenium output directory.

    Returns:
        `True` if at least one file matched the pattern and every match was symlinked.
        `False` if the attribute is missing, if nothing matched, or if a non-symlink
        entry already exists at one of the target locations (in which case nothing is
        created or removed).
    """
    if SopaAttrs.XENIUM_OUTPUT_PATH not in sdata.attrs:
        return False

    files = list(Path(sdata.attrs[SopaAttrs.XENIUM_OUTPUT_PATH]).glob(pattern))
    targets = [path / file.name for file in files]

    # Validate every target before touching the filesystem: bailing out half-way
    # would leave some symlinks in place while telling the caller to regenerate
    # the files (NOTE(review): the caller may then write through such a symlink).
    # `exists()` follows symlinks, so it is only True here for real files/directories
    # or for valid symlinks; the latter are excluded by `is_symlink()`.
    if any(target.exists() and not target.is_symlink() for target in targets):
        return False  # avoid removing non-symlink files

    for file, target in zip(files, targets):
        # `is_symlink()` is also True for dangling links, which `exists()` reports
        # as missing; they must be removed or `symlink_to` raises FileExistsError.
        if target.is_symlink():
            target.unlink()

        target.symlink_to(file)
        log.info(f"Created symlink {target} -> {file}")

    return len(files) > 0
197+
198+
179199
def _get_n_obs(sdata: SpatialData, geo_df: gpd.GeoDataFrame, table_key: str) -> int:
180200
if table_key in sdata.tables:
181201
return sdata.tables[table_key].n_obs

sopa/io/reader/xenium.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,6 @@ def xenium(
9292
sdata.points["transcripts"]["qv"] < qv_threshold
9393
)
9494

95+
sdata.attrs[SopaAttrs.XENIUM_OUTPUT_PATH] = str(Path(path).resolve())
96+
9597
return sdata

0 commit comments

Comments
 (0)