@@ -56,21 +56,28 @@ def resolve(
     geo_df_new = ShapesModel.parse(geo_df_new, transformations=transformations)
 
     log.info("Aggregating transcripts on merged cells")
-    table_conflicts = count_transcripts(sdata, gene_column, geo_df=geo_df_new, points_key=points_key)
+    table_conflicts = count_transcripts(
+        sdata, gene_column, geo_df=geo_df_new, points_key=points_key
+    )
     table_conflicts.obs_names = new_ids
     table_conflicts = [table_conflicts]
 
     valid_ids = set(list(geo_df.index))
     table = anndata.concat(
-        [adata[list(valid_ids & set(list(adata.obs_names)))] for adata in adatas] + table_conflicts,
+        [adata[list(valid_ids & set(list(adata.obs_names)))] for adata in adatas]
+        + table_conflicts,
         join="outer",
     )
     table.obs.dropna(axis="columns", inplace=True)
 
     geo_df = geo_df.loc[table.obs_names]
 
-    table.obsm["spatial"] = np.array([[centroid.x, centroid.y] for centroid in geo_df.centroid])
-    table.obs[SopaKeys.REGION_KEY] = pd.Series(key_added, index=table.obs_names, dtype="category")
+    table.obsm["spatial"] = np.array(
+        [[centroid.x, centroid.y] for centroid in geo_df.centroid]
+    )
+    table.obs[SopaKeys.REGION_KEY] = pd.Series(
+        key_added, index=table.obs_names, dtype="category"
+    )
     table.obs[SopaKeys.INSTANCE_KEY] = geo_df.index
 
     table = TableModel.parse(
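Aside: the `join="outer"` passed to `anndata.concat` above keeps the union of variables across all concatenated tables. A minimal sketch with toy AnnData objects (names are illustrative, not from this PR):

    import anndata
    import numpy as np

    # Two toy tables with overlapping but unequal gene sets.
    a = anndata.AnnData(np.ones((2, 2)))
    a.obs_names, a.var_names = ["cell_1", "cell_2"], ["gene_a", "gene_b"]
    b = anndata.AnnData(np.ones((3, 2)))
    b.obs_names, b.var_names = ["cell_3", "cell_4", "cell_5"], ["gene_b", "gene_c"]

    # join="outer" keeps the union of var_names; entries missing from one
    # table are padded (zeros for sparse matrices, NaN for dense arrays).
    merged = anndata.concat([a, b], join="outer")
    print(merged.var_names.tolist())  # ['gene_a', 'gene_b', 'gene_c']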
@@ -83,7 +90,9 @@ def resolve(
     add_spatial_element(sdata, key_added, geo_df)
     add_spatial_element(sdata, SopaKeys.TABLE, table)
 
-    log.info(f"Added sdata.tables['{SopaKeys.TABLE}'], and {len(geo_df)} cell boundaries to sdata['{key_added}']")
+    log.info(
+        f"Added sdata.tables['{SopaKeys.TABLE}'], and {len(geo_df)} cell boundaries to sdata['{key_added}']"
+    )
 
 
 def _read_one_segmented_patch(
@@ -94,13 +103,18 @@ def _read_one_segmented_patch(
 
     loom_file = directory / "segmentation_counts.loom"
     if loom_file.exists():
-        adata = anndata.io.read_loom(directory / "segmentation_counts.loom", obs_names="Name", var_names="Name")
+        adata = anndata.io.read_loom(
+            directory / "segmentation_counts.loom", obs_names="Name", var_names="Name"
+        )
     else:
         adata = anndata.io.read_h5ad(directory / "segmentation_counts.h5ad")
 
     adata.obs.rename(columns={"area": SopaKeys.ORIGINAL_AREA_OBS}, inplace=True)
 
-    cells_ids = pd.Series(adata.obs_names if id_as_string else adata.obs["CellID"].astype(int), index=adata.obs_names)
+    cells_ids = pd.Series(
+        adata.obs_names if id_as_string else adata.obs["CellID"].astype(int),
+        index=adata.obs_names,
+    )
     del adata.obs["CellID"]
 
     with open(polygon_file) as f:
@@ -114,12 +128,16 @@ def _keep_cell(ID: str | int):
 
     cells_ids = cells_ids[cells_ids.map(_keep_cell)]
 
-    geo_df = gpd.GeoDataFrame(index=cells_ids.index, geometry=[shape(polygons_dict[ID]) for ID in cells_ids])
+    geo_df = gpd.GeoDataFrame(
+        index=cells_ids.index, geometry=[shape(polygons_dict[ID]) for ID in cells_ids]
+    )
     geo_df = shapes.to_valid_polygons(geo_df)
 
     ratio_filtered = (geo_df.area <= min_area).mean()
     if ratio_filtered > 0.2:
-        log.warning(f"{ratio_filtered:.2%} of cells will be filtered due to {min_area=}")
+        log.warning(
+            f"{ratio_filtered:.2%} of cells will be filtered due to {min_area=}"
+        )
 
     geo_df = geo_df[geo_df.area > min_area]
 
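Aside: the warning above combines two f-string features. A minimal sketch (plain Python, nothing sopa-specific):

    # ":.2%" formats a ratio as a percentage; "{min_area=}" (Python 3.8+)
    # prints the variable name together with its value.
    ratio_filtered, min_area = 0.25, 10.0
    print(f"{ratio_filtered:.2%} of cells will be filtered due to {min_area=}")
    # -> 25.00% of cells will be filtered due to min_area=10.0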
@@ -131,7 +149,9 @@ def _find_polygon_file(directory: Path) -> tuple[bool, Path]:
     if old_baysor_path.exists():
         return False, old_baysor_path
     new_baysor_path = directory / "segmentation_polygons_2d.json"
-    assert new_baysor_path.exists(), f"Could not find the segmentation polygons file in {directory}"
+    assert new_baysor_path.exists(), (
+        f"Could not find the segmentation polygons file in {directory}"
+    )
     return True, new_baysor_path
 
 
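Aside: when wrapping a long assert like this (and like the ones in the hunks below), the parentheses must enclose only the message or only the condition, never the whole `condition, message` pair: a parenthesized pair is a non-empty tuple, which is always truthy. A minimal sketch (plain Python, not from this PR):

    x = 0
    # Safe: the parentheses only group a long message string.
    assert x == 0, (
        "parentheses around the message are fine"
    )
    # Broken: a (condition, message) tuple is always truthy, so this assert
    # could never fail; CPython emits a SyntaxWarning for it.
    # assert (x == 1, "this would silently pass")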
@@ -163,25 +183,34 @@ def _resolve_patches(
     """
     patch_ids = [adata.obs_names for adata in adatas]
 
-    patch_indices = np.arange(len(patches_cells)).repeat([len(cells) for cells in patches_cells])
+    patch_indices = np.arange(len(patches_cells)).repeat(
+        [len(cells) for cells in patches_cells]
+    )
     cells = [cell for cells in patches_cells for cell in cells]
     segmentation_ids = np.array([cell_id for ids in patch_ids for cell_id in ids])
 
-    cells_resolved, cells_indices = solve_conflicts(cells, patch_indices=patch_indices, return_indices=True)
+    cells_resolved, cells_indices = solve_conflicts(
+        cells, patch_indices=patch_indices, return_indices=True
+    )
 
     existing_ids = segmentation_ids[cells_indices[cells_indices >= 0]]
-    new_ids = np.char.add("merged_cell_", np.arange((cells_indices == -1).sum()).astype(str))
+    new_ids = np.char.add(
+        "merged_cell_", np.arange((cells_indices == -1).sum()).astype(str)
+    )
     cells_resolved.index = np.concatenate([existing_ids, new_ids])
 
     return cells_resolved, cells_indices, new_ids
 
 
 def _check_transcript_patches(sdata: SpatialData, with_prior: bool = False):
-    assert (
-        SopaKeys.TRANSCRIPTS_PATCHES in sdata.shapes
-    ), "Transcript patches not found in the SpatialData object. Run `sopa.make_transcript_patches(...)` first."
+    assert SopaKeys.TRANSCRIPTS_PATCHES in sdata.shapes, (
+        "Transcript patches not found in the SpatialData object. Run `sopa.make_transcript_patches(...)` first."
+    )
 
-    directories = [Path(path) for path in sdata[SopaKeys.TRANSCRIPTS_PATCHES][SopaKeys.CACHE_PATH_KEY]]
+    directories = [
+        Path(path)
+        for path in sdata[SopaKeys.TRANSCRIPTS_PATCHES][SopaKeys.CACHE_PATH_KEY]
+    ]
 
     assert all(directory.exists() for directory in directories), (
         "Some patch directories are missing. "
@@ -191,7 +220,9 @@ def _check_transcript_patches(sdata: SpatialData, with_prior: bool = False):
     )
 
     if with_prior:
-        assert SopaKeys.PRIOR_SHAPES_KEY in sdata[SopaKeys.TRANSCRIPTS_PATCHES].columns, (
+        assert (
+            SopaKeys.PRIOR_SHAPES_KEY in sdata[SopaKeys.TRANSCRIPTS_PATCHES].columns
+        ), (
             "You need to create the transcript patches with a `prior_shapes_key`. "
            "For that, you can run cellpose first, and then run again `sopa.make_transcript_patches` with `prior_shapes_key='cellpose_boundaries'`"
         )
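Aside: `_resolve_patches` builds ids for newly merged cells with `np.char.add`, which prepends a prefix to a string array element-wise. A minimal sketch with toy values (not from this PR):

    import numpy as np

    # -1 in cells_indices marks a cell created by merging conflicting cells.
    cells_indices = np.array([0, -1, 2, -1])
    n_new = (cells_indices == -1).sum()
    new_ids = np.char.add("merged_cell_", np.arange(n_new).astype(str))
    print(new_ids)  # ['merged_cell_0' 'merged_cell_1']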