Skip to content

Commit b83b84d

Browse files
committed
Partial revert
1 parent 607d426 commit b83b84d

File tree

5 files changed

+25
-72
lines changed

5 files changed

+25
-72
lines changed

src/opencosmo/collection/structure/collection.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
from warnings import warn
55

66
import astropy # type: ignore
7-
import numpy as np
87

98
import opencosmo as oc
109
from opencosmo.collection.structure import io as sio
1110
from opencosmo.collection.structure import visit
1211
from opencosmo.dataset.column import DerivedColumn
12+
from opencosmo.index import DataIndex
1313
from opencosmo.io import io
1414
from opencosmo.io.schemas import StructCollectionSchema
1515
from opencosmo.parameters import HaccSimulationParameters
@@ -166,8 +166,8 @@ def __getitem__(self, key: str) -> oc.Dataset | oc.StructureCollection:
166166
elif key == self.__header.file.data_type:
167167
return self.__source
168168

169-
start, size = self.__links[key].make_indices(self.__index)
170-
return self.__datasets[key].take_rows(start, size)
169+
index = self.__links[key].make_index(self.__index)
170+
return self.__datasets[key].with_index(index)
171171

172172
def __enter__(self):
173173
return self
@@ -609,8 +609,8 @@ def with_new_columns(self, dataset: str, **new_columns: DerivedColumn):
609609
self.__hide_source,
610610
)
611611

612-
def take_rows(self, indices: np.ndarray, size: Optional[np.ndarray] = None):
613-
new_source = self.__source.take_rows(indices, size)
612+
def with_index(self, index: DataIndex):
613+
new_source = self.__source.with_index(index)
614614
return StructureCollection(
615615
new_source, self.__header, self.__datasets, self.__links, self.__hide_source
616616
)
@@ -650,8 +650,8 @@ def objects(
650650
for i, row in enumerate(self.__source.rows()):
651651
index = self.__source.index[i]
652652
output = {
653-
key: self.__datasets[key].take_rows(
654-
*self.__links[key].make_indices(index)
653+
key: self.__datasets[key].with_index(
654+
self.__links[key].make_index(index)
655655
)
656656
for key in data_types
657657
}

src/opencosmo/collection/structure/handler.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from __future__ import annotations
22

3-
from typing import TYPE_CHECKING, Optional
3+
from typing import TYPE_CHECKING
44

55
import h5py
66
import numpy as np
77

8-
from opencosmo.index import DataIndex
8+
from opencosmo.index import ChunkedIndex, DataIndex, SimpleIndex
99
from opencosmo.io import schemas as ios
1010

1111
if TYPE_CHECKING:
@@ -35,17 +35,22 @@ def has_linked_data(self, index: DataIndex) -> np.ndarray:
3535
rows = index.get_data(self.link)
3636
return rows != -1
3737

38-
def make_indices(self, index: DataIndex) -> tuple[np.ndarray, Optional[np.ndarray]]:
38+
def make_index(self, index: DataIndex) -> DataIndex:
3939
if isinstance(self.link, tuple):
4040
start = index.get_data(self.link[0])
4141
size = index.get_data(self.link[1])
4242
valid_rows = size > 0
43-
starts = start[valid_rows]
44-
sizes = size[valid_rows]
45-
return starts, sizes
43+
start = start[valid_rows]
44+
size = size[valid_rows]
45+
if not start.size:
46+
return SimpleIndex(np.array([], dtype=int))
47+
else:
48+
return ChunkedIndex(start, size)
4649
else:
4750
indices_into_data = index.get_data(self.link)
48-
return indices_into_data[indices_into_data >= 0], None
51+
indices_into_data = indices_into_data[indices_into_data >= 0]
52+
53+
return SimpleIndex(indices_into_data)
4954

5055
def make_schema(self, name: str, index: DataIndex) -> ios.LinkSchema:
5156
if isinstance(self.link, h5py.Dataset):

src/opencosmo/collection/structure/io.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,12 @@ def get_linked_datasets(
9090

9191
def make_index_with_linked_data(
9292
index: DataIndex, links: dict[str, LinkedDatasetHandler]
93-
) -> np.ndarray:
93+
):
9494
mask = np.ones(len(index), dtype=bool)
9595
for link in links.values():
9696
mask &= link.has_linked_data(index)
9797

98-
return np.where(mask)[0]
98+
return index.mask(mask)
9999

100100

101101
def build_structure_collection(targets: list[io.io.OpenTarget], ignore_empty: bool):
@@ -139,7 +139,7 @@ def build_structure_collection(targets: list[io.io.OpenTarget], ignore_empty: bo
139139
source_dataset = io.io.open_single_dataset(link_sources["galaxy_properties"][0])
140140
if ignore_empty:
141141
new_index = make_index_with_linked_data(source_dataset.index, handlers)
142-
source_dataset = source_dataset.take_rows(new_index)
142+
source_dataset = source_dataset.with_index(new_index)
143143
collection = sc.StructureCollection(
144144
source_dataset,
145145
source_dataset.header,
@@ -161,7 +161,7 @@ def build_structure_collection(targets: list[io.io.OpenTarget], ignore_empty: bo
161161

162162
if ignore_empty:
163163
new_index = make_index_with_linked_data(source_dataset.index, handlers)
164-
source_dataset = source_dataset.take_rows(new_index)
164+
source_dataset = source_dataset.with_index(new_index)
165165

166166
return sc.StructureCollection(
167167
source_dataset,

src/opencosmo/dataset/dataset.py

Lines changed: 2 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from opencosmo.dataset.state import DatasetState
1313
from opencosmo.dataset.visit import visit_dataset
1414
from opencosmo.header import OpenCosmoHeader
15-
from opencosmo.index import ChunkedIndex, DataIndex, SimpleIndex
15+
from opencosmo.index import ChunkedIndex, DataIndex
1616
from opencosmo.io.schemas import DatasetSchema
1717
from opencosmo.parameters import HaccSimulationParameters
1818
from opencosmo.spatial import check
@@ -569,44 +569,7 @@ def take_range(self, start: int, end: int) -> Table:
569569
self.__tree,
570570
)
571571

572-
def take_rows(self, indices: np.ndarray, size: Optional[np.ndarray] = None):
573-
"""
574-
Create a new dataset with the rows specified by the arguments. This method allows for
575-
two different types of input
576-
577-
1. A single array, representing the indices of the rows to take
578-
2. Two arrays. The first representing the start of a set of row chunks, and the second representing
579-
their size
580-
581-
Parameters
582-
----------
583-
indices: np.ndarray[int]
584-
The rows to take, or the beginnings of the row chunks
585-
586-
size: np.ndarray[int], optional
587-
The size of the row chunks
588-
589-
Returns
590-
-------
591-
dataset : opencosmo.Dataset
592-
The new dataset with only the selected rows.
593-
594-
"""
595-
if np.any(indices < 0):
596-
raise ValueError("Row indices cannot be negative!")
597-
598-
size_ = 0 if size is None else size
599-
if np.any(indices + size_) > len(self):
600-
raise ValueError(
601-
"Row indices cannot be larger than the length of this dataset!"
602-
)
603-
604-
index: DataIndex
605-
if size is None:
606-
index = SimpleIndex(indices)
607-
else:
608-
index = ChunkedIndex(indices, size)
609-
572+
def with_index(self, index: DataIndex):
610573
new_state = self.__state.with_index(index)
611574
return Dataset(self.__handler, self.__header, new_state, self.__tree)
612575

test/test_collection.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -291,21 +291,6 @@ def test_data_linking(halo_paths):
291291
assert n_profiles > 0
292292

293293

294-
def test_data_gets_all_particles(halo_paths):
295-
collection = oc.open(*halo_paths)
296-
collection = collection.filter(oc.col("sod_halo_mass") > 10**14).take(
297-
10, at="random"
298-
)
299-
for halo in collection.halos():
300-
for name, particle_species in halo.items():
301-
if "particle" not in name:
302-
continue
303-
halo_tag = halo["halo_properties"]["fof_halo_tag"]
304-
tag_filter = oc.col("fof_halo_tag") == halo_tag
305-
ds = collection[name].filter(tag_filter)
306-
assert len(ds) == len(particle_species)
307-
308-
309294
def test_data_linking_bound(halo_paths):
310295
collection = oc.open(*halo_paths)
311296
p1 = tuple(random.uniform(10, 20) for _ in range(3))

0 commit comments

Comments
 (0)