Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions opencosmo/collection/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from .collection import Collection, ParticleCollection, SimulationCollection
from .io import open_linked, open_multi_dataset_file, read_multi_dataset_file
from .io import open_multi_dataset_file, open_simulation_files, read_multi_dataset_file

__all__ = [
"Collection",
"open_linked",
"open_multi_dataset_file",
"read_multi_dataset_file",
"ParticleCollection",
"SimulationCollection",
"open_simulation_files",
]
24 changes: 15 additions & 9 deletions opencosmo/collection/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,19 @@ class Collection(Protocol):
support higher-level operations that are applied across all datasets in the
collection, sometimes in a non-obvious way.

This protocl defines methods a collection must implement. Note that
the "open" and "read" methods are used in the case an entire collection
This protocol defines methods a collection must implement. Most notably they
must include __getitem__, keys, values and items, which allow
a collection to behave like a read-only dictionary.


Note that the "open" and "read" methods are used in the case an entire collection
is located within a single file. Multi-file collections are handled
in the collection.io module. Most complexity is hidden from the user
who simply calls "oc.read" and "oc.open" to get a collection. The io
module also does sanity checking to ensure files are structurally valid,
who simply calls "oc.read" and "oc.open" to get a collection. The io
module also does sanity checking to ensure files are structurally valid,
so we do not have to do it here.
"""

@classmethod
def open(
cls, file: h5py.File, datasets_to_get: Optional[Iterable[str]] = None
Expand All @@ -46,10 +51,11 @@ def read(

def write(self, file: h5py.File): ...

def as_dict(self) -> dict[str, oc.Dataset]: ...

def __getitem__(self, key: str) -> oc.Dataset: ...
def keys(self) -> Iterable[str]: ...
def values(self) -> Iterable[oc.Dataset]: ...
def items(self) -> Iterable[tuple[str, oc.Dataset]]: ...
def __enter__(self): ...

def __exit__(self, *exc_details): ...


Expand All @@ -63,7 +69,7 @@ def write_with_common_header(
# figure out if we have unique headers

header.write(file)
for key, dataset in collection.as_dict().items():
for key, dataset in collection.items():
dataset.write(file, key, with_header=False)


Expand All @@ -74,7 +80,7 @@ def write_with_unique_headers(collection: Collection, file: h5py.File):
"""
# figure out if we have unique headers

for key, dataset in collection.as_dict().items():
for key, dataset in collection.items():
dataset.write(file, key)


Expand Down
61 changes: 24 additions & 37 deletions opencosmo/collection/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,36 @@

import h5py

import opencosmo as oc
from opencosmo import dataset as ds
from opencosmo import io
from opencosmo.collection import Collection, ParticleCollection, SimulationCollection
from opencosmo.collection.link import LinkedCollection, get_links, verify_links
from opencosmo.header import read_header
from opencosmo.link.collection import LinkedCollection


class FileHandle:
"""
Helper class used just for setup
def open_simulation_files(**paths: Path) -> SimulationCollection:
"""
Open multiple files and return a simulation collection. The data
type of every file must be the same.

Parameters
----------
paths : str or Path
The paths to the files to open.

def __init__(self, path: Path):
self.handle = h5py.File(path, "r")
self.header = read_header(self.handle)
Returns
-------
SimulationCollection

"""
datasets: dict[str, oc.Dataset] = {}
for key, path in paths.items():
dataset = oc.open(path)
if not isinstance(dataset, oc.Dataset):
raise ValueError("All datasets must be of the same type.")
dtypes = set(dataset.header.file.data_type for dataset in datasets.values())
if len(dtypes) != 1:
raise ValueError("All datasets must be of the same type.")
return SimulationCollection(dtypes.pop(), datasets)


def open_multi_dataset_file(
Expand All @@ -41,34 +56,6 @@ def read_multi_dataset_file(
return CollectionType.read(file, datasets)


def open_linked(*files: Path):
"""
Open a collection of files that are linked together, such as a
properties file and a particle file.
"""
file_handles = [FileHandle(file) for file in files]
datasets = [io.open(file) for file in files]
property_file_type, linked_files = verify_links(*[fh.header for fh in file_handles])
property_handle = next(
filter(lambda x: x.header.file.data_type == property_file_type, file_handles)
).handle
links = get_links(property_handle)
if not links:
raise ValueError("No valid links found in files")

output_datasets: dict[str, ds.Dataset] = {}
for dataset in datasets:
if isinstance(dataset, ds.Dataset):
output_datasets[dataset.header.file.data_type] = dataset
else:
output_datasets.update(dataset.as_dict())

properties_file = output_datasets.pop(property_file_type)
return LinkedCollection(
properties_file.header, properties_file, output_datasets, links
)


def get_collection_type(file: h5py.File) -> type[Collection]:
"""
Determine the type of a single file containing multiple datasets. Currently
Expand Down
Loading
Loading