From 6af453ac8fafd2be280b75ef4de676477b58a90d Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 18 Apr 2025 12:18:46 -0500 Subject: [PATCH 1/7] Expose simulation parameters --- opencosmo/dataset/dataset.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/opencosmo/dataset/dataset.py b/opencosmo/dataset/dataset.py index ca759cb6..3d268747 100644 --- a/opencosmo/dataset/dataset.py +++ b/opencosmo/dataset/dataset.py @@ -14,6 +14,7 @@ from opencosmo.dataset.mask import Mask, apply_masks from opencosmo.handler import OpenCosmoDataHandler from opencosmo.header import OpenCosmoHeader, write_header +from opencosmo.parameters import SimulationParameters class Dataset: @@ -71,6 +72,19 @@ def cosmology(self) -> Cosmology: """ return self.__header.cosmology + @property + def simulation(self) -> SimulationParameters: + """ + Get the parameters of the simulation this dataset is drawn + from. + + Returns + ------- + parameters: opencosmo.parameters.SimulationParameters + """ + return self.__header.simulation + + @property def data(self) -> Table | Column: """ From eb7ec643fee816123dc059f62f38a232d07d178d Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 18 Apr 2025 14:47:29 -0500 Subject: [PATCH 2/7] Regularize attributes --- opencosmo/collection/collection.py | 41 ++++++++++++++++++++++ opencosmo/dataset/dataset.py | 29 +++++++++++++--- opencosmo/link/collection.py | 56 +++++++++++++++++++++++------- opencosmo/link/io.py | 11 +++--- test/test_collection.py | 6 ++++ test/test_units.py | 2 +- 6 files changed, 122 insertions(+), 23 deletions(-) diff --git a/opencosmo/collection/collection.py b/opencosmo/collection/collection.py index c7944aba..887f568e 100644 --- a/opencosmo/collection/collection.py +++ b/opencosmo/collection/collection.py @@ -12,12 +12,14 @@ import h5py +from astropy.cosmology import Cosmology import opencosmo as oc from opencosmo.dataset.index import ChunkedIndex from opencosmo.dataset.mask import Mask from opencosmo.handler import 
InMemoryHandler, OpenCosmoDataHandler, OutOfMemoryHandler from opencosmo.header import OpenCosmoHeader, read_header from opencosmo.link import StructureCollection +from opencosmo.parameters import SimulationParameters from opencosmo.transformations import units as u @@ -135,6 +137,7 @@ def __repr__(self): f"SimulationCollection({n_collections} collections, {n_datasets} datasets)" ) + @classmethod def open( cls, file: h5py.File, datasets_to_get: Optional[Iterable[str]] = None @@ -172,6 +175,44 @@ def __map(self, method, *args, **kwargs): output = {k: getattr(v, method)(*args, **kwargs) for k, v in self.items()} return SimulationCollection(output) + def __map_attribute(self, attribute): + return {k: getattr(v, attribute) for k, v in self.items()} + + @property + def cosmology(self) -> dict[str, Cosmology]: + """ + Get the cosmologies of the simulations in the collection + + Returns: + -------- + cosmologies: dict[str, astropy.cosmology.Cosmology] + """ + return self.__map_attribute("cosmology") + + @property + def redshift(self) -> dict[str, float]: + """ + Get the redshift slices for the simulations in the collection + + Returns: + -------- + redshifts: dict[str, float] + """ + return self.__map_attribute("redshift") + + @property + def simulation(self) -> dict[str, SimulationParameters]: + """ + Get the simulation parameters for the simulations in the collection + + Returns: + -------- + simulation_parameters: dict[str, opencosmo.parameters.SimulationParameters] + """ + + return self.__map_attribute("simulation") + + def filter(self, *masks: Mask, **kwargs) -> SimulationCollection: """ Filter the datasets in the collection. 
This method behaves diff --git a/opencosmo/dataset/dataset.py b/opencosmo/dataset/dataset.py index 3d268747..12ca56bc 100644 --- a/opencosmo/dataset/dataset.py +++ b/opencosmo/dataset/dataset.py @@ -72,6 +72,29 @@ def cosmology(self) -> Cosmology: """ return self.__header.cosmology + @property + def dtype(self) -> str: + """ + Get the data type of this dataset + + Returns + ------- + dtype: str + """ + return self.__header.file.data_type + + @property + def redshift(self) -> float: + """ + Get the redshift slice this dataset was drawn from + + Returns: + -------- + redshift: gloat + + """ + return self.__header.file.redshift + @property def simulation(self) -> SimulationParameters: """ @@ -103,10 +126,6 @@ def data(self) -> Table | Column: builders=self.__builders, index=self.__index ) - @property - def header(self) -> OpenCosmoHeader: - return self.__header - @property def index(self) -> DataIndex: return self.__index @@ -346,7 +365,7 @@ def with_units(self, convention: str) -> Dataset: convention, self.__base_unit_transformations, self.__header.cosmology, - self.__header.file.redshift, + self.redshift ) new_builders = get_column_builders(new_transformations, self.__builders.keys()) diff --git a/opencosmo/link/collection.py b/opencosmo/link/collection.py index fefb8ed3..04a98df2 100644 --- a/opencosmo/link/collection.py +++ b/opencosmo/link/collection.py @@ -7,15 +7,16 @@ import opencosmo as oc from opencosmo import link as l +from opencosmo.parameters import SimulationParameters def filter_properties_by_dataset( - dataset: oc.Dataset, properties: oc.Dataset, *masks + dataset: oc.Dataset, properties: oc.Dataset, header: oc.header.OpenCosmoHeader, *masks ) -> oc.Dataset: masked_dataset = dataset.filter(*masks) - if properties.header.file.data_type == "halo_properties": + if header.file.data_type == "halo_properties": linked_column = "fof_halo_tag" - elif properties.header.file.data_type == "galaxy_properties": + elif header.file.data_type == "galaxy_properties": 
linked_column = "gal_tag" tags = masked_dataset.select(linked_column).data @@ -37,6 +38,7 @@ class StructureCollection: def __init__( self, properties: oc.Dataset, + header: oc.header.OpenCosmoHeader, handlers: dict[str, l.LinkHandler], filters: Optional[dict[str, Any]] = {}, *args, @@ -47,12 +49,13 @@ def __init__( """ self.__properties = properties + self.__header = header self.__handlers = handlers self.__index = self.__properties.index self.__filters = filters def __repr__(self): - structure_type = self.__properties.header.file.data_type.split("_")[0] + "s" + structure_type = self.header.file.data_type.split("_")[0] + "s" dtype_str = ", ".join(self.__handlers.keys()) return f"Collection of {structure_type} with linked datasets {dtype_str}" @@ -73,6 +76,30 @@ def cosmology(self) -> astropy.cosmology.Cosmology: """ return self.__properties.cosmology + @property + def redshift(self) -> float: + """ + Get the redshift slice this dataset was drawn from + + Returns: + -------- + redshift: gloat + + """ + return self.__header.file.redshift + + @property + def simulation(self) -> SimulationParameters: + """ + Get the parameters of the simulation this dataset is drawn + from. + + Returns + ------- + parameters: opencosmo.parameters.SimulationParameters + """ + return self.__header.simulation + @property def properties(self) -> oc.Dataset: """ @@ -85,7 +112,7 @@ def keys(self) -> list[str]: """ Return the keys of the linked datasets. """ - return list(self.__handlers.keys()) + [self.__properties.header.file.data_type] + return list(self.__handlers.keys()) + [self.__header.file.data_type] def values(self) -> list[oc.Dataset]: """ @@ -107,7 +134,7 @@ def __getitem__(self, key: str) -> oc.Dataset: """ Return the linked dataset with the given key. 
""" - if key == self.__properties.header.file.data_type: + if key == self.__header.file.data_type: return self.__properties elif key not in self.__handlers: raise KeyError(f"Dataset {key} not found in collection.") @@ -168,10 +195,11 @@ def filter(self, *masks, on_galaxies: bool = False) -> StructureCollection: raise ValueError("Dataset galaxy_properties not found in collection.") else: filtered = filter_properties_by_dataset( - self["galaxy_properties"], self.__properties, *masks + self["galaxy_properties"], self.__properties, self.__header, *masks ) return StructureCollection( filtered, + self.__header, self.__handlers, ) @@ -201,10 +229,11 @@ def select( ValueError If the specified dataset is not found in the collection. """ - if dataset is None or dataset == self.__properties.header.file.data_type: + if dataset is None or dataset == self.__header.file.data_type: new_properties = self.__properties.select(columns) return StructureCollection( new_properties, + self.__header, self.__handlers, ) @@ -213,7 +242,7 @@ def select( handler = self.__handlers[dataset] new_handler = handler.select(columns) return StructureCollection( - self.__properties, {**self.__handlers, dataset: new_handler} + self.__properties, self.__header, {**self.__handlers, dataset: new_handler} ) def with_units(self, convention: str): @@ -239,6 +268,7 @@ def with_units(self, convention: str): } return StructureCollection( new_properties, + self.__header, new_handlers, ) @@ -263,6 +293,7 @@ def take(self, n: int, at: str = "random"): new_properties = self.__properties.take(n, at) return StructureCollection( new_properties, + self.__header, self.__handlers, ) @@ -305,10 +336,9 @@ def objects( yield row, output def write(self, file: File | Group): - header = self.__properties.header - header.write(file) - self.__properties.write(file, header.file.data_type) - link_group = file[header.file.data_type].create_group("data_linked") + self.__header.write(file) + self.__properties.write(file, 
self.__header.file.data_type) + link_group = file[self.__header.file.data_type].create_group("data_linked") keys = list(self.__handlers.keys()) keys.sort() for key in keys: diff --git a/opencosmo/link/io.py b/opencosmo/link/io.py index 64187966..ac091d0d 100644 --- a/opencosmo/link/io.py +++ b/opencosmo/link/io.py @@ -113,7 +113,7 @@ def open_linked_files(*files: Path): if len(linked_files_by_type) != len(linked_files): raise ValueError("Linked files must have unique data types") return get_linked_datasets( - properties_dataset, linked_files_by_type, properties_file + properties_dataset, linked_files_by_type, properties_file, headers[properties_index] ) @@ -126,6 +126,8 @@ def open_linked_file( properties_name = list( filter(lambda name: "properties" in name, file_handle.keys()) ) + + header = read_header(file_handle) if len(properties_name) == 2: if ( "galaxy_properties" in properties_name @@ -154,7 +156,7 @@ def open_linked_file( raise ValueError("Properties dataset must be a single dataset") return get_linked_datasets( - properties_dataset, linked_groups_by_type, file_handle[properties_name] + properties_dataset, linked_groups_by_type, file_handle[properties_name], header ) @@ -162,6 +164,7 @@ def get_linked_datasets( properties_dataset: d.Dataset, linked_files_by_type: dict[str, File | Group], properties_file: File, + header: OpenCosmoHeader ) -> l.StructureCollection: datasets = {} for dtype, pointer in linked_files_by_type.items(): @@ -171,7 +174,7 @@ def get_linked_datasets( datasets.update({dtype: pointer}) link_handlers = get_link_handlers( - properties_file, datasets, properties_dataset.header + properties_file, datasets, header ) output = {} for key, handler in link_handlers.items(): @@ -180,7 +183,7 @@ def get_linked_datasets( else: output[key] = handler - return l.StructureCollection(properties_dataset, output) + return l.StructureCollection(properties_dataset, header, output) def get_link_handlers( diff --git a/test/test_collection.py 
b/test/test_collection.py index b4bb1bd4..5dc705fe 100644 --- a/test/test_collection.py +++ b/test/test_collection.py @@ -172,6 +172,12 @@ def test_link_write(halo_paths, tmp_path): assert n == 10 +def test_simulation_collection_broadcast_attribute(multi_path): + collection = oc.read(multi_path) + for key, value in collection.redshift.items(): + assert isinstance(key, str) + assert isinstance(value, float) + def test_collection_of_linked(galaxy_paths, galaxy_paths_2, tmp_path): galaxies_1 = open_linked_files(*galaxy_paths) diff --git a/test/test_units.py b/test/test_units.py index f6526f6f..8865be2e 100644 --- a/test/test_units.py +++ b/test/test_units.py @@ -72,7 +72,7 @@ def test_physcal_units(haloproperties_step_path, input_path): data_physical = ds_physical.data data = ds.data cols = data.columns - z = ds.header.file.redshift + z = ds.redshift position_cols = filter(lambda col: col.split("_")[-1] in ["x", "y", "z"], cols) position_cols = filter(lambda col: "angmom" not in col, position_cols) From 737b770499bedfeb2ce72d548f9253c5fe138de8 Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 18 Apr 2025 15:35:22 -0500 Subject: [PATCH 3/7] Updates to documentation --- docs/source/collection_ref.rst | 1 + docs/source/conf.py | 2 +- docs/source/index.rst | 1 + docs/source/parameters_ref.rst | 24 ++++++++++ opencosmo/collection/collection.py | 36 +++++++++----- opencosmo/dataset/dataset.py | 19 ++------ opencosmo/link/collection.py | 3 ++ opencosmo/parameters/simulation.py | 22 ++++++--- poetry.lock | 75 +++++++++++++++++++++++++++--- pyproject.toml | 1 + test/test_write.py | 29 +++++++----- 11 files changed, 160 insertions(+), 53 deletions(-) create mode 100644 docs/source/parameters_ref.rst diff --git a/docs/source/collection_ref.rst b/docs/source/collection_ref.rst index d3d534c6..5a872a6b 100644 --- a/docs/source/collection_ref.rst +++ b/docs/source/collection_ref.rst @@ -8,6 +8,7 @@ Collections within a single file can always be loaded with 
:py:func:`opencosmo.o :members: :exclude-members: open,read,close,write :undoc-members: + :member-order: bysource .. autoclass:: opencosmo.StructureCollection diff --git a/docs/source/conf.py b/docs/source/conf.py index b2b81cd1..0502403e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -14,7 +14,7 @@ # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx_rtd_theme"] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx_rtd_theme", "sphinxcontrib.autodoc_pydantic"] templates_path = ["_templates"] exclude_patterns = [] diff --git a/docs/source/index.rst b/docs/source/index.rst index d7f332c5..d5779a86 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -39,5 +39,6 @@ The OpenCosmo Python Toolkit provides utilities for reading, writing and manipul io_ref dataset_ref collection_ref + parameters_ref diff --git a/docs/source/parameters_ref.rst b/docs/source/parameters_ref.rst new file mode 100644 index 00000000..d437eb94 --- /dev/null +++ b/docs/source/parameters_ref.rst @@ -0,0 +1,24 @@ +Simulation Parameters +===================== + +You can access the parameters of the simulation any dataset is drawn from with :py:attr:`opencosmo.Dataset.simulation`. All datasets regardless of simulation will have the parameters in :py:class:`opencosmo.parameters.SimulationParameters`. Hydrodynamic simulations will additionally contain the parameters in :py:class:`opencosmo.parameters.SubgridParameters` + + +.. autoclass:: opencosmo.parameters.SimulationParameters + :members: + :undoc-members: + :exclude-members: model_config,empty_string_to_none,cosmology_parameters + :member-order: bysource + + +.. autoclass:: opencosmo.parameters.HydroSimulationParameters + :members: + :undoc-members: + :exclude-members: model_config + :member-order: bysource + +.. 
autoclass:: opencosmo.parameters.SubgridParameters + :members: + :undoc-members: + :exclude-members: model_config + :member-order: bysource diff --git a/opencosmo/collection/collection.py b/opencosmo/collection/collection.py index 887f568e..8d50d4e7 100644 --- a/opencosmo/collection/collection.py +++ b/opencosmo/collection/collection.py @@ -183,7 +183,7 @@ def cosmology(self) -> dict[str, Cosmology]: """ Get the cosmologies of the simulations in the collection - Returns: + Returns -------- cosmologies: dict[str, astropy.cosmology.Cosmology] """ @@ -194,7 +194,7 @@ def redshift(self) -> dict[str, float]: """ Get the redshift slices for the simulations in the collection - Returns: + Returns -------- redshifts: dict[str, float] """ @@ -205,7 +205,7 @@ def simulation(self) -> dict[str, SimulationParameters]: """ Get the simulation parameters for the simulations in the collection - Returns: + Returns -------- simulation_parameters: dict[str, opencosmo.parameters.SimulationParameters] """ @@ -254,6 +254,27 @@ def select(self, *args, **kwargs) -> SimulationCollection: """ return self.__map("select", *args, **kwargs) + def take(self, n: int, at: str = "random") -> SimulationCollection: + """ + Take a subest of rows from all datasets or collections in this collection. + This method will delegate to the underlying method in + :class:`opencosmo.Dataset`, or :class:`opencosmo.StructureCollection` depending + on the context. As such, behavior may vary depending on what this collection + contains. See their documentation for more info. + + Parameters + ---------- + n: int + The number of rows to take + at: str, default = "random" + The method to use to take rows. Must be one of "start", "end", "random". 
+ + """ + if any(len(ds) < n for ds in self.values()): + raise ValueError(f"Not all datasets in this collection have at least {n} rows!") + return self.__map("take", n, at) + + def with_units(self, convention: str) -> SimulationCollection: """ Transform all datasets or collections to use the given unit convention. This @@ -268,15 +289,6 @@ def with_units(self, convention: str) -> SimulationCollection: """ return self.__map("with_units", convention) - def take(self, *args, **kwargs) -> SimulationCollection: - """ - Take a subest of rows from all datasets or collections in this collection. - This method will delegate to the underlying method in - :class:`opencosmo.Dataset`, or :class:`opencosmo.Collection` depending on the - context. As such, behaviormay vary depending on what this collection contains. - """ - - return self.__map("take", *args, **kwargs) def open_single_dataset( diff --git a/opencosmo/dataset/dataset.py b/opencosmo/dataset/dataset.py index 12ca56bc..543b385b 100644 --- a/opencosmo/dataset/dataset.py +++ b/opencosmo/dataset/dataset.py @@ -73,24 +73,13 @@ def cosmology(self) -> Cosmology: return self.__header.cosmology @property - def dtype(self) -> str: + def redshift(self) -> float: """ - Get the data type of this dataset + The redshift slice this dataset was drawn from Returns ------- - dtype: str - """ - return self.__header.file.data_type - - @property - def redshift(self) -> float: - """ - Get the redshift slice this dataset was drawn from - - Returns: - -------- - redshift: gloat + redshift: float """ return self.__header.file.redshift @@ -98,7 +87,7 @@ def redshift(self) -> float: @property def simulation(self) -> SimulationParameters: """ - Get the parameters of the simulation this dataset is drawn + The parameters of the simulation this dataset is drawn from. 
Returns diff --git a/opencosmo/link/collection.py b/opencosmo/link/collection.py index 04a98df2..512f67e5 100644 --- a/opencosmo/link/collection.py +++ b/opencosmo/link/collection.py @@ -59,6 +59,9 @@ def __repr__(self): dtype_str = ", ".join(self.__handlers.keys()) return f"Collection of {structure_type} with linked datasets {dtype_str}" + def __len__(self): + return len(self.__properties) + @classmethod def open( cls, file: File, datasets_to_get: Optional[Iterable[str]] = None diff --git a/opencosmo/parameters/simulation.py b/opencosmo/parameters/simulation.py index f79a963b..95ab654d 100644 --- a/opencosmo/parameters/simulation.py +++ b/opencosmo/parameters/simulation.py @@ -80,13 +80,16 @@ def empty_string_to_none(v): class SimulationParameters(BaseModel): box_size: float = Field(ge=0, description="Size of the simulation box (Mpc/h)") - z_ini: float = Field(ge=0.01, description="Initial redshift") - z_end: float = Field(ge=0.0, description="Final redshift") + z_ini: float = Field(ge=0.01, description="Initial redshift of the simulation") + z_end: float = Field(ge=0.0, description="Final redshift of the simulation") n_gravity: Optional[int] = Field( - ge=2, description="Number of gravity-only particles (per dimension)" + ge=2, + description= + "Number of gravity-only particles (per dimension). " + "In hydrodynamic simulations, this parameter will be replaced with \"n_dm\"" ) n_steps: int = Field(ge=1, description="Number of time steps") - pm_grid: int = Field(ge=2, description="Grid resolution (per dimension)") + pm_grid: int = Field(ge=2, description="Number of grid points (per dimension)") offset_gravity_ini: Optional[float] = Field( description="Lagrangian offset for gravity-only particles" ) @@ -104,6 +107,11 @@ def empty_string_to_none(cls, data): @cached_property def step_zs(self) -> list[float]: + """ + Get the redshift of the steps in this simulation. Outputs such that + redshift[step_number] returns the redshift for that step. 
Keep in + mind that steps go from high z -> low z. + """ a_ini = 1 / (1 + self.z_ini) a_end = 1 / (1 + self.z_end) # Steps are evenly spaced in log(a) @@ -123,9 +131,9 @@ class SubgridParameters(BaseModel): agn_kinetic_eps: float = Field(description="AGN feedback efficiency") agn_kinetic_jet_vel: float = Field(description="AGN feedback velocity") agn_nperh: float = Field(description="AGN sphere of influence") - agn_seed_mass: float = Field(description="AGN seed mass") + agn_seed_mass: float = Field(description="AGN seed mass (Msun / h)") wind_egy_w: float = Field(description="Wind mass loading factor") - wind_kappa_w: float = Field(description="Wind belovity") + wind_kappa_w: float = Field(description="Wind velocity") class HydroSimulationParameters(SimulationParameters): @@ -142,6 +150,6 @@ class HydroSimulationParameters(SimulationParameters): description="Lagrangian offset for dark matter particles" ) subgrid_parameters: SubgridParameters = Field( - description="Parameters for subgrid physics", + description="Parameters for subgrid hydrodynamic physics", exclude=True, ) diff --git a/poetry.lock b/poetry.lock index 49054f61..b110c459 100644 --- a/poetry.lock +++ b/poetry.lock @@ -18,7 +18,7 @@ version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -91,6 +91,30 @@ files = [ docs = ["pytest"] test = ["hypothesis", "pytest", "pytest-remotedata"] +[[package]] +name = "autodoc-pydantic" +version = "2.2.0" +description = "Seamlessly integrate pydantic models in your Sphinx documentation." 
+optional = false +python-versions = "<4.0.0,>=3.8.1" +groups = ["docs"] +files = [ + {file = "autodoc_pydantic-2.2.0-py3-none-any.whl", hash = "sha256:8c6a36fbf6ed2700ea9c6d21ea76ad541b621fbdf16b5a80ee04673548af4d95"}, +] + +[package.dependencies] +pydantic = ">=2.0,<3.0.0" +pydantic-settings = ">=2.0,<3.0.0" +Sphinx = ">=4.0" + +[package.extras] +docs = ["myst-parser (>=3.0.0,<4.0.0)", "sphinx-copybutton (>=0.5.0,<0.6.0)", "sphinx-rtd-theme (>=2.0.0,<3.0.0)", "sphinx-tabs (>=3,<4)", "sphinxcontrib-mermaid (>=0.9.0,<0.10.0)"] +erdantic = ["erdantic (<2.0)"] +linting = ["ruff (>=0.4.0,<0.5.0)"] +security = ["pip-audit (>=2.7.2,<3.0.0)"] +test = ["coverage (>=7,<8)", "defusedxml (>=0.7.1)", "pytest (>=8.0.0,<9.0.0)", "pytest-sugar (>=1.0.0,<2.0.0)"] +type-checking = ["mypy (>=1.9,<2.0)", "types-docutils (>=0.20,<0.21)", "typing-extensions (>=4.11,<5.0)"] + [[package]] name = "babel" version = "2.17.0" @@ -638,7 +662,7 @@ version = "2.11.1" description = "Data validation using Python type hints" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "pydantic-2.11.1-py3-none-any.whl", hash = "sha256:5b6c415eee9f8123a14d859be0c84363fec6b1feb6b688d6435801230b56e0b8"}, {file = "pydantic-2.11.1.tar.gz", hash = "sha256:442557d2910e75c991c39f4b4ab18963d57b9b55122c8b2a9cd176d8c29ce968"}, @@ -660,7 +684,7 @@ version = "2.33.0" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "pydantic_core-2.33.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:71dffba8fe9ddff628c68f3abd845e91b028361d43c5f8e7b3f8b91d7d85413e"}, {file = "pydantic_core-2.33.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:abaeec1be6ed535a5d7ffc2e6c390083c425832b20efd621562fbb5bff6dc518"}, @@ -766,6 +790,30 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = 
"pydantic-settings" +version = "2.9.1" +description = "Settings management using Pydantic" +optional = false +python-versions = ">=3.9" +groups = ["docs"] +files = [ + {file = "pydantic_settings-2.9.1-py3-none-any.whl", hash = "sha256:59b4f431b1defb26fe620c71a7d3968a710d719f5f4cdbbdb7926edeb770f6ef"}, + {file = "pydantic_settings-2.9.1.tar.gz", hash = "sha256:c509bf79d27563add44e8446233359004ed85066cd096d8b510f715e6ef5d268"}, +] + +[package.dependencies] +pydantic = ">=2.7.0" +python-dotenv = ">=0.21.0" +typing-inspection = ">=0.4.0" + +[package.extras] +aws-secrets-manager = ["boto3 (>=1.35.0)", "boto3-stubs[secretsmanager]"] +azure-key-vault = ["azure-identity (>=1.16.0)", "azure-keyvault-secrets (>=4.8.0)"] +gcp-secret-manager = ["google-cloud-secret-manager (>=2.23.1)"] +toml = ["tomli (>=2.0.1)"] +yaml = ["pyyaml (>=6.0.1)"] + [[package]] name = "pyerfa" version = "2.0.1.5" @@ -830,6 +878,21 @@ pluggy = ">=1.5,<2" [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "python-dotenv" +version = "1.1.0" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.9" +groups = ["docs"] +files = [ + {file = "python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d"}, + {file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pyyaml" version = "6.0.2" @@ -1148,7 +1211,7 @@ version = "4.13.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" -groups = ["main", "develop"] +groups = ["main", "develop", "docs"] files = [ {file = "typing_extensions-4.13.0-py3-none-any.whl", hash = 
"sha256:c8dd92cc0d6425a97c18fbb9d1954e5ff92c1ca881a309c45f06ebc0b79058e5"}, {file = "typing_extensions-4.13.0.tar.gz", hash = "sha256:0a4ac55a5820789d87e297727d229866c9650f6521b64206413c4fbada24d95b"}, @@ -1160,7 +1223,7 @@ version = "0.4.0" description = "Runtime typing introspection tools" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f"}, {file = "typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122"}, @@ -1190,4 +1253,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "f548ab310b6f63a345594b8138463cd12a7a85a578c383f81de6054e942e3816" +content-hash = "27a7afc60ffc75806871faf1289de6c9803295208528896f97e74a346275e393" diff --git a/pyproject.toml b/pyproject.toml index d3111e60..bbd73e41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ optional = true [tool.poetry.group.docs.dependencies] sphinx = "^8.1.3" sphinx-rtd-theme = "^3.0.2" +autodoc-pydantic = "^2.2.0" [tool.poetry.group.mpi] diff --git a/test/test_write.py b/test/test_write.py index 89474356..facff27d 100644 --- a/test/test_write.py +++ b/test/test_write.py @@ -11,9 +11,14 @@ def cosmology_resource_path(data_path): @pytest.fixture -def properties_path(data_path): +def halo_properties_path(data_path): return data_path / "haloproperties.hdf5" +@pytest.fixture +def galaxy_properties_path(data_path): + return data_path / "galaxyproperties.hdf5" + + def test_write_header(data_path, tmp_path): header = read_header(data_path / "galaxyproperties.hdf5") @@ -32,8 +37,8 @@ def test_write_header(data_path, tmp_path): ) -def test_write_dataset(properties_path, tmp_path): - ds = read(properties_path) +def test_write_dataset(halo_properties_path, tmp_path): + ds = read(halo_properties_path) new_path = 
tmp_path / "haloproperties.hdf5" write(new_path, ds) @@ -41,8 +46,8 @@ def test_write_dataset(properties_path, tmp_path): assert all(ds.data == new_ds.data) -def test_after_take_filter(properties_path, tmp_path): - ds = read(properties_path).take(10000) +def test_after_take_filter(halo_properties_path, tmp_path): + ds = read(halo_properties_path).take(10000) ds = ds.filter(col("sod_halo_mass") > 0) filtered_data = ds.data @@ -51,8 +56,8 @@ def test_after_take_filter(properties_path, tmp_path): assert all(filtered_data == new_ds.data) -def test_after_take(properties_path, tmp_path): - ds = read(properties_path).take(10000) +def test_after_take(halo_properties_path, tmp_path): + ds = read(halo_properties_path).take(10000) data = ds.data write(tmp_path / "haloproperties.hdf5", ds) @@ -60,8 +65,8 @@ def test_after_take(properties_path, tmp_path): assert all(data == new_ds.data) -def test_after_filter(properties_path, tmp_path): - ds = read(properties_path) +def test_after_filter(halo_properties_path, tmp_path): + ds = read(halo_properties_path) data = ds.data ds = ds.filter(col("sod_halo_mass") > 0) filtered_data = ds.data @@ -73,13 +78,13 @@ def test_after_filter(properties_path, tmp_path): assert all(filtered_data == new_ds.data) -def test_after_unit_transform(properties_path, tmp_path): - ds = read(properties_path) +def test_after_unit_transform(halo_properties_path, tmp_path): + ds = read(halo_properties_path) ds = ds.with_units("scalefree") # write should not change the data write(tmp_path / "haloproperties.hdf5", ds) - ds = read(properties_path) + ds = read(halo_properties_path) new_ds = read(tmp_path / "haloproperties.hdf5") assert all(ds.data == new_ds.data) From a30fea3a6a09a2c775cd530497837c9380d58181 Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 18 Apr 2025 15:39:54 -0500 Subject: [PATCH 4/7] Update doc requirements --- docs/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index 
37c5ccc5..efbd9720 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1 +1,2 @@ sphinx_rtd_theme>=3.0.2 +autodoc_pydantic>=2.2.0 From 0ffc35b93673a5ff0f2c9ffb9274cf93f0ef7342 Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 18 Apr 2025 15:45:03 -0500 Subject: [PATCH 5/7] Final documentation updates --- docs/source/conf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/conf.py b/docs/source/conf.py index 0502403e..f5fb2c04 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,3 +28,5 @@ # -- Options for autodoc ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html autodoc_typehints = "description" +autodoc_pydantic_model_show_validator_summary=False +autodoc_pydantic_field_list_validators=False From 52b14f27d4ccf79a70b75ecfccb6013a931ee6ee Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 18 Apr 2025 15:53:40 -0500 Subject: [PATCH 6/7] Linting --- docs/source/conf.py | 11 ++++++++--- opencosmo/collection/collection.py | 14 ++++++-------- opencosmo/collection/io.py | 2 +- opencosmo/dataset/dataset.py | 20 ++++++++++++++------ opencosmo/link/collection.py | 10 +++++----- opencosmo/link/io.py | 13 +++++++------ opencosmo/parameters/simulation.py | 9 ++++----- test/test_collection.py | 3 ++- test/test_write.py | 2 +- 9 files changed, 48 insertions(+), 36 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index f5fb2c04..c2fbd278 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -14,7 +14,12 @@ # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx_rtd_theme", "sphinxcontrib.autodoc_pydantic"] +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx_rtd_theme", + "sphinxcontrib.autodoc_pydantic", +] templates_path = 
["_templates"] exclude_patterns = [] @@ -28,5 +33,5 @@ # -- Options for autodoc ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html autodoc_typehints = "description" -autodoc_pydantic_model_show_validator_summary=False -autodoc_pydantic_field_list_validators=False +autodoc_pydantic_model_show_validator_summary = False +autodoc_pydantic_field_list_validators = False diff --git a/opencosmo/collection/collection.py b/opencosmo/collection/collection.py index 8d50d4e7..0d6ce7d3 100644 --- a/opencosmo/collection/collection.py +++ b/opencosmo/collection/collection.py @@ -11,8 +11,8 @@ import h5py +from astropy.cosmology import Cosmology # type: ignore -from astropy.cosmology import Cosmology import opencosmo as oc from opencosmo.dataset.index import ChunkedIndex from opencosmo.dataset.mask import Mask @@ -137,7 +137,6 @@ def __repr__(self): f"SimulationCollection({n_collections} collections, {n_datasets} datasets)" ) - @classmethod def open( cls, file: h5py.File, datasets_to_get: Optional[Iterable[str]] = None @@ -212,7 +211,6 @@ def simulation(self) -> dict[str, SimulationParameters]: return self.__map_attribute("simulation") - def filter(self, *masks: Mask, **kwargs) -> SimulationCollection: """ Filter the datasets in the collection. This method behaves @@ -258,8 +256,8 @@ def take(self, n: int, at: str = "random") -> SimulationCollection: """ Take a subest of rows from all datasets or collections in this collection. This method will delegate to the underlying method in - :class:`opencosmo.Dataset`, or :class:`opencosmo.StructureCollection` depending - on the context. As such, behavior may vary depending on what this collection + :class:`opencosmo.Dataset`, or :class:`opencosmo.StructureCollection` depending + on the context. As such, behavior may vary depending on what this collection contains. See their documentation for more info. 
Parameters @@ -271,10 +269,11 @@ def take(self, n: int, at: str = "random") -> SimulationCollection: """ if any(len(ds) < n for ds in self.values()): - raise ValueError(f"Not all datasets in this collection have at least {n} rows!") + raise ValueError( + f"Not all datasets in this collection have at least {n} rows!" + ) return self.__map("take", n, at) - def with_units(self, convention: str) -> SimulationCollection: """ Transform all datasets or collections to use the given unit convention. This @@ -290,7 +289,6 @@ def with_units(self, convention: str) -> SimulationCollection: return self.__map("with_units", convention) - def open_single_dataset( file: h5py.File, dataset_key: str, header: Optional[OpenCosmoHeader] = None ) -> oc.Dataset: diff --git a/opencosmo/collection/io.py b/opencosmo/collection/io.py index 70ca81cc..083b5f3c 100644 --- a/opencosmo/collection/io.py +++ b/opencosmo/collection/io.py @@ -29,7 +29,7 @@ def open_simulation_files(**paths: Path) -> SimulationCollection: dataset = oc.open(path) if not isinstance(dataset, oc.Dataset): raise ValueError("All datasets must be of the same type.") - dtypes = set(dataset.header.file.data_type for dataset in datasets.values()) + dtypes = set(dataset for dataset in datasets.values()) if len(dtypes) != 1: raise ValueError("All datasets must be of the same type.") return SimulationCollection(datasets) diff --git a/opencosmo/dataset/dataset.py b/opencosmo/dataset/dataset.py index 543b385b..8a02c4a7 100644 --- a/opencosmo/dataset/dataset.py +++ b/opencosmo/dataset/dataset.py @@ -68,10 +68,21 @@ def cosmology(self) -> Cosmology: Returns ------- - cosmology : astropy.cosmology.Cosmology + cosmology: astropy.cosmology.Cosmology """ return self.__header.cosmology + @property + def dtype(self) -> str: + """ + The data type of this dataset. 
+ + Returns + ------- + dtype: str + """ + return self.__header.file.data_type + @property def redshift(self) -> float: """ @@ -96,7 +107,6 @@ def simulation(self) -> SimulationParameters: """ return self.__header.simulation - @property def data(self) -> Table | Column: """ @@ -111,9 +121,7 @@ def data(self) -> Table | Column: """ # should rename this, dataset.data can get confusing # Also the point is that there's MORE data than just the table - return self.__handler.get_data( - builders=self.__builders, index=self.__index - ) + return self.__handler.get_data(builders=self.__builders, index=self.__index) @property def index(self) -> DataIndex: @@ -354,7 +362,7 @@ def with_units(self, convention: str) -> Dataset: convention, self.__base_unit_transformations, self.__header.cosmology, - self.redshift + self.redshift, ) new_builders = get_column_builders(new_transformations, self.__builders.keys()) diff --git a/opencosmo/link/collection.py b/opencosmo/link/collection.py index 512f67e5..36593a16 100644 --- a/opencosmo/link/collection.py +++ b/opencosmo/link/collection.py @@ -11,7 +11,10 @@ def filter_properties_by_dataset( - dataset: oc.Dataset, properties: oc.Dataset, header: oc.header.OpenCosmoHeader, *masks + dataset: oc.Dataset, + properties: oc.Dataset, + header: oc.header.OpenCosmoHeader, + *masks, ) -> oc.Dataset: masked_dataset = dataset.filter(*masks) if header.file.data_type == "halo_properties": @@ -189,10 +192,7 @@ def filter(self, *masks, on_galaxies: bool = False) -> StructureCollection: """ if not masks: return self - if ( - not on_galaxies - or self.__properties.header.file.data_type == "galaxy_properties" - ): + if not on_galaxies or self.__properties.dtype == "galaxy_properties": filtered = self.__properties.filter(*masks) elif "galaxy_properties" not in self.__handlers: raise ValueError("Dataset galaxy_properties not found in collection.") diff --git a/opencosmo/link/io.py b/opencosmo/link/io.py index ac091d0d..9f0b33dc 100644 --- 
a/opencosmo/link/io.py +++ b/opencosmo/link/io.py @@ -5,8 +5,8 @@ from h5py import File, Group from opencosmo import dataset as d -from opencosmo import link as l from opencosmo import io +from opencosmo import link as l from opencosmo.header import OpenCosmoHeader, read_header try: @@ -113,7 +113,10 @@ def open_linked_files(*files: Path): if len(linked_files_by_type) != len(linked_files): raise ValueError("Linked files must have unique data types") return get_linked_datasets( - properties_dataset, linked_files_by_type, properties_file, headers[properties_index] + properties_dataset, + linked_files_by_type, + properties_file, + headers[properties_index], ) @@ -164,7 +167,7 @@ def get_linked_datasets( properties_dataset: d.Dataset, linked_files_by_type: dict[str, File | Group], properties_file: File, - header: OpenCosmoHeader + header: OpenCosmoHeader, ) -> l.StructureCollection: datasets = {} for dtype, pointer in linked_files_by_type.items(): @@ -173,9 +176,7 @@ def get_linked_datasets( else: datasets.update({dtype: pointer}) - link_handlers = get_link_handlers( - properties_file, datasets, header - ) + link_handlers = get_link_handlers(properties_file, datasets, header) output = {} for key, handler in link_handlers.items(): if key in LINK_ALIASES: diff --git a/opencosmo/parameters/simulation.py b/opencosmo/parameters/simulation.py index 95ab654d..1a3d886a 100644 --- a/opencosmo/parameters/simulation.py +++ b/opencosmo/parameters/simulation.py @@ -83,10 +83,9 @@ class SimulationParameters(BaseModel): z_ini: float = Field(ge=0.01, description="Initial redshift of the simulation") z_end: float = Field(ge=0.0, description="Final redshift of the simulation") n_gravity: Optional[int] = Field( - ge=2, - description= - "Number of gravity-only particles (per dimension). " - "In hydrodynamic simulations, this parameter will be replaced with \"n_dm\"" + ge=2, + description="Number of gravity-only particles (per dimension). 
" + 'In hydrodynamic simulations, this parameter will be replaced with "n_dm"', ) n_steps: int = Field(ge=1, description="Number of time steps") pm_grid: int = Field(ge=2, description="Number of grid points (per dimension)") @@ -109,7 +108,7 @@ def empty_string_to_none(cls, data): def step_zs(self) -> list[float]: """ Get the redshift of the steps in this simulation. Outputs such that - redshift[step_number] returns the redshift for that step. Keep in + redshift[step_number] returns the redshift for that step. Keep in mind that steps go from high z -> low z. """ a_ini = 1 / (1 + self.z_ini) diff --git a/test/test_collection.py b/test/test_collection.py index 5dc705fe..7a6cb446 100644 --- a/test/test_collection.py +++ b/test/test_collection.py @@ -91,7 +91,7 @@ def test_data_link_selection(halo_paths): collection = collection.filter(oc.col("sod_halo_mass") > 10**13).take( 10, at="random" ) - collection = collection.select( ["x", "y", "z"], dataset="dm_particles") + collection = collection.select(["x", "y", "z"], dataset="dm_particles") collection = collection.select(["fof_halo_tag", "sod_halo_mass"]) found_dm_particles = False for properties, particles in collection.objects(): @@ -172,6 +172,7 @@ def test_link_write(halo_paths, tmp_path): assert n == 10 + def test_simulation_collection_broadcast_attribute(multi_path): collection = oc.read(multi_path) for key, value in collection.redshift.items(): diff --git a/test/test_write.py b/test/test_write.py index facff27d..4a2412e9 100644 --- a/test/test_write.py +++ b/test/test_write.py @@ -14,12 +14,12 @@ def cosmology_resource_path(data_path): def halo_properties_path(data_path): return data_path / "haloproperties.hdf5" + @pytest.fixture def galaxy_properties_path(data_path): return data_path / "galaxyproperties.hdf5" - def test_write_header(data_path, tmp_path): header = read_header(data_path / "galaxyproperties.hdf5") new_path = tmp_path / "header.hdf5" From 98fe846dd08b48ec61c1cb665346fae021e74897 Mon Sep 17 00:00:00 
2001 From: Patrick Wells Date: Fri, 18 Apr 2025 17:28:02 -0500 Subject: [PATCH 7/7] Additional changes --- docs/source/mpi.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/mpi.rst b/docs/source/mpi.rst index 4e0a0482..06668b10 100644 --- a/docs/source/mpi.rst +++ b/docs/source/mpi.rst @@ -1,7 +1,7 @@ Working with MPI ================ -OpenCosmo can read and write data inside an MPI environment. In general the API works exactly the same within an MPI context as it does otherwise, but there are some things to be aware of in the current version of the library (see below). More flexibility in working in an MPI context is planned for the next version of the library. +OpenCosmo can read and write data in an MPI environment. In general the API works exactly the same within an MPI context as it does otherwise, but there are some things to be aware of in the current version of the library (see below). More flexibility in working in an MPI context is planned for the next version of the library. I/O with Parallel HDF5 and Select Operations --------------------------------------------