Skip to content

Commit 6eeeb7a

Browse files
authored
Merge pull request #791 from DHI/improve_test_coverage
Improve test coverage
2 parents 17b4e45 + 0e19a40 commit 6eeeb7a

15 files changed

+212
-285
lines changed

mikeio/__init__.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from __future__ import annotations
22
from importlib.metadata import PackageNotFoundError, version
33
from pathlib import Path
4-
from platform import architecture
54
from collections.abc import Sequence
65
from typing import Any
76

@@ -15,9 +14,6 @@
1514
__dfs_version__: int = 220
1615

1716

18-
if "64" not in architecture()[0]:
19-
raise Exception("This library has not been tested for a 32 bit system.")
20-
2117
from .dataset import DataArray, Dataset, from_pandas, from_polars
2218
from .dfs import Dfs0, Dfs1, Dfs2, Dfs3
2319
from .dfsu import Dfsu, Mesh, Dfsu2DH, Dfsu2DV, Dfsu3D, DfsuSpectral
@@ -121,7 +117,9 @@ def read(
121117
ext = Path(filename).suffix.lower()
122118

123119
if "dfs" not in ext:
124-
raise ValueError("mikeio.read() is only supported for dfs files")
120+
raise ValueError(
121+
"mikeio.read() is only supported for dfs files. Use mikeio.open for mesh files."
122+
)
125123

126124
dfs = open(filename)
127125
if isinstance(dfs, Mesh):

mikeio/_track.py

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def _extract_track(
1919
end_time: pd.Timestamp,
2020
timestep: float,
2121
geometry: GeometryFM2D,
22-
track: str | Dataset | pd.DataFrame,
22+
track: str | Path | Dataset | pd.DataFrame,
2323
items: Sequence[ItemInfo],
2424
item_numbers: Sequence[int],
2525
time_steps: Sequence[int],
@@ -36,6 +36,8 @@ def _extract_track(
3636
match track:
3737
case str():
3838
times, coords = _get_track_data_from_file(track)
39+
case Path():
40+
times, coords = _get_track_data_from_file(str(track))
3941
case Dataset():
4042
times, coords = _get_track_data_from_dataset(track)
4143
case pd.DataFrame():
@@ -193,20 +195,3 @@ def _get_track_data_from_file(track: str) -> tuple[pd.DatetimeIndex, np.ndarray]
193195
coords = df.iloc[:, 0:2].to_numpy(copy=True)
194196

195197
return times, coords
196-
197-
198-
def _find_end_index(t_rel: pd.Index, end_time: pd.Timestamp) -> int:
199-
# largest idx for which (times - self.end_time)<=0
200-
tmp = np.where(t_rel <= 0)[0]
201-
if len(tmp) == 0:
202-
raise ValueError("No time overlap!")
203-
i_end = tmp[-1]
204-
return i_end
205-
206-
207-
def _find_start_index(t_rel: pd.Index, start_time: pd.Timestamp) -> int:
208-
tmp = np.where(t_rel >= 0)[0]
209-
if len(tmp) == 0:
210-
raise ValueError("No time overlap!")
211-
i_start = tmp[0] # smallest idx for which t_rel>=0
212-
return i_start

mikeio/dataset/_dataarray.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,9 @@ def __call__(self, tail: bool = True) -> "DataArray":
9898
dims = tuple([d for d in self.da.dims if d not in ("frequency", "direction")])
9999
item = ItemInfo(EUMType.Significant_wave_height)
100100
g = self.da.geometry
101-
if isinstance(g, GeometryFMPointSpectrum):
102-
if g.x is not None and g.y is not None:
103-
geometry: Any = GeometryPoint2D(x=g.x, y=g.y)
104-
else:
105-
geometry = GeometryUndefined()
106-
elif isinstance(g, GeometryFMLineSpectrum):
101+
geometry: Any = GeometryUndefined()
102+
103+
if isinstance(g, GeometryFMLineSpectrum):
107104
geometry = Grid1D(
108105
nx=g.n_nodes,
109106
dx=1.0,
@@ -119,8 +116,6 @@ def __call__(self, tail: bool = True) -> "DataArray":
119116
element_table=g.element_table,
120117
element_ids=g.element_ids,
121118
)
122-
else:
123-
geometry = GeometryUndefined()
124119

125120
return DataArray(
126121
data=Hm0,

mikeio/dataset/_dataset.py

Lines changed: 7 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,6 @@ def _init_from_DataArrays(
162162
for da in data_vars.values():
163163
first._is_compatible(da, raise_error=True)
164164

165-
self._check_all_different_ids(list(data_vars.values()))
166-
167165
# TODO is it necessary to keep track of item names?
168166
self.__itemattr: set[str] = set()
169167
for key, value in data_vars.items():
@@ -266,32 +264,6 @@ def _unique_item_names(das: Sequence[DataArray]) -> list[str]:
266264
)
267265
return item_names
268266

269-
@staticmethod
270-
def _check_all_different_ids(das: Sequence[DataArray]) -> None:
271-
"""Are all the DataArrays different objects or are some referring to the same."""
272-
ids = np.zeros(len(das), dtype=np.int64)
273-
ids_val = np.zeros(len(das), dtype=np.int64)
274-
for j, da in enumerate(das):
275-
ids[j] = id(da)
276-
ids_val[j] = id(da.values)
277-
278-
if len(ids) != len(np.unique(ids)):
279-
# DataArrays not unique! - find first duplicate and report error
280-
das = list(das)
281-
u, c = np.unique(ids, return_counts=True)
282-
dups = u[c > 1]
283-
for dup in dups:
284-
jj = np.where(ids == dup)[0]
285-
Dataset._id_of_DataArrays_equal(das[jj[0]], das[jj[1]])
286-
if len(ids_val) != len(np.unique(ids_val)):
287-
# DataArray *values* not unique! - find first duplicate and report error
288-
das = list(das)
289-
u, c = np.unique(ids_val, return_counts=True)
290-
dups = u[c > 1]
291-
for dup in dups:
292-
jj = np.where(ids_val == dup)[0]
293-
Dataset._id_of_DataArrays_equal(das[jj[0]], das[jj[1]])
294-
295267
@staticmethod
296268
def _id_of_DataArrays_equal(da1: DataArray, da2: DataArray) -> None:
297269
"""Check if two DataArrays are actually the same object."""
@@ -529,10 +501,12 @@ def __len__(self) -> int:
529501
def __iter__(self) -> Iterator[DataArray]:
530502
yield from self._data_vars.values()
531503

532-
def __setitem__(self, key, value) -> None: # type: ignore
504+
def __setitem__(self, key: int | str, value: DataArray) -> None: # type: ignore
533505
self.__set_or_insert_item(key, value, insert=False)
534506

535-
def __set_or_insert_item(self, key, value: DataArray, insert=False) -> None: # type: ignore
507+
def __set_or_insert_item(
508+
self, key: int | str, value: DataArray, insert: bool = False
509+
) -> None: # type: ignore
536510
if len(self) > 0:
537511
self[0]._is_compatible(value)
538512

@@ -1008,19 +982,17 @@ def __dataset_read_item_time_func(
1008982

1009983
def extract_track(
1010984
self,
1011-
track: pd.DataFrame,
985+
track: str | Path | Dataset | pd.DataFrame,
1012986
method: Literal["nearest", "inverse_distance"] = "nearest",
1013987
dtype: Any = np.float32,
1014988
) -> "Dataset":
1015989
"""Extract data along a moving track.
1016990
1017991
Parameters
1018992
---------
1019-
track: pandas.DataFrame
993+
track: pandas.DataFrame, str or Dataset
1020994
with DatetimeIndex and (x, y) of track points as first two columns
1021-
x,y coordinates must be in same coordinate system as dfsu
1022-
track: str
1023-
filename of csv or dfs0 file containing t,x,y
995+
x,y coordinates must be in same coordinate system as dataset
1024996
method: str, optional
1025997
Spatial interpolation method ('nearest' or 'inverse_distance')
1026998
default='nearest'

mikeio/dfs/_dfs.py

Lines changed: 1 addition & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from typing import Any, Sequence
88
import numpy as np
99
import pandas as pd
10-
from tqdm import tqdm
1110

1211
from mikecore.DfsFile import (
1312
DfsDynamicItemInfo,
@@ -349,99 +348,6 @@ def __repr__(self) -> str:
349348

350349
return str.join("\n", out)
351350

352-
def read(
353-
self,
354-
*,
355-
items: str | int | Sequence[str | int] | None = None,
356-
time: int | str | slice | None = None,
357-
keepdims: bool = False,
358-
dtype: Any = np.float32,
359-
) -> Dataset:
360-
"""Read data from a dfs file.
361-
362-
Parameters
363-
---------
364-
items: list[int] or list[str], optional
365-
Read only selected items, by number (0-based), or by name
366-
time: int, str, datetime, pd.TimeStamp, sequence, slice or pd.DatetimeIndex, optional
367-
Read only selected time steps, by default None (=all)
368-
keepdims: bool, optional
369-
When reading a single time step only, should the time-dimension be kept
370-
in the returned Dataset? by default: False
371-
dtype: data-type, optional
372-
Define the dtype of the returned dataset (default = np.float32)
373-
374-
Returns
375-
-------
376-
Dataset
377-
378-
"""
379-
self._open()
380-
381-
item_numbers = _valid_item_numbers(self._dfs.ItemInfo, items)
382-
n_items = len(item_numbers)
383-
384-
single_time_selected, time_steps = _valid_timesteps(self._dfs.FileInfo, time)
385-
nt = len(time_steps) if not single_time_selected else 1
386-
387-
shape: tuple[int, ...]
388-
389-
if self._ndim == 1:
390-
shape = (nt, self.nx) # type: ignore
391-
elif self._ndim == 2:
392-
shape = (nt, self.ny, self.nx) # type: ignore
393-
else:
394-
# TODO this is not used, since Dfs3 has a separate .read method
395-
shape = (nt, self.nz, self.ny, self.nx) # type: ignore
396-
397-
dims = self.geometry.default_dims
398-
399-
if single_time_selected and not keepdims:
400-
shape = shape[1:]
401-
else:
402-
dims = ["time"] + list(dims)
403-
404-
data_list: list[np.ndarray] = [
405-
np.ndarray(shape=shape, dtype=dtype) for _ in range(n_items)
406-
]
407-
408-
t_seconds = np.zeros(len(time_steps))
409-
410-
for i, it in enumerate(tqdm(time_steps, disable=not self.show_progress)):
411-
for item in range(n_items):
412-
itemdata = self._dfs.ReadItemTimeStep(item_numbers[item] + 1, int(it))
413-
414-
d = itemdata.Data
415-
assert d.ndim == 1
416-
417-
d[d == self.deletevalue] = np.nan
418-
419-
if self._ndim == 2:
420-
d = d.reshape(self.ny, self.nx) # type: ignore
421-
422-
if single_time_selected:
423-
data_list[item] = np.atleast_2d(d) if keepdims else d
424-
else:
425-
data_list[item][i] = d
426-
427-
t_seconds[i] = itemdata.Time
428-
429-
time = pd.to_datetime(t_seconds, unit="s", origin=self.start_time)
430-
431-
items = _get_item_info(self._dfs.ItemInfo, item_numbers)
432-
433-
self._dfs.Close()
434-
435-
return Dataset(
436-
data=data_list,
437-
time=time,
438-
items=items,
439-
dims=tuple(dims),
440-
geometry=self.geometry,
441-
validate=False,
442-
dt=self._timestep,
443-
)
444-
445351
def _open(self) -> None:
446352
raise NotImplementedError("Should be implemented by subclass")
447353

@@ -494,7 +400,7 @@ def start_time(self) -> pd.Timestamp:
494400
def end_time(self) -> pd.Timestamp:
495401
"""File end time."""
496402
if self._end_time is None:
497-
self._end_time = self.read(items=[0]).time[-1].to_pydatetime()
403+
self._end_time = self.read(items=[0]).time[-1].to_pydatetime() # type: ignore
498404

499405
return self._end_time
500406

mikeio/dfs/_dfs1.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,22 @@
11
from __future__ import annotations
22
from pathlib import Path
3+
from typing import Any, Sequence
34

45
from mikecore.DfsFactory import DfsBuilder, DfsFactory
56
from mikecore.DfsFile import DfsFile, DfsSimpleType
67
from mikecore.DfsFileFactory import DfsFileFactory
78
from mikecore.eum import eumQuantity, eumUnit
9+
import numpy as np
10+
import pandas as pd
11+
from tqdm import tqdm
812

913
from .. import __dfs_version__
1014
from ..dataset import Dataset
1115
from ._dfs import (
1216
_Dfs123,
17+
_get_item_info,
18+
_valid_item_numbers,
19+
_valid_timesteps,
1320
write_dfs_data,
1421
)
1522
from ..eum import TimeStepUnit
@@ -91,6 +98,86 @@ def __init__(self, filename: str | Path) -> None:
9198
def _open(self) -> None:
9299
self._dfs = DfsFileFactory.Dfs1FileOpen(self._filename)
93100

101+
def read(
    self,
    *,
    items: str | int | Sequence[str | int] | None = None,
    time: int | str | slice | None = None,
    keepdims: bool = False,
    dtype: Any = np.float32,
) -> Dataset:
    """Read data from a dfs1 file.

    Parameters
    ----------
    items: list[int] or list[str], optional
        Read only selected items, by number (0-based), or by name
    time: int, str, datetime, pd.Timestamp, sequence, slice or pd.DatetimeIndex, optional
        Read only selected time steps, by default None (=all)
    keepdims: bool, optional
        When reading a single time step only, should the time-dimension be kept
        in the returned Dataset? by default: False
    dtype: data-type, optional
        Define the dtype of the returned dataset (default = np.float32)

    Returns
    -------
    Dataset

    """
    # Open outside the try block: if opening fails there is no handle to close.
    self._open()

    try:
        item_numbers = _valid_item_numbers(self._dfs.ItemInfo, items)
        n_items = len(item_numbers)

        single_time_selected, time_steps = _valid_timesteps(self._dfs.FileInfo, time)
        nt = 1 if single_time_selected else len(time_steps)
        shape: tuple[int, ...] = (nt, self.nx)
        dims = self.geometry.default_dims

        if single_time_selected and not keepdims:
            # A single step without keepdims drops the leading time axis.
            shape = shape[1:]
        else:
            dims = ("time", *dims)

        # Uninitialised buffers; filled per time step in the loop below
        # (or replaced outright when a single time step is selected).
        data_list: list[np.ndarray] = [
            np.empty(shape, dtype=dtype) for _ in range(n_items)
        ]

        t_seconds = np.zeros(len(time_steps))

        for i, it in enumerate(tqdm(time_steps, disable=not self.show_progress)):
            for item in range(n_items):
                # item_numbers are 0-based; the +1 suggests the underlying
                # reader expects 1-based item numbers (NOTE(review): confirm).
                itemdata = self._dfs.ReadItemTimeStep(item_numbers[item] + 1, int(it))

                d = itemdata.Data
                assert d.ndim == 1

                # Replace the file's delete value with NaN.
                d[d == self.deletevalue] = np.nan

                if single_time_selected:
                    data_list[item] = np.atleast_2d(d) if keepdims else d
                else:
                    data_list[item][i] = d

                t_seconds[i] = itemdata.Time

        # Time stamps are stored as seconds relative to the file start time.
        time_index = pd.to_datetime(t_seconds, unit="s", origin=self.start_time)

        item_infos = _get_item_info(self._dfs.ItemInfo, item_numbers)
    finally:
        # Always release the file handle, also when validation or reading fails.
        self._dfs.Close()

    return Dataset(
        data=data_list,
        time=time_index,
        items=item_infos,
        dims=tuple(dims),
        geometry=self.geometry,
        validate=False,
        dt=self._timestep,
    )
180+
94181
@property
95182
def geometry(self) -> Grid1D:
96183
assert isinstance(self._geometry, Grid1D)

0 commit comments

Comments
 (0)