
Commit b7947bd

Merge pull request #336 from British-Oceanographic-Data-Centre/tidegauge_validation_v2.0
Tidegauge validation v2.0
2 parents 40b078b + 705faa2 commit b7947bd

5 files changed: +443 additions, -0 deletions

coast/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -21,3 +21,4 @@
 from .altimetry import Altimetry
 from .timeseries import Timeseries
 from .tidegauge import Tidegauge
+from .tidegauge_multiple import TidegaugeMultiple
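
A quick way to confirm the new export (illustrative, not part of the commit; assumes the coast package is installed):

import coast

# The new class is now exposed at package level.
tg = coast.TidegaugeMultiple()
print(type(tg))  # <class 'coast.tidegauge_multiple.TidegaugeMultiple'>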

coast/gridded.py

Lines changed: 1 addition & 0 deletions

@@ -11,6 +11,7 @@
 from .coast import Coast
 from .config import ConfigParser
 from .logging_util import get_slug, debug, info, warn, error, warning
+import pandas as pd


 class Gridded(Coast):  # TODO Complete this docstring

coast/tidegauge_multiple.py

Lines changed: 342 additions & 0 deletions

@@ -0,0 +1,342 @@
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import glob
import re
import pytz
import sklearn.metrics as metrics
from . import general_utils, plot_util, crps_util, stats_util
from .logging_util import get_slug, debug, error, info
import matplotlib.dates as mdates
import utide as ut
import scipy.signal as signal


class TidegaugeMultiple:
    """
    An object for storage and manipulation of multiple tide gauges in a
    single dataset. This may require some processing of the observations,
    such as interpolation to a common time step.

    This object's dataset should take the form:

        Dimensions:
            id    : The locations (port) dimension. Each time series has an index.
            t_dim : The time dimension. Each datapoint at each port has an index.

        Coordinates:
            longitude (id)    : Longitude values for each port index
            latitude  (id)    : Latitude values for each port index
            time      (t_dim) : Time values for each time index (datetime)

    An example data variable could be ssh, or ntr (non-tidal residual). This
    object can also be used for other instrument types, not just tide gauges;
    for example, moorings.

    Every port index for this object should use the same time coordinates.
    Therefore, timeseries need to be aligned before being placed into the
    object. If any padding is needed, NaNs should be used. NaNs should also
    be used for quality control/data rejection.
    """

    def __init__(self):
        return

    ##############################################################################
    ### ~ Plotting ~ ###
    ##############################################################################

    ##############################################################################
    ### ~ Model Comparison ~ ###
    ##############################################################################

    ##############################################################################
    ### ~ Analysis ~ ###
    ##############################################################################

    def match_missing_values(self, other, fill_value=np.nan):
        """
        Matches missing values between two tidegauge_multiple datasets.
        Where missing values (defined by fill_value) are found in either
        dataset, they are also placed in the corresponding location in the
        other dataset. Returns two new TidegaugeMultiple objects containing
        the new ssh data. Datasets must contain ssh variables and only ssh
        will be masked.
        """

        ds_self = self.dataset
        ds_other = other.dataset

        ssh_self = ds_self.ssh
        ssh_other = ds_other.ssh

        if ssh_other.dims[0] == "t_dim":
            ssh_other = ssh_other.transpose()
        if ssh_self.dims[0] == "t_dim":
            ssh_self = ssh_self.transpose()

        if np.isnan(fill_value):
            ind_self = np.isnan(ssh_self)
            ind_other = np.isnan(ssh_other)
        else:
            ind_self = ssh_self == fill_value
            ind_other = ssh_other == fill_value

        # Mask each dataset with the other's missing-value locations
        ds_self["ssh"] = ssh_self.where(~ind_other)
        ds_other["ssh"] = ssh_other.where(~ind_self)

        tg_self = TidegaugeMultiple()
        tg_other = TidegaugeMultiple()

        tg_self.dataset = ds_self
        tg_other.dataset = ds_other

        return tg_self, tg_other

    def harmonic_analysis_utide(
        self, min_datapoints=1000, nodal=False, trend=False, method="ols", conf_int="linear", Rayleigh_min=0.95
    ):
        """
        Does a harmonic analysis for each timeseries inside this object using
        the utide library. All arguments except min_datapoints are passed to
        ut.solve(). Please see the utide website for more information:

        https://pypi.org/project/UTide/

        By default, utide does its harmonic analysis using a set of harmonics
        determined by the Rayleigh criterion. This changes the number of
        harmonics depending on the length and frequency of the time series.

        Output from this routine is not a new dataset, but a list of utide
        analysis objects. These are structures containing, amongst other
        things, amplitudes, phases, constituent names and confidence
        intervals. This list can be passed to reconstruct_tide_utide() in
        this object to create a new TidegaugeMultiple object containing
        reconstructed tide data.

        INPUTS
         min_datapoints : If a time series has fewer than this number of
                          valid datapoints, it is omitted from the analysis.
         <all_others>   : Inputs to utide.solve(). See website above.

        OUTPUTS
         A list of utide structures from the solve() routine. If a location
         is omitted, its entry is an empty list [].
        """
        ds = self.dataset
        n_port = ds.dims["id"]
        n_time = ds.dims["t_dim"]
        # Harmonic analysis datenums
        time = mdates.date2num(ds.time.values)

        analyses = []

        for pp in range(0, n_port):

            # Temporary in-loop datasets
            ds_port = ds.isel(id=pp).load()
            ssh = ds_port.ssh

            number_of_nan = np.sum(np.isnan(ssh.values))

            # Skip ports that are all NaN or have too few valid datapoints
            if number_of_nan == n_time:
                analyses.append([])
                continue

            if (n_time - number_of_nan) < min_datapoints:
                analyses.append([])
                continue

            # Do harmonic analysis using UTide
            uts_obs = ut.solve(
                time,
                ssh.values,
                lat=ssh.latitude.values,
                nodal=nodal,
                trend=trend,
                method=method,
                conf_int=conf_int,
                Rayleigh_min=Rayleigh_min,
            )

            analyses.append(uts_obs)

        return analyses
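
For illustration, a minimal usage sketch for the two methods above (not part of the commit; the two-port dataset and all values are synthetic, and utide must be installed):

import numpy as np
import pandas as pd
import xarray as xr
from coast import TidegaugeMultiple

# Synthetic hourly ssh at two ports, in the layout the class docstring expects.
time = pd.date_range("2020-01-01", periods=24 * 60, freq="H")
t = np.arange(time.size)
ssh = np.stack([
    1.0 * np.cos(2 * np.pi * t / 12.42),        # roughly M2-period signal
    0.5 * np.cos(2 * np.pi * t / 12.42 + 0.3),  # smaller, lagged signal
])
ds = xr.Dataset(
    {"ssh": (["id", "t_dim"], ssh)},
    coords={
        "longitude": ("id", [-3.0, -4.5]),
        "latitude": ("id", [53.4, 50.1]),
        "time": ("t_dim", time),
    },
)

obs = TidegaugeMultiple()
obs.dataset = ds

# One utide solution per port; ports with too few valid points get [].
analyses = obs.harmonic_analysis_utide(min_datapoints=500)

coast/tidegauge_multiple.py (continued):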

    def reconstruct_tide_utide(self, utide_solution_list, constit=None):
        """
        Reconstructs the tidal signal at each port from a list of utide
        solution objects, as returned by harmonic_analysis_utide(). The
        reconstruction can be restricted to a subset of constituents via
        the constit argument, which is passed to ut.reconstruct(). Returns
        a new TidegaugeMultiple object whose dataset contains ssh_tide.
        """

        ds = self.dataset
        n_port = ds.dims["id"]
        n_time = ds.dims["t_dim"]
        # Reconstruction datenums
        time = mdates.date2num(ds.time.values)

        coords = xr.Dataset(ds[list(ds.coords.keys())])
        reconstructed = np.zeros((n_port, n_time)) * np.nan

        for pp in np.arange(n_port):

            # Reconstruct full tidal signal from this port's solution
            pp_solution = utide_solution_list[pp]

            # Ports omitted from the analysis have empty entries
            if len(pp_solution) == 0:
                continue

            tide = np.array(ut.reconstruct(time, pp_solution, constit=constit).h)
            reconstructed[pp] = tide

        coords["ssh_tide"] = (["id", "t_dim"], reconstructed)
        tg_return = TidegaugeMultiple()
        tg_return.dataset = coords

        return tg_return

    def calculate_residuals(self, tg_tide, apply_filter=True, window_length=25, polyorder=3):
        """
        Calculates non-tidal residuals (ntr) as ssh minus the reconstructed
        tide in tg_tide (a TidegaugeMultiple object containing ssh_tide).
        Optionally smooths the residuals with a Savitzky-Golay filter.
        Returns a new TidegaugeMultiple object containing ntr.
        """

        # NTR: Calculate non-tidal residuals
        ntr = self.dataset.ssh - tg_tide.dataset.ssh_tide
        n_port = self.dataset.dims["id"]

        # NTR: Apply filter if wanted
        if apply_filter:
            for pp in range(n_port):
                ntr[pp, :] = signal.savgol_filter(ntr[pp, :], window_length, polyorder)

        coords = xr.Dataset(self.dataset[list(self.dataset.coords.keys())])
        coords["ntr"] = ntr

        tg_return = TidegaugeMultiple()
        tg_return.dataset = coords
        return tg_return

    def threshold_statistics(self, thresholds=np.arange(-0.4, 2, 0.1), peak_separation=12):
        """
        For each variable in the dataset and each threshold, counts peaks
        over threshold, time over threshold (in timesteps) and daily/monthly
        maxima over threshold. Returns an xarray Dataset with a new
        threshold dimension.
        """

        ds = self.dataset
        ds_thresh = xr.Dataset(ds[list(ds.coords.keys())])
        ds_thresh["threshold"] = ("threshold", thresholds)
        var_list = list(ds.keys())
        n_thresholds = len(thresholds)
        n_port = ds.dims["id"]

        for vv in var_list:

            empty_thresh = np.zeros((n_port, n_thresholds)) * np.nan
            ds_thresh["peak_count_" + vv] = (["id", "threshold"], empty_thresh)
            ds_thresh["time_over_threshold_" + vv] = (["id", "threshold"], empty_thresh)
            ds_thresh["dailymax_count_" + vv] = (["id", "threshold"], empty_thresh)
            ds_thresh["monthlymax_count_" + vv] = (["id", "threshold"], empty_thresh)

            for pp in range(n_port):

                # Identify peaks for threshold analysis
                data_pp = ds[vv].isel(id=pp)
                if np.sum(np.isnan(data_pp.values)) == ds.dims["t_dim"]:
                    continue

                pk_ind, _ = signal.find_peaks(data_pp.values, distance=peak_separation)
                pk_values = data_pp[pk_ind]

                # Grouped maxima for threshold analysis. Note these group by
                # calendar day-of-month and month-of-year respectively.
                ds_daily_max = data_pp.groupby("time.day").max(skipna=True)
                ds_monthly_max = data_pp.groupby("time.month").max(skipna=True)

                # Threshold analysis
                for nn in range(0, n_thresholds):

                    threshn = thresholds[nn]
                    # Threshold frequency (peaks over threshold)
                    ds_thresh["peak_count_" + vv][pp, nn] = np.sum(pk_values >= threshn)

                    # Threshold integral (time over threshold)
                    ds_thresh["time_over_threshold_" + vv][pp, nn] = np.sum(data_pp >= threshn)

                    # Number of daily maxima over threshold
                    ds_thresh["dailymax_count_" + vv][pp, nn] = np.sum(ds_daily_max.values >= threshn)

                    # Number of monthly maxima over threshold
                    ds_thresh["monthlymax_count_" + vv][pp, nn] = np.sum(ds_monthly_max.values >= threshn)

        return ds_thresh

    def demean_timeseries(self):
        """Removes the time mean from every variable; returns an xarray Dataset."""
        return self.dataset - self.dataset.mean(dim="t_dim")
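
Continuing the illustrative sketch from above (again not part of the commit; analyses is the list returned by harmonic_analysis_utide):

# Reconstruct the tide at each port; the result carries an ssh_tide variable.
tide = obs.reconstruct_tide_utide(analyses)

# Non-tidal residuals (ssh minus ssh_tide), Savitzky-Golay smoothed.
ntr = obs.calculate_residuals(tide, apply_filter=True,
                              window_length=25, polyorder=3)

# Peak, time-over-threshold and maxima counts for a range of thresholds.
stats = ntr.threshold_statistics(thresholds=np.arange(-0.4, 2, 0.1))
print(stats["peak_count_ntr"])

coast/tidegauge_multiple.py (continued):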

    def obs_operator(self, gridded, time_interp="nearest"):
        """
        Extracts time series from a Gridded object at the tide gauge
        locations and interpolates them onto the observation times. Returns
        a new TidegaugeMultiple object containing the extracted model data
        and the interpolation distances (interp_dist).
        """

        gridded = gridded.dataset
        ds = self.dataset

        # Determine spatial indices
        print("Calculating spatial indices.", flush=True)
        ind_x, ind_y = general_utils.nearest_indices_2d(
            gridded.longitude, gridded.latitude, ds.longitude, ds.latitude, mask=gridded.landmask
        )

        # Extract spatial time series
        print("Calculating time indices.", flush=True)
        extracted = gridded.isel(x_dim=ind_x, y_dim=ind_y)
        extracted = extracted.swap_dims({"dim_0": "id"})

        # Compute data (takes a while...)
        print(" Indexing model data at tide gauge locations.. ", flush=True)
        extracted.load()

        # Check interpolation distances
        print("Calculating interpolation distances.", flush=True)
        interp_dist = general_utils.calculate_haversine_distance(
            extracted.longitude, extracted.latitude, ds.longitude.values, ds.latitude.values
        )

        # Interpolate model onto obs times
        print("Interpolating in time...", flush=True)
        extracted = extracted.rename({"time": "t_dim"})
        extracted = extracted.interp(t_dim=ds.time.values, method=time_interp)

        # Put interp_dist into dataset
        extracted["interp_dist"] = interp_dist
        extracted = extracted.rename_vars({"t_dim": "time"})

        tg_out = TidegaugeMultiple()
        tg_out.dataset = extracted
        return tg_out

    def difference(self, other, absolute_diff=True, square_diff=True):
        """
        Differences this object's variables with those of another
        TidegaugeMultiple object (self minus other). Optionally also
        computes absolute and squared differences. Output variables are
        prefixed with diff_, abs_diff_ and square_diff_ respectively.
        """

        differenced = self.dataset - other.dataset
        diff_vars = list(differenced.keys())
        save_coords = list(self.dataset.coords.keys())

        for vv in diff_vars:
            differenced = differenced.rename({vv: "diff_" + vv})

        # Absolute differences (numpy ufuncs apply elementwise to Datasets)
        if absolute_diff:
            abs_tmp = np.fabs(differenced)
            diff_vars = list(abs_tmp.keys())
            for vv in diff_vars:
                abs_tmp = abs_tmp.rename({vv: "abs_" + vv})
        else:
            abs_tmp = xr.Dataset()

        # Squared differences
        if square_diff:
            sq_tmp = np.square(differenced)
            diff_vars = list(sq_tmp.keys())
            for vv in diff_vars:
                sq_tmp = sq_tmp.rename({vv: "square_" + vv})
        else:
            sq_tmp = xr.Dataset()

        differenced = xr.merge((differenced, abs_tmp, sq_tmp, self.dataset[save_coords]))

        return_differenced = TidegaugeMultiple()
        return_differenced.dataset = differenced

        return return_differenced
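
Finally, a sketch of the model-comparison path (illustrative; nemo stands for a coast.Gridded object whose dataset provides an ssh variable, longitude, latitude, a landmask and a time coordinate, as obs_operator assumes; one possible construction of nemo is sketched after the config diff below):

# Extract model data at the gauge locations, interpolated to obs times.
model_tg = obs.obs_operator(nemo, time_interp="nearest")

# Mask missing values consistently across both sources, then difference.
obs_m, model_m = obs.match_missing_values(model_tg)
diff = obs_m.difference(model_m)
print(diff.dataset["diff_ssh"])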

config/example_nemo_grid_t.json

Lines changed: 2 additions & 0 deletions

@@ -12,6 +12,7 @@
     "deptht_0",
     "tmask",
     "bottom_level",
+    "mbathy",
     "hbatt"
   ]
 },
@@ -60,6 +61,7 @@
     "tmask":"mask",
     "deptht_0": "depth_0",
     "bottom_level": "bottom_level",
+    "mbathy":"bottom_level",
     "hbatt":"bathymetry"
   }
 },
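
A hedged sketch of how this config might be consumed (illustrative only; the file paths are placeholders, and the constructor call coast.Gridded(fn_data, fn_domain, config=...) is assumed from COAsT usage of this era rather than confirmed by the diff):

import coast

# The json above maps the NEMO variable mbathy onto the internal
# bottom_level name, so either source variable populates bottom_level.
nemo = coast.Gridded(
    fn_data="nemo_t_grid_output.nc",      # placeholder path
    fn_domain="nemo_domain_cfg.nc",       # placeholder path
    config="config/example_nemo_grid_t.json",
)
print(nemo.dataset.bottom_level)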
