|
| 1 | +""" |
| 2 | +Examples of leveraging xarray in combination with trajan to analyze OMB data |
| 3 | +============================================================================== |
| 4 | +""" |
| 5 | + |
| 6 | +# %% |
| 7 | + |
| 8 | +import matplotlib.pyplot as plt |
| 9 | +from pathlib import Path |
| 10 | +from trajan.readers.omb import read_omb_csv |
| 11 | +from trajan.plot.spectra import plot_trajan_spectra |
| 12 | +import coloredlogs |
| 13 | +import datetime |
| 14 | + |
| 15 | +# %% |
| 16 | + |
| 17 | +# a few helper functions |
| 18 | + |
| 19 | + |
| 20 | +def print_head(filename, nlines=3): |
| 21 | + """Print the first nlines of filename""" |
| 22 | + print("----------") |
| 23 | + print(f"head,{nlines} of {filename}") |
| 24 | + print("") |
| 25 | + with open(filename) as input_file: |
| 26 | + head = [next(input_file) for _ in range(nlines)] |
| 27 | + for line in head: |
| 28 | + print(line, end="") |
| 29 | + print("----------") |
| 30 | + |
| 31 | + |
| 32 | +# %% |
| 33 | + |
| 34 | +# adjust the level of information printed |
| 35 | + |
| 36 | +coloredlogs.install(level='error') |
| 37 | +# coloredlogs.install(level='debug') |
| 38 | + |
| 39 | +# %% |
| 40 | + |
| 41 | +# We start by preparing some example data to work with |
| 42 | + |
| 43 | +# generate a trajan dataset |
| 44 | +path_to_test_data = Path.cwd().parent / "tests" / "test_data" / "csv" / "omb3.csv" |
| 45 | +xr_buoys = read_omb_csv(path_to_test_data) |
| 46 | + |
| 47 | +# %% |
| 48 | + |
| 49 | +# list all the buoys in the dataset |
| 50 | + |
| 51 | +list_buoys = list(xr_buoys.trajectory.data) |
| 52 | +print(f"{list_buoys = }") |
| 53 | + |
| 54 | +# %% |
| 55 | + |
| 56 | +# some users prefer to receive CSV files to perform their analysis: dump all trajectories as CSV |
| 57 | + |
| 58 | +# all positions to CSV, 1 CSV per buoy |
| 59 | +for crrt_buoy in list_buoys: |
| 60 | + crrt_xr = xr_buoys.sel(trajectory=crrt_buoy) |
| 61 | + crrt_xr_gps = crrt_xr.swap_dims({'obs': 'time'})[["lat", "lon"]] |
| 62 | + crrt_xr_gps = crrt_xr_gps.dropna(dim='time') |
| 63 | + crrt_xr_gps.to_dataframe().to_csv(f"{crrt_buoy}_gps.csv") |
| 64 | + |
| 65 | +print_head("drifter_1_gps.csv") |
| 66 | + |
| 67 | +# %% |
| 68 | + |
| 69 | +# similarly, some users prefer to receive CSV files for the wave information to perform their analysis |
| 70 | + |
| 71 | +# scalar data are easy to dump: the following creates files with |
| 72 | +# all wave statistics to CSV, 1 file per buoy |
| 73 | +for crrt_buoy in list_buoys: |
| 74 | + crrt_xr = xr_buoys.sel(trajectory=crrt_buoy) |
| 75 | + crrt_xr_wave_statistics = crrt_xr.swap_dims({"obs_waves_imu": "time_waves_imu"})[["pcutoff", "pHs0", "pT02", "pT24", "Hs0", "T02", "T24"]].rename({"time_waves_imu": "time"}).dropna(dim="time") |
| 76 | + crrt_xr_wave_statistics.to_dataframe().to_csv(f"{crrt_buoy}_wavestats.csv") |
| 77 | + |
| 78 | +print_head("drifter_1_wavestats.csv") |
| 79 | + |
| 80 | +# for spectra, we need to get the frequencies first and to label things |
| 81 | +# all spectra to CSV, 1 file per buoy, 1 frequency per columns |
| 82 | +for crrt_buoy in list_buoys: |
| 83 | + crrt_xr = xr_buoys.sel(trajectory=crrt_buoy) |
| 84 | + # the how="all" is very important: since in the processed spectrum the "invalid / high noise" bins are set to NaN, we must only throw away the wave spectra for which all |
| 85 | + # bins are nan, but we must keep the spectra for which a few low frequency bins are nan. |
| 86 | + # if you want a CSV without any NaN, you can use "elevation_energy_spectrum" instead of "processed_elevation_energy_spectrum" to use the spectra with all bins, including |
| 87 | + # the bins that are dominated by low frequency noise |
| 88 | + crrt_xr_wave_spectra = crrt_xr.swap_dims({"obs_waves_imu": "time_waves_imu"})[["processed_elevation_energy_spectrum"]].rename({"time_waves_imu": "time"}).dropna(dim="time", how="all") |
| 89 | + |
| 90 | + list_frequencies = list(crrt_xr_wave_spectra.frequencies_waves_imu.data) |
| 91 | + |
| 92 | + for crrt_ind, crrt_freq in enumerate(list_frequencies): |
| 93 | + crrt_xr_wave_spectra[f"f={crrt_freq}"] = (['time'], crrt_xr_wave_spectra["processed_elevation_energy_spectrum"][:, crrt_ind].data) |
| 94 | + |
| 95 | + crrt_xr_wave_spectra = crrt_xr_wave_spectra.drop_vars("processed_elevation_energy_spectrum").drop_dims("frequencies_waves_imu") |
| 96 | + crrt_xr_wave_spectra.to_dataframe().to_csv(f"{crrt_buoy}_wavespectra.csv", na_rep="NaN") |
| 97 | + |
| 98 | +print_head("drifter_1_wavespectra.csv") |
| 99 | + |
| 100 | +# %% |
| 101 | + |
| 102 | +# it is easy to look into one single trajectory and select a time slice: |
| 103 | +# (note that, for trajectories coming from buoys that have "real world", buoy dependent |
| 104 | +# sample time, one needs to select only 1 trajectory at a time, since the time axis |
| 105 | +# has to be unique, and it will be different from buoy to buoy) |
| 106 | + |
| 107 | +xr_specific_buoy = xr_buoys.sel(trajectory="drifter_1") |
| 108 | + |
| 109 | +# make a restricted dataset about only the GPS data for a specific buoy, making GPS time the new dim |
| 110 | +# this works because we now have a single buoy so there is only 1 "time" left |
| 111 | +xr_specific_buoy_gps = xr_specific_buoy.swap_dims({'obs': 'time'})[["lat", "lon"]] |
| 112 | + |
| 113 | +# keep only the valid GPS points: avoid the NaT that are due to time alignment in the initial nc file |
| 114 | +xr_specific_buoy_gps = xr_specific_buoy_gps.dropna(dim='time') |
| 115 | + |
| 116 | +# select a specific time interval |
| 117 | +xr_specific_buoy_time_gps = xr_specific_buoy_gps.sel(time=slice("2022-06-17T10", "2022-06-18T01")) |
| 118 | + |
| 119 | +# plot |
| 120 | +xr_specific_buoy_time_gps.traj.plot() |
| 121 | +plt.show() |
| 122 | + |
| 123 | +# %% |
| 124 | + |
| 125 | +# similarly, it is easy to plot wave statistics over a given time slice: |
| 126 | + |
| 127 | +xr_specific_buoy = xr_buoys.sel(trajectory="drifter_1") |
| 128 | + |
| 129 | +# select the wave data for a specific buoy and make time_waves_imu the new time dim |
| 130 | +# this works because we have only 1 buoy left, so only 1 time_waves_imu |
| 131 | +# the how="all" is necessary to not throw away the records for which some few bins |
| 132 | +# have been marked as nan due to the low frequency noise |
| 133 | +xr_specific_buoy_waves = xr_specific_buoy.swap_dims({"obs_waves_imu": "time_waves_imu"})[["accel_energy_spectrum", "elevation_energy_spectrum", "processed_elevation_energy_spectrum", "pcutoff", "pHs0", "pT02", "pT24", "Hs0", "T02", "T24"]].rename({"time_waves_imu": "time"}).dropna(dim="time", how="all") |
| 134 | + |
| 135 | +# plot, for example, swh related quantities; naturally, could use any other fields |
| 136 | +# (except the spectra themselves that are array data) |
| 137 | + |
| 138 | +fix, ax = plt.subplots() |
| 139 | + |
| 140 | +xr_specific_buoy_waves_specific_time = xr_specific_buoy_waves.sel(time=slice("2022-06-17T10", "2022-06-18T01")) |
| 141 | +for crrt_field in ["Hs0", "pHs0"]: |
| 142 | + xr_specific_buoy_waves_specific_time[crrt_field].plot.line(ax=ax, label=crrt_field) |
| 143 | + |
| 144 | +plt.legend() |
| 145 | +plt.ylabel("significant wave height [m]") |
| 146 | +plt.show() |
| 147 | + |
| 148 | +# %% |
| 149 | + |
| 150 | +# to plot one or more 1D wave spectra, we have dedicated tooling: |
| 151 | + |
| 152 | +# plot the spectra for just a few of the buoys; you could also not sel to plot the whole dataset |
| 153 | +xr_several_buoys = xr_buoys.sel(trajectory=["drifter_1", "drifter_2"]) |
| 154 | + |
| 155 | +# tuning of start and end dates |
| 156 | +date_start = datetime.datetime(2022, 6, 16, 0, 0, 0) |
| 157 | +date_end = datetime.datetime(2022, 6, 18, 0, 0, 0) |
| 158 | + |
| 159 | +plot_trajan_spectra(xr_several_buoys, tuple_date_start_end=(date_start, date_end)) |
| 160 | + |
| 161 | +# %% |
0 commit comments