 """
 
 import pathlib
+from functools import cache
 
 import numpy as np
+import pandas as pd
+
+from ..plotting.maps import lat_long_grid
 
 
 class IOError(Exception):
@@ -43,6 +47,166 @@ def read_currents(path: pathlib.Path) -> np.ndarray:
     return np.flipud(data.reshape(shape))
 
 
+@cache
+def _read_clean_current_metadata(metadata_path: pathlib.Path) -> pd.DataFrame:
+    """
+    Read the metadata file for the clean currents .dat file.
+
+    :returns: metadata dataframe with model/name/year columns
+    """
+    # The first line holds the number of models/runs; the remaining lines are
+    # whitespace-separated model/name/year rows
+    with open(metadata_path, "r") as f:
+        num_runs = int(f.readline().strip())
+        df = pd.read_csv(f, names=["model", "name", "year"], sep=r"\s+")
+
+    if len(df) != num_runs:
+        raise IOError(
+            f"Metadata file {metadata_path} has {len(df)} rows, but its first line says {num_runs}"
+        )
+
+    return df
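+
+# Illustrative sketch of the layout the parser above assumes: an integer count on the first
+# line, then one whitespace-separated model/name/year row per record. The values here are
+# hypothetical, not taken from the real metadata file:
+#
+#     2
+#     ACCESS-CM2 r1i1p1f1_gn 2003
+#     ACCESS-CM2 r1i1p1f1_gn 2008
+#
+# The returned DataFrame's row index gives each record's position within the .dat file.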
+
+
+def _coriolis_parameter(latitudes: np.ndarray) -> np.ndarray:
+    """
+    Calculate the Coriolis parameter at each latitude (in degrees)
+    """
+    # Earth's angular velocity, rad/s
+    omega = 7.2921e-5
+    torad = np.pi / 180.0
+
+    return 2 * omega * np.sin(latitudes * torad)
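+
+# Quick sanity check on the formula above, f = 2 * omega * sin(latitude): at 45°N this gives
+# roughly 1.03e-4 s^-1 and at the poles roughly 1.46e-4 s^-1, e.g.
+#
+#     _coriolis_parameter(np.array([0.0, 45.0, 90.0]))  # ~ [0.0, 1.03e-4, 1.46e-4]
+#
+# Note that f vanishes at the equator, which matters for the division by f below.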
+
+
+def current_speed_from_mdt(mdt: np.ndarray) -> np.ndarray:
+    """
+    Convert geodetic MDT to current speeds.
+
+    Assuming geostrophic balance, the steady-state currents follow from the gradient of the
+    MDT, scaled by gravity and the Coriolis parameter at each latitude.
+
+    :param mdt: the mean dynamic topography, in metres, covering the globe.
+
+    :returns: the current speed in m/s
+
+    """
+    g = 9.80665  # standard gravity, m/s^2
+    torad = np.pi / 180.0
+    R = 6_371_229.0  # Earth radius, m
+
+    # Find the grid spacing (in m)
+    lats, longs = lat_long_grid(mdt.shape)
+    dlat = np.abs(lats[1] - lats[0]) * torad * R
+    dlong = (
+        np.abs(longs[1] - longs[0]) * torad * R * np.cos(torad * lats)[:, np.newaxis]
+    )
+
+    # Find the Coriolis parameter at each latitude
+    f = _coriolis_parameter(lats)
+
+    # Geostrophic balance: velocities are g / f times the MDT gradients
+    dmdt_dlat = np.gradient(mdt, axis=0) / dlat
+    dmdt_dlon = np.gradient(mdt, axis=1) / dlong
+
+    # u should be negative, but it doesn't matter for the speed
+    u = g / f[:, np.newaxis] * dmdt_dlat
+    v = g / f[:, np.newaxis] * dmdt_dlon
+
+    return np.sqrt(u**2 + v**2)
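+
+# The relations implemented above are the usual geostrophic ones, u = -(g / f) * dMDT/dy and
+# v = (g / f) * dMDT/dx (the code keeps u positive; the sign does not affect the speed), with
+# speed = sqrt(u**2 + v**2). A hypothetical call on a 720x1440 global MDT array would be
+#
+#     speed = current_speed_from_mdt(mdt)  # same shape as mdt, in m/s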
+
+
+def read_clean_currents(
+    path: pathlib.Path,
+    metadata_path: pathlib.Path,
+    *,
+    year: int,
+    model: str = "ACCESS-CM2",
+    name: str = "r1i1p1f1_gn",
+) -> np.ndarray:
+    """
+    Read clean current data from a .dat file.
+
+    Read the record for the given model/name/year from a .dat file of clean current data,
+    returning a 720x1440 numpy array of current speeds in m/s.
+    Sets land grid points to np.nan.
+    The clean current data for all models and years is stored in a single large file, so we
+    need to pick out the correct record.
+
+    Notes on the naming convention from the CMIP6 documentation can be found in
+    docs/current_denoising/generation/ioutils.md, or in the original at
+    https://docs.google.com/document/d/1h0r8RZr_f3-8egBMMh7aqLwy3snpD6_MrDz1q8n5XUk.
+
+    :param path: location of the .dat file; clean current data is located in
+        data/projects/SING/richard_stuff/Table2/clean_currents/ on the RDSF
+    :param metadata_path: location of the metadata .csv file describing the contents of the .dat file
+    :param year: start of the 5-year period for which to extract data
+    :param model: the climate model to use
+    :param name: the model variant to use. Follows the convention
+        {realisation/initialisation/physics/forcing}_grid
+
+    :returns: a numpy array holding current speeds
+    :raises ValueError: if the requested year/model/name is not found in the metadata
+    :raises IOError: if the file is malformed, or has a different length than expected from the metadata
+
+    """
+    metadata = _read_clean_current_metadata(metadata_path)
+
+    # Each record in the .dat file is stored as a 4-byte length header, the data itself,
+    # then a 4-byte length footer. We want the byte offset of the requested record, which
+    # corresponds to its row number in the metadata file
+
+    # Find the row in the metadata file
+    row = metadata[
+        (metadata["year"] == year)
+        & (metadata["model"] == model)
+        & (metadata["name"] == name)
+    ]
+    if len(row) == 0:
+        raise ValueError(
+            f"Could not find entry for {model=}, {name=}, {year=} in metadata"
+        )
+    if len(row) > 1:
+        raise ValueError(
+            f"Found multiple entries for {model=}, {name=}, {year=} in metadata"
+        )
+
+    # This tells us how many records to skip
+    row_index = row.index[0]
+
+    with open(path, "rb") as f:
+        n_bytes_per_record = np.fromfile(f, dtype=np.int32, count=1)[0]
+
+        # Add the header + footer
+        n_bytes_per_record += 8
+
+        # Check the file is the right size, based on the metadata
+        expected_size = int(n_bytes_per_record) * len(metadata)
+        f.seek(0, 2)  # Seek to end of file
+        actual_size = f.tell()
+        if actual_size != expected_size:
+            raise IOError(
+                f"File size {actual_size} does not match expected {expected_size} from metadata"
+            )
+
+        offset = row_index * n_bytes_per_record
+
+        f.seek(offset)
+        header = np.fromfile(f, dtype=np.int32, count=1)[0]
+        if header + 8 != n_bytes_per_record:
+            raise IOError(
+                f"Record length marker {header} does not match expected {n_bytes_per_record - 8}"
+            )
+
+        # Read the record as little-endian float32
+        retval = np.fromfile(f, dtype="<f4", count=header // 4)
+
+        # Mask the land fill value
+        retval[retval == -1.9e19] = np.nan
+
+        # Reshape onto the global grid and flip vertically into the expected orientation
+        retval = np.flipud(retval.reshape((720, 1440)))
+
+    return current_speed_from_mdt(retval)
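+
+# Usage sketch: the file and metadata paths here are hypothetical placeholders for the RDSF
+# locations described in the docstring, and 2003 is an arbitrary example year:
+#
+#     speed = read_clean_currents(
+#         pathlib.Path("clean_currents.dat"),
+#         pathlib.Path("clean_currents_metadata.csv"),
+#         year=2003,
+#     )
+#     assert speed.shape == (720, 1440)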
+
+
 def _included_indices(
     n_rows: int, tile_size: int, max_latitude: float
 ) -> tuple[int, int]: