From fdef8e9a8548dd73064ffe86ca03c5425771ecc9 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 9 Jun 2025 11:39:45 -0400 Subject: [PATCH 01/29] adopt new test data management system, name helper function yangtze, reformatting, ignore RST210 --- .flake8 | 1 + src/ravenpy/testing/__init__.py | 3 + src/ravenpy/testing/helpers.py | 0 src/ravenpy/testing/utils.py | 527 ++++++++++++++++++++++++++++ src/ravenpy/utilities/publishing.py | 97 +++++ src/ravenpy/utilities/testdata.py | 353 ------------------- 6 files changed, 628 insertions(+), 353 deletions(-) create mode 100644 src/ravenpy/testing/__init__.py create mode 100644 src/ravenpy/testing/helpers.py create mode 100644 src/ravenpy/testing/utils.py create mode 100644 src/ravenpy/utilities/publishing.py delete mode 100644 src/ravenpy/utilities/testdata.py diff --git a/.flake8 b/.flake8 index 33e184ea..003ab329 100644 --- a/.flake8 +++ b/.flake8 @@ -17,6 +17,7 @@ ignore = D, E, F, + RST210, W503 per-file-ignores = rst-roles = diff --git a/src/ravenpy/testing/__init__.py b/src/ravenpy/testing/__init__.py new file mode 100644 index 00000000..35c5b34d --- /dev/null +++ b/src/ravenpy/testing/__init__.py @@ -0,0 +1,3 @@ +"""RavenPy Testing Utilities""" + +from ravenpy.testing.utils import * diff --git a/src/ravenpy/testing/helpers.py b/src/ravenpy/testing/helpers.py new file mode 100644 index 00000000..e69de29b diff --git a/src/ravenpy/testing/utils.py b/src/ravenpy/testing/utils.py new file mode 100644 index 00000000..2dc389a2 --- /dev/null +++ b/src/ravenpy/testing/utils.py @@ -0,0 +1,527 @@ +"""Tools for searching for and acquiring test data.""" + +from __future__ import annotations + +import importlib.metadata as ilm +import importlib.resources as ilr +import logging +import os +import re +import time +import warnings +from collections.abc import Callable +from datetime import datetime as dt +from functools import wraps +from io import StringIO +from pathlib import Path +from shutil import copytree +from typing import IO, Any, TextIO +from urllib.error import HTTPError, URLError +from urllib.parse import urljoin, urlparse +from urllib.request import urlretrieve + +from filelock import FileLock +from packaging.version import Version +from xarray import Dataset +from xarray import open_dataset as _open_dataset +from xclim.testing.utils import show_versions as _show_versions + +import ravenpy + +try: + import pooch +except ImportError: + warnings.warn( + "The `pooch` library is not installed. The default cache directory for testing data will not be set." 
+ ) + pooch = None + +LOGGER = logging.getLogger("ravenpy.testing.utils") + +__all__ = [ + "TESTDATA_BRANCH", + "TESTDATA_CACHE_DIR", + "TESTDATA_REPO_URL", + "audit_url", + "default_testdata_cache", + "default_testdata_repo_url", + "default_testdata_version", + "gather_testing_data", + "open_dataset", + "populate_testing_data", + "show_versions", + "testing_setup_warnings", + "yangtze", +] + +default_testdata_version = "v2025.5.16" +"""Default version of the testing data to use when fetching datasets.""" + +default_testdata_repo_url = ( + "https://raw.githubusercontent.com/Ouranosinc/raven-testdata/" +) +"""Default URL of the testing data repository to use when fetching datasets.""" + +try: + default_testdata_cache = Path(pooch.os_cache("raven-testdata")) + """Default location for the testing data cache.""" +except (AttributeError, TypeError): + default_testdata_cache = None + +TESTDATA_REPO_URL = str(os.getenv("RAVEN_TESTDATA_REPO_URL", default_testdata_repo_url)) +""" +Sets the URL of the testing data repository to use when fetching datasets. + +Notes +----- +When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable: + +.. code-block:: console + + $ export RAVEN_TESTDATA_REPO_URL="https://github.com/my_username/raven-testdata" + +or setting the variable at runtime: + +.. code-block:: console + + $ env RAVEN_TESTDATA_REPO_URL="https://github.com/my_username/raven-testdata" pytest +""" + +TESTDATA_BRANCH = str(os.getenv("RAVEN_TESTDATA_BRANCH", default_testdata_version)) +""" +Sets the branch of the testing data repository to use when fetching datasets. + +Notes +----- +When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable: + +.. code-block:: console + + $ export RAVEN_TESTDATA_BRANCH="my_testing_branch" + +or setting the variable at runtime: + +.. code-block:: console + + $ env RAVEN_TESTDATA_BRANCH="my_testing_branch" pytest +""" + +TESTDATA_CACHE_DIR = os.getenv("RAVEN_TESTDATA_CACHE_DIR", default_testdata_cache) +""" +Sets the directory to store the testing datasets. + +If not set, the default location will be used (based on ``platformdirs``, see :func:`pooch.os_cache`). + +Notes +----- +When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable: + +.. code-block:: console + + $ export RAVEN_TESTDATA_CACHE_DIR="/path/to/my/data" + +or setting the variable at runtime: + +.. code-block:: console + + $ env RAVEN_TESTDATA_CACHE_DIR="/path/to/my/data" pytest +""" + + +def show_versions( + file: os.PathLike | StringIO | TextIO | None = None, + deps: list | None = None, +) -> str | None: + """ + Print the versions of RavenPy and its dependencies. + + Parameters + ---------- + file : {os.PathLike, StringIO, TextIO}, optional + If provided, prints to the given file-like object. Otherwise, returns a string. + deps : list, optional + A list of dependencies to gather and print version information from. Otherwise, print `RavenPy` dependencies. + + Returns + ------- + str or None + The formatted version information if `file` is not provided, otherwise None. 
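+
+    Examples
+    --------
+    A minimal usage sketch (relies only on the behaviour documented above):
+
+    .. code-block:: python
+
+        from ravenpy.testing.utils import show_versions
+
+        # With no `file` argument, the formatted version information
+        # is returned as a string.
+        print(show_versions())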
+ """ + + def _get_ravenpy_dependencies(): + ravenpy_metadata = ilm.metadata("ravenpy") + requires = ravenpy_metadata.get_all("Requires-Dist") + requires = [ + req.split("[")[0] + .split(";")[0] + .split(">")[0] + .split("<")[0] + .split("=")[0] + .split("!")[0] + .strip() + for req in requires + ] + sorted_deps = sorted(list(set(requires) - {"ravenpy"})) + + return ["ravenpy"] + sorted_deps + + if deps is None: + deps = _get_ravenpy_dependencies() + + return _show_versions(file=file, deps=deps) + + +# Test Data Utilities ### + + +def testing_setup_warnings(): + """Warn users about potential incompatibilities between RavenPy and raven-testdata versions.""" + if ( + re.match(r"^\d+\.\d+\.\d+$", ravenpy.__version__) + and TESTDATA_BRANCH != default_testdata_version + ): + # This does not need to be emitted on GitHub Workflows and ReadTheDocs + if not os.getenv("CI") and not os.getenv("READTHEDOCS"): + warnings.warn( + f"`RavenPy` stable ({ravenpy.__version__}) is running tests against a non-default " + f"branch of the testing data. It is possible that changes to the testing data may " + f"be incompatible with some assertions in this version. " + f"Please be sure to check {TESTDATA_REPO_URL} for more information.", + ) + + if re.match(r"^v\d+\.\d+\.\d+", TESTDATA_BRANCH): + # Find the date of the last modification of RavenPy source files to generate a calendar version + install_date = dt.strptime( + time.ctime(Path(ravenpy.__file__).stat().st_mtime), + "%a %b %d %H:%M:%S %Y", + ) + install_calendar_version = ( + f"{install_date.year}.{install_date.month}.{install_date.day}" + ) + + if Version(TESTDATA_BRANCH) > Version(install_calendar_version): + warnings.warn( + f"The installation date of `RavenPy` ({install_date.ctime()}) " + f"predates the last release of testing data ({TESTDATA_BRANCH}). " + "It is very likely that the testing data is incompatible with this build of `RavenPy`.", + ) + + +def load_registry( + branch: str = TESTDATA_BRANCH, repo: str = TESTDATA_REPO_URL +) -> dict[str, str]: + """ + Load the registry file for the test data. + + Parameters + ---------- + branch : str + Branch of the repository to use when fetching testing datasets. + repo : str + URL of the repository to use when fetching testing datasets. + + Returns + ------- + dict + Dictionary of filenames and hashes. 
+ """ + if not repo.endswith("/"): + repo = f"{repo}/" + remote_registry = audit_url( + urljoin( + urljoin(repo, branch if branch.endswith("/") else f"{branch}/"), + "data/registry.txt", + ) + ) + + if repo != default_testdata_repo_url: + external_repo_name = urlparse(repo).path.split("/")[-2] + external_branch_name = branch.split("/")[-1] + registry_file = Path( + str( + ilr.files("ravenpy").joinpath( + f"testing/registry.{external_repo_name}.{external_branch_name}.txt" + ) + ) + ) + urlretrieve(remote_registry, registry_file) # noqa: S310 + + elif branch != default_testdata_version: + custom_registry_folder = Path( + str(ilr.files("ravenpy").joinpath(f"testing/{branch}")) + ) + custom_registry_folder.mkdir(parents=True, exist_ok=True) + registry_file = custom_registry_folder.joinpath("registry.txt") + urlretrieve(remote_registry, registry_file) # noqa: S310 + + else: + registry_file = Path(str(ilr.files("ravenpy").joinpath("testing/registry.txt"))) + + if not registry_file.exists(): + raise FileNotFoundError(f"Registry file not found: {registry_file}") + + # Load the registry file + with registry_file.open(encoding="utf-8") as f: + registry = {line.split()[0]: line.split()[1] for line in f} + return registry + + +def yangtze( + repo: str = TESTDATA_REPO_URL, + branch: str = TESTDATA_BRANCH, + cache_dir: str | Path = TESTDATA_CACHE_DIR, + allow_updates: bool = True, +): + """ + Pooch registry instance for RavenPy test data. + + Parameters + ---------- + repo : str + URL of the repository to use when fetching testing datasets. + branch : str + Branch of repository to use when fetching testing datasets. + cache_dir : str or Path + The path to the directory where the data files are stored. + allow_updates : bool + If True, allow updates to the data files. Default is True. + + Returns + ------- + pooch.Pooch + The Pooch instance for accessing the RavenPy testing data. + + Notes + ----- + There are three environment variables that can be used to control the behaviour of this registry: + - ``RAVENPY_TESTDATA_CACHE_DIR``: If this environment variable is set, it will be used as the + base directory to store the data files. + The directory should be an absolute path (i.e., it should start with ``/``). + Otherwise, the default location will be used (based on ``platformdirs``, see :py:func:`pooch.os_cache`). + - ``RAVENPY_TESTDATA_REPO_URL``: If this environment variable is set, it will be used as the URL of + the repository to use when fetching datasets. Otherwise, the default repository will be used. + - ``RAVENPY_TESTDATA_BRANCH``: If this environment variable is set, it will be used as the branch of + the repository to use when fetching datasets. Otherwise, the default branch will be used. + + Examples + -------- + Using the registry to download a file: + + .. code-block:: python + + import xarray as xr + from ravenpy.testing import yangtze + + example_file = yangtze().fetch("example.nc") + data = xr.open_dataset(example_file) + """ + if pooch is None: + raise ImportError( + "The `pooch` package is required to fetch the RavenPy testing data. " + "You can install it with `pip install pooch` or `pip install ravenpy[dev]`." 
+ ) + if not repo.endswith("/"): + repo = f"{repo}/" + remote = audit_url( + urljoin(urljoin(repo, branch if branch.endswith("/") else f"{branch}/"), "data") + ) + + _yangtze = pooch.create( + path=cache_dir, + base_url=remote, + version=default_testdata_version, + version_dev=branch, + allow_updates=allow_updates, + registry=load_registry(branch=branch, repo=repo), + ) + + # Add a custom fetch method to the Pooch instance + # Needed to address: https://github.com/readthedocs/readthedocs.org/issues/11763 + # Fix inspired by @bjlittle (https://github.com/bjlittle/geovista/pull/1202) + _yangtze.fetch_diversion = _yangtze.fetch + + # Overload the fetch method to add user-agent headers + @wraps(_yangtze.fetch_diversion) + def _fetch( + *args, **kwargs: bool | Callable + ) -> str: # numpydoc ignore=GL08 # *args: str + def _downloader( + url: str, + output_file: str | IO, + poocher: pooch.Pooch, + check_only: bool | None = False, + ) -> None: + """Download the file from the URL and save it to the save_path.""" + headers = {"User-Agent": f"RavenPy ({ravenpy.__version__})"} + downloader = pooch.HTTPDownloader(headers=headers) + return downloader(url, output_file, poocher, check_only=check_only) + + # default to our http/s downloader with user-agent headers + kwargs.setdefault("downloader", _downloader) + return _yangtze.fetch_diversion(*args, **kwargs) + + # Replace the fetch method with the custom fetch method + _yangtze.fetch = _fetch + + return _yangtze + + +def open_dataset( + name: str, + yangtze_kwargs: dict[str, Path | str | bool] | None = None, + **xr_kwargs: Any, +) -> Dataset: + r""" + Convenience function to open a dataset from the RavenPy testing data using the `yangtze` class. + + This is a thin wrapper around the `yangtze` class to make it easier to open RavenPy testing datasets. + + Parameters + ---------- + name : str + Name of the file containing the dataset. + yangtze_kwargs : dict + Keyword arguments passed to the yangtze function. + **xr_kwargs : Any + Keyword arguments passed to xarray.open_dataset. + + Returns + ------- + xarray.Dataset + The dataset. + + See Also + -------- + xarray.open_dataset : Open and read a dataset from a file or file-like object. + yangtze : Pooch wrapper for accessing the RavenPy testing data. + """ + if yangtze_kwargs is None: + yangtze_kwargs = {} + return _open_dataset(yangtze(**yangtze_kwargs).fetch(name), **xr_kwargs) + + +def populate_testing_data( + temp_folder: Path | None = None, + repo: str = TESTDATA_REPO_URL, + branch: str = TESTDATA_BRANCH, + local_cache: Path = TESTDATA_CACHE_DIR, +) -> None: + """ + Populate the local cache with the testing data. + + Parameters + ---------- + temp_folder : Path, optional + Path to a temporary folder to use as the local cache. If not provided, the default location will be used. + repo : str, optional + URL of the repository to use when fetching testing datasets. + branch : str, optional + Branch of ravenpy-testdata to use when fetching testing datasets. + local_cache : Path + The path to the local cache. Defaults to the location set by the platformdirs library. + The testing data will be downloaded to this local cache. + """ + # Create the Pooch instance + n = yangtze(repo=repo, branch=branch, cache_dir=temp_folder or local_cache) + + # Download the files + errored_files = [] + for file in load_registry(): + try: + n.fetch(file) + except HTTPError: # noqa: PERF203 + msg = f"File `{file}` not accessible in remote repository." 
+ logging.error(msg) + errored_files.append(file) + else: + logging.info("Files were downloaded successfully.") + + if errored_files: + logging.error( + "The following files were unable to be downloaded: %s", + errored_files, + ) + + +def gather_testing_data( + worker_cache_dir: str | os.PathLike[str] | Path, + worker_id: str, + _cache_dir: str | os.PathLike[str] | None = TESTDATA_CACHE_DIR, +) -> None: + """ + Gather testing data across workers. + + Parameters + ---------- + worker_cache_dir : str or Path + The directory to store the testing data. + worker_id : str + The worker ID. + _cache_dir : str or Path, optional + The directory to store the testing data. Default is None. + + Raises + ------ + ValueError + If the cache directory is not set. + FileNotFoundError + If the testing data is not found. + """ + if _cache_dir is None: + raise ValueError( + "The cache directory must be set. " + "Please set the `cache_dir` parameter or the `RAVENPY_DATA_DIR` environment variable." + ) + cache_dir = Path(_cache_dir) + + if worker_id == "master": + populate_testing_data(branch=TESTDATA_BRANCH) + else: + cache_dir.mkdir(exist_ok=True, parents=True) + lockfile = cache_dir.joinpath(".lock") + test_data_being_written = FileLock(lockfile) + with test_data_being_written: + # This flag prevents multiple calls from re-attempting to download testing data in the same pytest run + populate_testing_data(branch=TESTDATA_BRANCH) + cache_dir.joinpath(".data_written").touch() + with test_data_being_written.acquire(): + if lockfile.exists(): + lockfile.unlink() + copytree(cache_dir.joinpath(default_testdata_version), worker_cache_dir) + + +# Testing Utilities ### + + +def audit_url(url: str, context: str | None = None) -> str: + """ + Check if the URL is well-formed. + + Parameters + ---------- + url : str + The URL to check. + context : str, optional + Additional context to include in the error message. Default is None. + + Returns + ------- + str + The URL if it is well-formed. + + Raises + ------ + URLError + If the URL is not well-formed. + """ + msg = "" + result = urlparse(url) + if result.scheme == "http": + msg = f"{context if context else ''} URL is not using secure HTTP: '{url}'".strip() + if not all([result.scheme, result.netloc]): + msg = f"{context if context else ''} URL is not well-formed: '{url}'".strip() + + if msg: + LOGGER.error(msg) + raise URLError(msg) + return url diff --git a/src/ravenpy/utilities/publishing.py b/src/ravenpy/utilities/publishing.py new file mode 100644 index 00000000..81c9e491 --- /dev/null +++ b/src/ravenpy/utilities/publishing.py @@ -0,0 +1,97 @@ +"""Publishing utilities for RavenPy.""" + +from __future__ import annotations + +import os +import re +from io import StringIO +from pathlib import Path +from typing import TextIO + + +def publish_release_notes( + style: str = "md", + file: os.PathLike[str] | StringIO | TextIO | None = None, + changes: str | os.PathLike[str] | None = None, +) -> str | None: + """ + Format release notes in Markdown or ReStructuredText. + + Parameters + ---------- + style : {"rst", "md"} + Use ReStructuredText formatting or Markdown. Default: Markdown. + file : {os.PathLike, StringIO, TextIO}, optional + If provided, prints to the given file-like object. Otherwise, returns a string. + changes : str or os.PathLike[str], optional + If provided, manually points to the file where the changelog can be found. + Assumes a relative path otherwise. + + Returns + ------- + str, optional + If `file` not provided, the formatted release notes. 
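+
+    Examples
+    --------
+    A minimal sketch, assuming the default ``CHANGELOG.rst`` in the RavenPy
+    source tree is available:
+
+    .. code-block:: python
+
+        from ravenpy.utilities.publishing import publish_release_notes
+
+        # With no `file` argument, the converted notes are returned as a string.
+        markdown_notes = publish_release_notes(style="md")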
+
+    Notes
+    -----
+    This function is used solely for development and packaging purposes.
+    """
+    if isinstance(changes, str | Path):
+        changes_file = Path(changes).absolute()
+    else:
+        changes_file = Path(__file__).absolute().parents[3].joinpath("CHANGELOG.rst")
+
+    if not changes_file.exists():
+        raise FileNotFoundError("Changelog file not found in RavenPy folder tree.")
+
+    with Path(changes_file).open(encoding="utf-8") as hf:
+        changes = hf.read()
+
+    if style == "rst":
+        hyperlink_replacements = {
+            r":issue:`([0-9]+)`": r"`GH/\1 <https://github.com/CSHS-CWRA/RavenPy/issues/\1>`_",
+            r":pull:`([0-9]+)`": r"`PR/\1 <https://github.com/CSHS-CWRA/RavenPy/pull/\1>`_",
+            r":user:`([a-zA-Z0-9_.-]+)`": r"`@\1 <https://github.com/\1>`_",
+        }
+    elif style == "md":
+        hyperlink_replacements = {
+            r":issue:`([0-9]+)`": r"[GH/\1](https://github.com/CSHS-CWRA/RavenPy/issues/\1)",
+            r":pull:`([0-9]+)`": r"[PR/\1](https://github.com/CSHS-CWRA/RavenPy/pull/\1)",
+            r":user:`([a-zA-Z0-9_.-]+)`": r"[@\1](https://github.com/\1)",
+        }
+    else:
+        msg = f"Formatting style not supported: {style}"
+        raise NotImplementedError(msg)
+
+    for search, replacement in hyperlink_replacements.items():
+        changes = re.sub(search, replacement, changes)
+
+    if style == "md":
+        changes = changes.replace("=========\nChangelog\n=========", "# Changelog")
+
+        titles = {r"\n(.*?)\n([\-]{1,})": "-", r"\n(.*?)\n([\^]{1,})": "^"}
+        for title_expression, level in titles.items():
+            found = re.findall(title_expression, changes)
+            for grouping in found:
+                fixed_grouping = (
+                    str(grouping[0]).replace("(", r"\(").replace(")", r"\)")
+                )
+                search = rf"({fixed_grouping})\n([\{level}]{'{' + str(len(grouping[1])) + '}'})"
+                replacement = f"{'##' if level == '-' else '###'} {grouping[0]}"
+                changes = re.sub(search, replacement, changes)
+
+        link_expressions = r"[\`]{1}([\w\s]+)\s<(.+)>`\_"
+        found = re.findall(link_expressions, changes)
+        for grouping in found:
+            search = rf"`{grouping[0]} <.+>`\_"
+            replacement = f"[{str(grouping[0]).strip()}]({grouping[1]})"
+            changes = re.sub(search, replacement, changes)
+
+    if not file:
+        return changes
+    if isinstance(file, Path | os.PathLike):
+        with Path(file).open("w", encoding="utf-8") as f:
+            print(changes, file=f)
+    else:
+        print(changes, file=file)
+    return None
diff --git a/src/ravenpy/utilities/testdata.py b/src/ravenpy/utilities/testdata.py
deleted file mode 100644
index e3533719..00000000
--- a/src/ravenpy/utilities/testdata.py
+++ /dev/null
@@ -1,353 +0,0 @@
-"""Tools for searching for and acquiring test data."""
-
-import hashlib
-import json
-import logging
-import re
-import urllib
-import warnings
-from collections.abc import Sequence
-from pathlib import Path
-from shutil import copy
-from typing import Optional, Union
-from urllib.error import HTTPError, URLError
-from urllib.parse import urljoin
-from urllib.request import urlretrieve
-
-from platformdirs import user_cache_dir
-from xarray import Dataset
-from xarray import open_dataset as _open_dataset
-
-_default_cache_dir = user_cache_dir("raven_testing_data")
-
-LOGGER = logging.getLogger("RAVEN")
-
-__all__ = [
-    "get_file",
-    "get_local_testdata",
-    "open_dataset",
-    "query_folder",
-]
-
-
-def file_md5_checksum(fname):
-    hash_md5 = hashlib.md5()  # noqa: S324
-    with Path(fname).open("rb") as f:
-        hash_md5.update(f.read())
-    return hash_md5.hexdigest()
-
-
-def get_local_testdata(
-    patterns: Union[str, Sequence[str]],
-    temp_folder: Union[str, Path],
-    branch: str = "master",
-    _local_cache: Union[str, Path] = _default_cache_dir,
-) -> Union[Path, list[Path]]:
-    """
-    Copy specific testdata from a default cache to a temporary folder.
- - Return files matching `pattern` in the default cache dir and move to a local temp folder. - - Parameters - ---------- - patterns : str or Sequence of str - Glob patterns, which must include the folder. - temp_folder : str or Path - Target folder to copy files and filetree to. - branch : str - For GitHub-hosted files, the branch to download from. Default: "master". - _local_cache : str or Path - Local cache of testing data. - - Returns - ------- - Union[Path, List[Path]] - """ - temp_paths = [] - - if isinstance(patterns, str): - patterns = [patterns] - - for pattern in patterns: - potential_paths = [ - path for path in Path(temp_folder).joinpath(branch).glob(pattern) - ] - if potential_paths: - temp_paths.extend(potential_paths) - continue - - testdata_path = Path(_local_cache) - if not testdata_path.exists(): - raise RuntimeError(f"{testdata_path} does not exists") - paths = [path for path in testdata_path.joinpath(branch).glob(pattern)] - if not paths: - raise FileNotFoundError( - f"No data found for {pattern} at {testdata_path}/{branch}." - ) - - main_folder = Path(temp_folder).joinpath(branch).joinpath(Path(pattern).parent) - main_folder.mkdir(exist_ok=True, parents=True) - - for file in paths: - temp_file = main_folder.joinpath(file.name) - if not temp_file.exists(): - copy(file, main_folder) - temp_paths.append(temp_file) - - # Return item directly when singleton, for convenience - return temp_paths[0] if len(temp_paths) == 1 else temp_paths - - -def _get( - fullname: Path, - github_url: str, - branch: str, - suffix: str, - cache_dir: Path, -) -> Path: - cache_dir = cache_dir.absolute() - local_file = cache_dir / branch / fullname - md5_name = fullname.with_suffix(f"{suffix}.md5") - md5_file = cache_dir / branch / md5_name - - if not github_url.lower().startswith("http"): - raise ValueError(f"GitHub URL not safe: '{github_url}'.") - - if local_file.is_file(): - local_md5 = file_md5_checksum(local_file) - try: - url = f"{github_url}/raw/{branch}/{md5_name.as_posix()}" - msg = f"Attempting to fetch remote file md5: {md5_name.as_posix()}" - LOGGER.debug(msg) - urlretrieve(url, md5_file) # noqa: S310 - with Path(md5_file).open() as f: - remote_md5 = f.read() - if local_md5.strip() != remote_md5.strip(): - local_file.unlink() - msg = ( - f"MD5 checksum for {local_file.as_posix()} does not match upstream md5. " - "Attempting new download." - ) - warnings.warn(msg) - except (HTTPError, URLError): - msg = f"{md5_name.as_posix()} not accessible online. Unable to determine validity with upstream repo." - warnings.warn(msg) - - if not local_file.is_file(): - # This will always leave this directory on disk. - # We may want to add an option to remove it. - local_file.parent.mkdir(parents=True, exist_ok=True) - - url = f"{github_url}/raw/{branch}/{fullname.as_posix()}" - msg = f"Attempting to fetch remote file: {fullname.as_posix()}" - LOGGER.info(msg) - try: - urlretrieve(url, local_file) # noqa: S310 - except HTTPError as e: - msg = f"{local_file.name} not found. Aborting file retrieval." - local_file.unlink() - raise FileNotFoundError(msg) from e - - url = f"{github_url}/raw/{branch}/{md5_name.as_posix()}" - msg = f"Attempting to fetch remote file md5: {md5_name.as_posix()}" - LOGGER.info(msg) - try: - urlretrieve(url, md5_file) # noqa: S310 - except HTTPError as e: - msg = f"{md5_name.as_posix()} not found. Aborting file retrieval." 
- local_file.unlink() - raise FileNotFoundError(msg) from e - - local_md5 = file_md5_checksum(local_file) - try: - with Path(md5_file).open() as f: - remote_md5 = f.read() - if local_md5.strip() != remote_md5.strip(): - local_file.unlink() - msg = ( - f"{local_file.as_posix()} and md5 checksum do not match. " - "There may be an issue with the upstream origin data." - ) - raise OSError(msg) - except OSError as e: - LOGGER.error(e) - - return local_file - - -# idea copied from xclim that borrowed it from xarray that was borrowed from Seaborn -def get_file( - name: Union[str, Path, Sequence[Union[str, Path]]], - github_url: str = "https://github.com/Ouranosinc/raven-testdata", - branch: str = "master", - cache_dir: Union[str, Path] = _default_cache_dir, -) -> Union[Path, list[Path]]: - """ - Return a file from an online GitHub-like repository. - - If a local copy is found then always use that to avoid network traffic. - - Parameters - ---------- - name : str or Path or Sequence of str or Path - Name of the file or list/tuple of names of files containing the dataset(s) including suffixes. - github_url : str - URL to GitHub repository where the data is stored. - branch : str - For GitHub-hosted files, the branch to download from. Default: "master". - cache_dir : str or Path - The directory in which to search for and write cached data. - - Returns - ------- - Path or list of Path - """ - if isinstance(name, (str, Path)): - name = [name] - - cache_dir = Path(cache_dir) - - files = list() - for n in name: - fullname = Path(n) - suffix = fullname.suffix - files.append( - _get( - fullname=fullname, - github_url=github_url, - branch=branch, - suffix=suffix, - cache_dir=cache_dir, - ) - ) - if len(files) == 1: - return files[0] - return files - - -# Credits to Anselme https://stackoverflow.com/a/62003257/7322852 (CC-BY-SA 4.0) -def query_folder( - folder: Optional[str] = None, - pattern: Optional[str] = None, - github_url: str = "https://github.com/Ouranosinc/raven-testdata", - branch: str = "master", -) -> list[str]: - """ - List the files available for retrieval from a remote git repository with get_file. - - If provided a folder name, will perform a globbing-like filtering operation for parent folders. - - Parameters - ---------- - folder : str, optional - Relative pathname of the sub-folder from the top-level. - pattern : str, optional - Regex pattern to identify a file. - github_url : str - URL to GitHub repository where the data is stored. - branch : str - For GitHub-hosted files, the branch to download from. Default: "master". 
- - Returns - ------- - list of str - """ - repo_name = github_url.strip("https://github.com/") - - url = f"https://api.github.com/repos/{repo_name}/git/trees/{branch}?recursive=1" - with urllib.request.urlopen(url) as response: # noqa: S310 - res = json.loads(response.read().decode()) - - try: - md5_files = [f["path"] for f in res["tree"] if f["path"].endswith(".md5")] - if folder: - folder = "/".join("/".split(folder)) if "/" in folder else folder - md5_files = [f for f in md5_files if folder in Path(f).parent.as_posix()] - files = [re.sub(".md5$", "", f) for f in md5_files] - - if pattern: - regex = re.compile(pattern) - files = [string for string in files if re.search(regex, string)] - except KeyError: - if {"message", "documentation_url"}.issubset(set(res.keys())): - raise ConnectionRefusedError(res["message"]) - else: - raise - - return files - - -# idea copied from xclim that borrowed it from xarray that was borrowed from Seaborn -def open_dataset( - name: str, - suffix: Optional[str] = None, - dap_url: Optional[str] = None, - github_url: str = "https://github.com/Ouranosinc/raven-testdata", - branch: str = "master", - cache: bool = True, - cache_dir: Union[str, Path] = _default_cache_dir, - **kwds, -) -> Dataset: - r"""Open a dataset from the online GitHub-like repository. - - If a local copy is found then always use that to avoid network traffic. - - Parameters - ---------- - name : str - Name of the file containing the dataset. If no suffix is given, assumed to be netCDF ('.nc' is appended). - suffix : str, optional - If no suffix is given, assumed to be netCDF ('.nc' is appended). For no suffix, set "". - dap_url : str, optional - URL to OPeNDAP folder where the data is stored. If supplied, supersedes github_url. - github_url : str - URL to GitHub repository where the data is stored. - branch : str, optional - For GitHub-hosted files, the branch to download from. - cache : bool - If True, then cache data locally for use on subsequent calls. - cache_dir : str or Path - The directory in which to search for and write cached data. - \*\*kwds : dict - For NetCDF files, keywords passed to xarray.open_dataset. - - Returns - ------- - xr.Dataset - - See Also - -------- - xarray.open_dataset - """ - name = Path(name) - cache_dir = Path(cache_dir) - if suffix is None: - suffix = ".nc" - fullname = name.with_suffix(suffix) - - if dap_url is not None: - dap_file = urljoin(dap_url, str(name)) - try: - ds = _open_dataset(dap_file, **kwds) - return ds - except OSError: - msg = "OPeNDAP file not read. Verify that service is available." 
- LOGGER.error(msg) - raise - - local_file = _get( - fullname=fullname, - github_url=github_url, - branch=branch, - suffix=suffix, - cache_dir=cache_dir, - ) - - try: - ds = _open_dataset(local_file, **kwds) - if not cache: - ds = ds.load() - local_file.unlink() - return ds - except OSError: - raise From 7677b9513218cd4e680d98f7ecb869ae1b53d583 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 9 Jun 2025 14:20:01 -0400 Subject: [PATCH 02/29] add testing data registry, move common testing functions to testing.helpers --- src/ravenpy/testing/helpers.py | 137 +++++++++++++++++++++++++++++++ src/ravenpy/testing/registry.txt | 58 +++++++++++++ tests/common.py | 130 ----------------------------- 3 files changed, 195 insertions(+), 130 deletions(-) create mode 100644 src/ravenpy/testing/registry.txt delete mode 100644 tests/common.py diff --git a/src/ravenpy/testing/helpers.py b/src/ravenpy/testing/helpers.py index e69de29b..cc985c61 100644 --- a/src/ravenpy/testing/helpers.py +++ b/src/ravenpy/testing/helpers.py @@ -0,0 +1,137 @@ +from pathlib import Path + +import numpy as np +import pandas as pd +import xarray as xr + +__all__ = [ + "convert_2d", + "convert_3d", + "count_pixels", + "make_bnds", + "synthetic_gr4j_inputs", +] + + +def count_pixels(stats: dict, numeric_categories=False) -> int: + category_counts = 0 + for key, val in stats.items(): + if numeric_categories: + try: + int(key) + except ValueError: # noqa: S112 + continue + if key in ["count", "min", "max", "mean", "median", "sum", "nodata"]: + continue + category_counts += val + return category_counts + + +def synthetic_gr4j_inputs(path): + time = pd.date_range(start="2000-07-01", end="2002-07-01", freq="D") + + pr = 3 * np.ones(len(time)) + pr = xr.DataArray(pr, coords={"time": time}, dims="time", name="pr") + pr.to_netcdf(Path(path).joinpath("pr.nc")) + + tas = 280 + 20 * np.cos(np.arange(len(time)) * 2 * np.pi / 365.0) + tas = xr.DataArray(tas, coords={"time": time}, dims="time", name="tas") + tas.to_netcdf(Path(path).joinpath("tas.nc")) + + evap = 3 + 3 * np.cos(-30 + np.arange(len(time)) * 2 * np.pi / 365.0) + evap = xr.DataArray(evap, coords={"time": time}, dims="time", name="evap") + evap.to_netcdf(Path(path).joinpath("evap.nc")) + + +def make_bnds(params, delta): + """ + Return low and high parameter bounds by subtracting and adding delta*params to params. + + Parameters + ---------- + params : sequence + Parameters. + delta : float [0,1] + Relative delta to subtract and add to parameters. + + Returns + ------- + (tuple, tuple) + Low and high bounds for parameters. + """ + arr = np.asarray(params) + d = np.abs(arr * delta) + return tuple(arr - d), tuple(arr + d) + + +def convert_2d(fn): + """Take the 1D Salmon time series and convert it to a 2D time series. 
+ + Example + ------- + >>> fn = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" + >>> fn2 = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc" + >>> _convert_2d(fn).to_netcdf(fn2, "w") + """ + features = { + "name": "Salmon", + "area": 4250.6, + "elevation": 843.0, + "latitude": 54.4848, + "longitude": -123.3659, + } + ds = xr.open_dataset(fn, decode_times=False).rename({"nstations": "region"}) + + out = xr.Dataset( + coords={ + "lon": ds.lon.expand_dims("lon").squeeze("region"), + "lat": ds.lat.expand_dims("lat").squeeze("region"), + "time": ds.time, + } + ) + + for v in ds.data_vars: + if v not in ["lon", "lat"]: + out[v] = ds[v].expand_dims("region", axis=1) + + # Add geometry feature variables + for key, val in features.items(): + out[key] = xr.DataArray(name=key, data=[val], dims="region") + + return out + + +def convert_3d(fn): + """Take the 1D Salmon time series and convert it to a 3D time series. + + Example + ------- + >>> fn = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" + >>> fn3 = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc" + >>> _convert_3d(fn).to_netcdf(fn3, "w") + """ + elevation = [[843.0]] + ds = xr.open_dataset(fn, decode_times=False) + + out = xr.Dataset( + coords={ + "lon": ds.lon.expand_dims("lon").squeeze("nstations"), + "lat": ds.lat.expand_dims("lat").squeeze("nstations"), + "time": ds.time, + } + ) + + for v in ds.data_vars: + if v not in ["lon", "lat", "time"]: + out[v] = ds[v] + out[v] = out[v].expand_dims( + ["lon", "lat"] + ) # Needs to be in other step to keep attributes + + out["elevation"] = xr.DataArray( + data=elevation, + dims=["lon", "lat"], + attrs={"units": "m", "standard_name": "altitude"}, + ) + + return out diff --git a/src/ravenpy/testing/registry.txt b/src/ravenpy/testing/registry.txt new file mode 100644 index 00000000..e7f08fe1 --- /dev/null +++ b/src/ravenpy/testing/registry.txt @@ -0,0 +1,58 @@ +basinmaker/drainage_region_0175_v2-1/finalcat_info_v2-1.zip sha256:a761d4a2284d728ff1c846db84d22dbb37afb3acc293f8b3110a56a99bc283c9 +caspar_eccc_hindcasts/geps_watershed.nc sha256:a7eddbdca773b0c66140fbfe042f1bb889a3cb1f368c0393d0598be58bea5208 +cec_nalcms2010_30m/cec_nalcms_subQC.tiff sha256:31143499c1368b7e7d73a5f33b9184aabb2eb869337352f0905853d39eb5675f +clrh/mattawin/06FB002.rvh sha256:9ab58a048a358b159f681e90fbb129084232ba58a467d148b8b0e7f154222204 +clrh/mattawin/Lakes.rvh sha256:5d39d2dbaa96089b38952436583da76ba70a6ab587da5cadcb8d9034a684c7ef +clrh/mattawin/channel_properties.rvp sha256:6b48f275e0fc5f1f323346502fb9a11f6c8869620f1ab89d9c24a6fef7d4633a +donneesqc_mrc_poly/mrc_subset.gml sha256:89a7a47a008a04b5821d6fbd9d11a84b163b341e23798e2375bc0cf7cd29043e +donneesqc_mrc_poly/mrc_subset.zip sha256:021bebe8abdea2ca0f4c605d6d38981d46bf4ec0c642dadf1be3aa34522cfd0d +earthenv_dem_90m/earthenv_dem90_southernQuebec.tiff sha256:310f38da0439300f22751de434a9dbcff23d20376db19aff4e260573b526beff +eccc_forecasts/geps_watershed.nc sha256:f63cab6543e86bdd6b96599b16cd5fecf9da2e022dd08b6351b6852fec598d21 +famine/famine_input.nc sha256:0026f20c141dc007f9041d1964e831c767a72206eb3690ce1ae328d940e1e6a4 +gr4j_cemaneige/solution.rvc sha256:99b25947c22a99ccdd5a738923ab17af100cee8d61c3e73969e07ee2472b457c +hydro_simulations/raven-gr4j-cemaneige-sim_hmets-0_Hydrographs.nc sha256:a1763cb78ab81cae8080792cf99c52a42b0dff1f72f6b7b4862c421811599371 +matapedia/Matapedia_meteo_data_2D.nc 
sha256:1c4a10f7d7964e7943838c9b7725723a80742597bb0753ea53f6fd996f79c0c0 +matapedia/Matapedia_meteo_data_stations.nc sha256:20892444705b504598cadb816694626a93d46a7edac89f6068bc4e476b21e451 +matapedia/Qobs_Matapedia_01BD009.nc sha256:d94e6776954b4e4d05ce185889799b255db7e300f9e438129f53cfde6cb85b07 +nasa/Mars_MGS_MOLA_DEM_georeferenced_region_compressed.tiff sha256:3190c6799724b37c11c326db25cf3cebf57d87e0133e818e6aba18da91d43453 +nrcan/NRCAN_1971-1972_subset.nc sha256:7de9def968a7f3d804255be2b86976e47b0edc3a80e5bf4ad608c1adf2f01d40 +nrcan/NRCAN_2006-2007_subset.nc sha256:a029261f1b74cd688e0b7f70ab3a770e9e9e4f5f005b3a370707803550c1c646 +polygons/Basin_10.zip sha256:d611ec4d082fc7a26fbcfbcd843c04c155e66f2bc044896374218a19af4fc6d9 +polygons/Saskatoon.geojson sha256:5277ccb26affb3d9a7697d2b605fd0367b5621627f409ea17003b36c735e762b +polygons/mars.geojson sha256:4eb7c1d20f43cf41d886026dd6b4601d087d352fc8988d5f43fb44402d07af14 +polygons/mars.zip sha256:504c509f4d7e6a6344e23a76831cba78491017b168f7c4b28752a3ad13d1f8cc +polygons/mississippi.geojson sha256:089445dc9db5c93d851a2a654523871fae65996dd6123b40bfc4b0cbef32916e +raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc sha256:68d234ae5a14051f8d0849ab204aa31c8d65dab4eaec65764306a23540cd8e9d +raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc sha256:2171a33280e7d2deff582bc2f7bb903a6dd520848be0c09495ae938f7c214f61 +raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc sha256:fb61ca9fa69b3c2ccb56af9dd92211bed72aa386ef2aa941013ed2df238b21b7 +raven-gr4j-cemaneige/raven-gr4j-salmon.rvc sha256:465e617460e41b1ca80423a7722f080374407e5aa120be2121ab6a8f84b58bf6 +raven-gr4j-cemaneige/raven-gr4j-salmon.rvh sha256:8695bcc319ff833999e7914315e7cbc07edcc4d18e6be451ff570db23619b89b +raven-gr4j-cemaneige/raven-gr4j-salmon.rvi sha256:1c1e087a746fab027cf11d348760422fdcb589e9fb70703fd2fb218781d21f35 +raven-gr4j-cemaneige/raven-gr4j-salmon.rvp sha256:a066e32951dd6cf248e753f2850e7146539f777f501de9f4f1a33def97ff4120 +raven-gr4j-cemaneige/raven-gr4j-salmon.rvt sha256:b0a6bf6835d8d43830f8477ca09c5542622d0db4d0f9e3eb6f663527f5606ef3 +raven-hmets/Salmon-River-Near-Prince-George_Qobs_daily.rvt sha256:ad4a92d6eb48f9501bc10bd88626135d8018392ecbeb99908e9f64ce76467d90 +raven-hmets/Salmon-River-Near-Prince-George_meteo_daily.rvt sha256:e1b47934873caa9b9ac44c8d3069e73520432b5e245124c255b3f5c52f940c17 +raven-hmets/raven-hmets-salmon.rvc sha256:8a79de8c19ff43a21688f494d1e5c6cb07227e8a83b9560c72b934db56eaa17f +raven-hmets/raven-hmets-salmon.rvh sha256:b602fc0e523ef87888a0985f06b27c6d391ef7bfcef94f1104832b2a25b59ac4 +raven-hmets/raven-hmets-salmon.rvi sha256:f40e4db30a2569790cabc435568ee92fb528166fe95f5265988b3608de3327d2 +raven-hmets/raven-hmets-salmon.rvp sha256:4066a35d2638adab8b608a0721e41ed620f9ab9d7aa0837134b51aee560e3835 +raven-hmets/raven-hmets-salmon.rvt sha256:a32125fc1557cd4d460205a42cea0761203febf3cc7bc6ca9e4c0447292df018 +raven-mohyse/Salmon-River-Near-Prince-George_Qobs_daily.rvt sha256:ad4a92d6eb48f9501bc10bd88626135d8018392ecbeb99908e9f64ce76467d90 +raven-mohyse/Salmon-River-Near-Prince-George_meteo_daily.rvt sha256:e1b47934873caa9b9ac44c8d3069e73520432b5e245124c255b3f5c52f940c17 +raven-mohyse/raven-mohyse-salmon.rvc sha256:282a3ad02ffecb16f3f9ee5c4357a809254499039a4ab51d6b537a2ca9f2224f +raven-mohyse/raven-mohyse-salmon.rvh sha256:80373ec8a80ee1aba9e8e5c04ea68d1f44de5fd693706dc5ebdeec6783548151 +raven-mohyse/raven-mohyse-salmon.rvi sha256:f6414972f4d170f9aa8532f5dcab45518bbd6938b2e2e952631381e05c6bcf1b 
+raven-mohyse/raven-mohyse-salmon.rvp sha256:6f6440be3205407974479c9f7a36527fcb3973e074e3b8ab314a12d13befa7b1 +raven-mohyse/raven-mohyse-salmon.rvt sha256:4420c6db1f981b283fd82d8227efa8a88604ae01140afb35350b2a0fe9e3ab18 +raven-routing-sample/OTT_sub.zip sha256:f90de4ad5467dae7bcd061156c147cd09ec7d9b3182ac86cefaa1816a541db9b +raven-routing-sample/VIC_streaminputs.nc sha256:ffea0e6b1095eabb417e048d3cc807d7964036ffda3e9cbe478dadb20908d7e9 +raven-routing-sample/VIC_streaminputs_weights.rvt sha256:2912c3604a5461dca27255bbdca5d319afd91a4534257391264a14f8c95bffb8 +raven-routing-sample/VIC_temperatures.nc sha256:d9109f115d12f15a4c311374b5770df3a1fd0f8b861bf9dc029ee90e497b9e8a +raven-routing-sample/VIC_test_nodata.nc sha256:434cb2dbe3796ff08ff3d26189fade6d978eea91014cbae9c7df3ba6a949bfe8 +raven-routing-sample/VIC_test_nodata_weights.rvt sha256:bf430e9b9a29bb9c31bda87495e5c72822d32ec6cc6ba311b01997f32bcb4815 +raven-routing-sample/WSC02LE024.nc sha256:e76aa242b06eae78c6a9980592550abd154496b447e78a50167cfb2de0e8c41a +raven-routing-sample/era5-test-dataset-crop.nc sha256:9e3088282022372c4737b04d3360ea88982d749d234ca24ca07743e0f6b28bde +raven-routing-sample/finalcat_hru_info.zip sha256:dd6818455d9e967d000d4fbc3f33c5b68af9d1babe75b093e2bb29847acaf59a +raven-routing-sample/lievre_hrus_v21.zip sha256:2dc6d6ab21f5b009e437da4324438d3cea3934ca0cfd72a0ed69a1deb3c8b6e3 +watershed_vector/Basin_test.zip sha256:c5c02f7fe8be37c22813715cdf30e03eb163113a6610d862ce280a2203895a7e +watershed_vector/LSJ_LL.zip sha256:036d4a607a6d1f11eb4e73915be68fcad11477fecc927742f5b53d21d105bc5b diff --git a/tests/common.py b/tests/common.py deleted file mode 100644 index 6ab155f6..00000000 --- a/tests/common.py +++ /dev/null @@ -1,130 +0,0 @@ -from pathlib import Path - -import numpy as np -import pandas as pd -import xarray as xr - - -def count_pixels(stats: dict, numeric_categories=False) -> int: - category_counts = 0 - for key, val in stats.items(): - if numeric_categories: - try: - int(key) - except ValueError: - continue - if key in ["count", "min", "max", "mean", "median", "sum", "nodata"]: - continue - category_counts += val - return category_counts - - -def synthetic_gr4j_inputs(path): - time = pd.date_range(start="2000-07-01", end="2002-07-01", freq="D") - - pr = 3 * np.ones(len(time)) - pr = xr.DataArray(pr, coords={"time": time}, dims="time", name="pr") - pr.to_netcdf(Path(path).joinpath("pr.nc")) - - tas = 280 + 20 * np.cos(np.arange(len(time)) * 2 * np.pi / 365.0) - tas = xr.DataArray(tas, coords={"time": time}, dims="time", name="tas") - tas.to_netcdf(Path(path).joinpath("tas.nc")) - - evap = 3 + 3 * np.cos(-30 + np.arange(len(time)) * 2 * np.pi / 365.0) - evap = xr.DataArray(evap, coords={"time": time}, dims="time", name="evap") - evap.to_netcdf(Path(path).joinpath("evap.nc")) - - -def make_bnds(params, delta): - """Return low and high parameter bounds by subtracting and adding delta*params to params. - - Parameters - ---------- - params : sequence - Parameters. - delta : float [0,1] - Relative delta to subtract and add to parameters. - - Returns - ------- - (tuple, tuple) - Low and high bounds for parameters. - - """ - arr = np.asarray(params) - d = np.abs(arr * delta) - return tuple(arr - d), tuple(arr + d) - - -def _convert_2d(fn): - """Take the 1D Salmon time series and convert it to a 2D time series. 
- - Example - ------- - >>> fn = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" - >>> fn2 = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc" - >>> _convert_2d(fn).to_netcdf(fn2, "w") - """ - - features = { - "name": "Salmon", - "area": 4250.6, - "elevation": 843.0, - "latitude": 54.4848, - "longitude": -123.3659, - } - ds = xr.open_dataset(fn, decode_times=False).rename({"nstations": "region"}) - - out = xr.Dataset( - coords={ - "lon": ds.lon.expand_dims("lon").squeeze("region"), - "lat": ds.lat.expand_dims("lat").squeeze("region"), - "time": ds.time, - } - ) - - for v in ds.data_vars: - if v not in ["lon", "lat"]: - out[v] = ds[v].expand_dims("region", axis=1) - - # Add geometry feature variables - for key, val in features.items(): - out[key] = xr.DataArray(name=key, data=[val], dims="region") - - return out - - -def _convert_3d(fn): - """Take the 1D Salmon time series and convert it to a 3D time series. - - Example - ------- - >>> fn = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" - >>> fn3 = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc" - >>> _convert_3d(fn).to_netcdf(fn3, "w") - """ - elevation = [[843.0]] - ds = xr.open_dataset(fn, decode_times=False) - - out = xr.Dataset( - coords={ - "lon": ds.lon.expand_dims("lon").squeeze("nstations"), - "lat": ds.lat.expand_dims("lat").squeeze("nstations"), - "time": ds.time, - } - ) - - for v in ds.data_vars: - if v not in ["lon", "lat", "time"]: - out[v] = ds[v] - out[v] = out[v].expand_dims( - ["lon", "lat"] - ) # Needs to be in other step to keep attributes - - out["elevation"] = xr.DataArray( - data=elevation, - dims=["lon", "lat"], - attrs={"units": "m", "standard_name": "altitude"}, - ) - - return out From 7cc651e24d970b7fbe88399fe3c6c463c2783fcf Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 9 Jun 2025 15:03:27 -0400 Subject: [PATCH 03/29] update notebook testdata fetching mechanism --- ...ct_geographical_watershed_properties.ipynb | 9 ++-- .../03_Extracting_forcing_data.ipynb | 5 +- .../04_Emulating_hydrological_models.ipynb | 6 ++- .../05_Advanced_RavenPy_configuration.ipynb | 10 ++-- docs/notebooks/06_Raven_calibration.ipynb | 9 ++-- .../07_Making_and_using_hotstart_files.ipynb | 6 ++- ...tting_and_bias_correcting_CMIP6_data.ipynb | 27 +++------- ...drological_impacts_of_climate_change.ipynb | 14 ++++-- docs/notebooks/10_Data_assimilation.ipynb | 18 +++---- .../11_Climatological_ESP_forecasting.ipynb | 10 +++- ...2_Performing_hindcasting_experiments.ipynb | 16 +++--- .../Assess_probabilistic_flood_risk.ipynb | 49 ++++++++----------- ...omparing_hindcasts_and_ESP_forecasts.ipynb | 15 +++--- .../Distributed_hydrological_modelling.ipynb | 15 ++++-- .../Hydrological_realtime_forecasting.ipynb | 13 +++-- docs/notebooks/Perform_Regionalization.ipynb | 8 +-- .../Running_HMETS_with_CANOPEX_dataset.ipynb | 29 +++++------ docs/notebooks/Sensitivity_analysis.ipynb | 10 ++-- ...e_change_impact_study_on_a_watershed.ipynb | 10 ++-- docs/notebooks/time_series_analysis.ipynb | 8 ++- 20 files changed, 151 insertions(+), 136 deletions(-) diff --git a/docs/notebooks/02_Extract_geographical_watershed_properties.ipynb b/docs/notebooks/02_Extract_geographical_watershed_properties.ipynb index 19c0c56a..f2363a0d 100644 --- a/docs/notebooks/02_Extract_geographical_watershed_properties.ipynb +++ b/docs/notebooks/02_Extract_geographical_watershed_properties.ipynb @@ 
-37,10 +37,10 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import rasterio\n", - "import rioxarray as rio\n", "from birdy import WPSClient\n", "\n", - "from ravenpy.utilities.testdata import get_file\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", "\n", "# This is the URL of the Geoserver that will perform the computations for us.\n", "url = os.environ.get(\n", @@ -74,9 +74,8 @@ "\"\"\"\n", "feature_url = \"input.geojson\"\n", "\"\"\"\n", - "# However, to keep things tidy, we have also prepared a version that can be accessed easily for\n", - "# demonstration purposes:\n", - "feature_url = get_file(\"notebook_inputs/input.geojson\")\n", + "# However, to keep things tidy, we have also prepared a version that can be accessed easily for demonstration purposes:\n", + "feature_url = yangtze().fetch(\"notebook_inputs/input.geojson\")\n", "df = gpd.read_file(feature_url)\n", "display(df)\n", "df.plot()" diff --git a/docs/notebooks/03_Extracting_forcing_data.ipynb b/docs/notebooks/03_Extracting_forcing_data.ipynb index 4e7364eb..064812ed 100644 --- a/docs/notebooks/03_Extracting_forcing_data.ipynb +++ b/docs/notebooks/03_Extracting_forcing_data.ipynb @@ -31,7 +31,8 @@ "import xarray as xr\n", "from clisops.core import subset\n", "\n", - "from ravenpy.utilities.testdata import get_file" + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze" ] }, { @@ -63,7 +64,7 @@ "\"\"\"\n", "# However, to keep things tidy, we have also prepared a version that can be accessed easily for\n", "# demonstration purposes:\n", - "basin_contour = get_file(\"notebook_inputs/input.geojson\")\n", + "basin_contour = yangtze().fetch(\"notebook_inputs/input.geojson\")\n", "\n", "# Also, we can specify which timeframe we want to extract. Here let's focus on a 10-year period\n", "reference_start_day = dt.datetime(1985, 12, 31)\n", diff --git a/docs/notebooks/04_Emulating_hydrological_models.ipynb b/docs/notebooks/04_Emulating_hydrological_models.ipynb index f8f1974e..b09f0fa2 100644 --- a/docs/notebooks/04_Emulating_hydrological_models.ipynb +++ b/docs/notebooks/04_Emulating_hydrological_models.ipynb @@ -43,7 +43,9 @@ "from pathlib import Path\n", "\n", "from ravenpy.config import commands as rc\n", - "from ravenpy.utilities.testdata import get_file" + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze" ] }, { @@ -148,7 +150,7 @@ "\"\"\"\n", "\n", "# In our case, we will prefer to link to existing, pre-computed and locally stored files to keep things tidy:\n", - "ERA5_full = get_file(\"notebook_inputs/ERA5_weather_data.nc\")\n", + "ERA5_full = yangtze().fetch(\"notebook_inputs/ERA5_weather_data.nc\")\n", "\n", "\n", "# We need to define some configuration options that all models will need. 
See each line for more details on their use.\n", diff --git a/docs/notebooks/05_Advanced_RavenPy_configuration.ipynb b/docs/notebooks/05_Advanced_RavenPy_configuration.ipynb index c3094988..14612554 100644 --- a/docs/notebooks/05_Advanced_RavenPy_configuration.ipynb +++ b/docs/notebooks/05_Advanced_RavenPy_configuration.ipynb @@ -26,8 +26,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Utility that simplifies getting data hosted on the remote PAVICS-Hydro data server.\n", - "from ravenpy.utilities.testdata import get_file" + "# Utility that simplifies fetching and caching data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", + "\n", + "yangtze = yangtze()" ] }, { @@ -49,7 +51,7 @@ "# to make the calibration possible in the next notebook. Note that these configuration files also include links to the\n", "# required hydrometeorological database (NetCDF file).\n", "config = [\n", - " get_file(f\"raven-gr4j-cemaneige/raven-gr4j-salmon.{ext}\")\n", + " yangtze.fetch(f\"raven-gr4j-cemaneige/raven-gr4j-salmon.{ext}\")\n", " for ext in [\"rvt\", \"rvc\", \"rvi\", \"rvh\", \"rvp\"]\n", "]\n", "config" @@ -216,7 +218,7 @@ "\n", "# Observed weather data for the Salmon river. We extracted this using Tutorial Notebook 03 and the\n", "# salmon_river.geojson file as the contour.\n", - "ts = get_file(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", + "ts = yangtze.fetch(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", "\n", "# Set alternate variable names in the timeseries data file\n", "alt_names = {\n", diff --git a/docs/notebooks/06_Raven_calibration.ipynb b/docs/notebooks/06_Raven_calibration.ipynb index 03a21b08..eb253164 100644 --- a/docs/notebooks/06_Raven_calibration.ipynb +++ b/docs/notebooks/06_Raven_calibration.ipynb @@ -33,6 +33,9 @@ "\n", "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", "from ravenpy.utilities.calibration import SpotSetup" ] }, @@ -52,10 +55,8 @@ "metadata": {}, "outputs": [], "source": [ - "from ravenpy.utilities.testdata import get_file\n", - "\n", - "# We get the netCDF for testing on a server. You can replace the getfile method by a string containing the path to your own netCDF\n", - "nc_file = get_file(\n", + "# We get the netCDF for testing on a server. 
You can replace the yangtze method with a string containing the absolute or relative path to your own netCDF\n", + "nc_file = yangtze().fetch(\n", " \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", ")\n", "\n", diff --git a/docs/notebooks/07_Making_and_using_hotstart_files.ipynb b/docs/notebooks/07_Making_and_using_hotstart_files.ipynb index 53570186..903c7207 100644 --- a/docs/notebooks/07_Making_and_using_hotstart_files.ipynb +++ b/docs/notebooks/07_Making_and_using_hotstart_files.ipynb @@ -41,7 +41,9 @@ "# Import the GR4JCN model\n", "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", - "from ravenpy.utilities.testdata import get_file" + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze" ] }, { @@ -65,7 +67,7 @@ ")\n", "\n", "# Get dataset:\n", - "ERA5_full = get_file(\"notebook_inputs/ERA5_weather_data.nc\")\n", + "ERA5_full = yangtze().fetch(\"notebook_inputs/ERA5_weather_data.nc\")\n", "\n", "# Set alternative names for netCDF variables\n", "alt_names = {\n", diff --git a/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb b/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb index cca9b156..42e8f21d 100644 --- a/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb +++ b/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb @@ -19,21 +19,6 @@ "In this tutorial, we will be using the shapefile or GeoJSON file for watershed contours as generated in previous notebooks. The file can be uploaded to your workspace here and used directly in the cells below. In this notebook, we present a quick demonstration of the bias-correction approach on a small and predetermined dataset, but you can use your own basin according to your needs." 
] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import warnings\n", - "\n", - "from numba.core.errors import NumbaDeprecationWarning\n", - "\n", - "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -44,19 +29,23 @@ "source": [ "import datetime as dt\n", "import tempfile\n", + "import warnings\n", "from pathlib import Path\n", "\n", "import gcsfs\n", "import intake\n", - "import numpy as np\n", "import xarray as xr\n", "import xclim\n", "import xsdba\n", "from clisops.core import average, subset\n", + "from numba.core.errors import NumbaDeprecationWarning\n", "\n", - "from ravenpy.utilities.testdata import get_file\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", "\n", - "tmp = Path(tempfile.mkdtemp())" + "tmp = Path(tempfile.mkdtemp())\n", + "\n", + "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)" ] }, { @@ -89,7 +78,7 @@ "# You can replace the getfile method by a string containing the path to your own geojson.\n", "\n", "# Get basin contour.\n", - "basin_contour = get_file(\"notebook_inputs/input.geojson\")\n", + "basin_contour = yangtze().fetch(\"notebook_inputs/input.geojson\")\n", "\n", "reference_start_day = dt.datetime(1980, 12, 31)\n", "reference_end_day = dt.datetime(1991, 1, 1)\n", diff --git a/docs/notebooks/09_Hydrological_impacts_of_climate_change.ipynb b/docs/notebooks/09_Hydrological_impacts_of_climate_change.ipynb index 884c6c40..4ca5467c 100644 --- a/docs/notebooks/09_Hydrological_impacts_of_climate_change.ipynb +++ b/docs/notebooks/09_Hydrological_impacts_of_climate_change.ipynb @@ -33,7 +33,11 @@ "from ravenpy import Emulator\n", "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", - "from ravenpy.utilities.testdata import get_file\n", + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", + "\n", + "yangtze = yangtze()\n", "\n", "warnings.filterwarnings(\"ignore\")" ] @@ -70,7 +74,7 @@ "# We get the netCDF for testing on a server. 
You can replace the getfile method by a string containing the path\n", "# to your own netCDF\n", "\n", - "reference_ds = get_file(\"notebook_inputs/reference_dataset.nc\")\n", + "reference_nc = yangtze.fetch(\"notebook_inputs/reference_dataset.nc\")\n", "\n", "# Alternate names for the data in the climate data NetCDF files\n", "alt_names = {\n", @@ -93,7 +97,7 @@ " params=[0.529, -3.396, 407.29, 1.072, 16.9, 0.947],\n", " Gauge=[\n", " rc.Gauge.from_nc(\n", - " reference_ds, # path to the reference period dataset.\n", + " reference_nc, # path to the reference period dataset.\n", " data_type=data_type,\n", " alt_names=alt_names,\n", " data_kwds=data_kwds,\n", @@ -134,7 +138,7 @@ "end_date = dt.datetime(2090, 12, 31)\n", "\n", "# Get the future period dataset (path)\n", - "future_ds = get_file(\"notebook_inputs/future_dataset.nc\")\n", + "future_nc = yangtze.fetch(\"notebook_inputs/future_dataset.nc\")\n", "\n", "# Start a new model instance, again in this case a GR4JCN model emulator.\n", "m = emulators.GR4JCN(\n", @@ -142,7 +146,7 @@ " Gauge=[\n", " rc.Gauge.from_nc(\n", " # name of the future period dataset.\n", - " future_ds,\n", + " future_nc,\n", " data_type=data_type,\n", " alt_names=alt_names,\n", " data_kwds=data_kwds,\n", diff --git a/docs/notebooks/10_Data_assimilation.ipynb b/docs/notebooks/10_Data_assimilation.ipynb index 2100c42c..ea8dd42f 100644 --- a/docs/notebooks/10_Data_assimilation.ipynb +++ b/docs/notebooks/10_Data_assimilation.ipynb @@ -23,13 +23,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "import warnings\n", - "\n", - "from numba.core.errors import NumbaDeprecationWarning\n", - "\n", - "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)" - ] + "source": [] }, { "cell_type": "code", @@ -40,19 +34,25 @@ "# Import packages\n", "import datetime as dt\n", "import tempfile\n", + "import warnings\n", "from pathlib import Path\n", "\n", "import matplotlib.pyplot as plt\n", "import xarray as xr\n", + "from numba.core.errors import NumbaDeprecationWarning\n", "\n", "from ravenpy import Emulator, EnsembleReader\n", "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", "from ravenpy.config import options as o\n", - "from ravenpy.utilities.testdata import get_file\n", + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", + "\n", + "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)\n", "\n", "# Import hydrometeorological data\n", - "salmon_meteo = get_file(\n", + "salmon_meteo = yangtze().fetch(\n", " \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", ")\n", "\n", diff --git a/docs/notebooks/11_Climatological_ESP_forecasting.ipynb b/docs/notebooks/11_Climatological_ESP_forecasting.ipynb index b62965bd..e4a7b568 100644 --- a/docs/notebooks/11_Climatological_ESP_forecasting.ipynb +++ b/docs/notebooks/11_Climatological_ESP_forecasting.ipynb @@ -30,8 +30,12 @@ "\n", "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", "from ravenpy.utilities import forecasting\n", - "from ravenpy.utilities.testdata import get_file" + "\n", + "yangtze = yangtze()" ] }, { @@ -53,7 +57,9 @@ "source": [ "# Get the selected watershed's time series. 
You can use your own time-series for your catchment by replacing\n", "# this line with the name / path of your input file.\n", - "ts = get_file(\"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\")\n", + "ts = yangtze.fetch(\n", + " \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", + ")\n", "\n", "# This is the forecast start date, on which the forecasts will be launched.\n", "start_date = dt.datetime(1980, 6, 1)\n", diff --git a/docs/notebooks/12_Performing_hindcasting_experiments.ipynb b/docs/notebooks/12_Performing_hindcasting_experiments.ipynb index 9ea83cb9..3664a183 100644 --- a/docs/notebooks/12_Performing_hindcasting_experiments.ipynb +++ b/docs/notebooks/12_Performing_hindcasting_experiments.ipynb @@ -20,20 +20,24 @@ "metadata": {}, "outputs": [], "source": [ - "# This entire section is cookie-cutter template to import required packages and prepare the temporary writing space.\n", + "# This entire section is cookiecutter template to import required packages and prepare the temporary writing space.\n", "import datetime as dt\n", "import tempfile\n", "from pathlib import Path\n", "\n", "import xarray as xr\n", - "from clisops.core import average, subset\n", + "from clisops.core import subset\n", "\n", - "from ravenpy import Emulator, RavenWarning\n", + "from ravenpy import Emulator\n", "from ravenpy.config import commands as rc\n", "from ravenpy.config.emulators import GR4JCN\n", "from ravenpy.extractors.forecasts import get_CASPAR_dataset\n", + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", "from ravenpy.utilities import forecasting\n", - "from ravenpy.utilities.testdata import get_file\n", + "\n", + "yangtze = yangtze()\n", "\n", "tmp = Path(tempfile.mkdtemp())" ] @@ -60,7 +64,7 @@ "ts_hindcast, _ = get_CASPAR_dataset(\"GEPS\", hdate)\n", "\n", "# Get basin contour\n", - "basin_contour = get_file(\"notebook_inputs/salmon_river.geojson\")\n", + "basin_contour = yangtze.fetch(\"notebook_inputs/salmon_river.geojson\")\n", "\n", "# Subset the data for the region of interest and take the mean to get a single vector\n", "with xr.set_options(keep_attrs=True):\n", @@ -105,7 +109,7 @@ "# )\n", "\n", "# TODO: We will use ERA5 data for Salmon River because it covers the correct period.\n", - "ts = get_file(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", + "ts = yangtze.fetch(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", "\n", "# This is the model start date, on which the simulation will be launched for a certain duration\n", "# to set up the initial states. 
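The hindcasting hunks above pull GEPS data from CASPAR. A condensed, self-contained sketch of that call, with an illustrative hindcast date (the date is not taken from the notebook):

    import datetime as dt

    from ravenpy.extractors.forecasts import get_CASPAR_dataset

    # The second return value is not used here, mirroring the notebook.
    hdate = dt.datetime(2018, 6, 1)  # illustrative date
    ts_hindcast, _ = get_CASPAR_dataset("GEPS", hdate)
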
We will then save the final states as a launching point for the\n", diff --git a/docs/notebooks/Assess_probabilistic_flood_risk.ipynb b/docs/notebooks/Assess_probabilistic_flood_risk.ipynb index 1e327c25..e0d654cf 100644 --- a/docs/notebooks/Assess_probabilistic_flood_risk.ipynb +++ b/docs/notebooks/Assess_probabilistic_flood_risk.ipynb @@ -16,34 +16,27 @@ "id": "1", "metadata": {}, "outputs": [], - "source": [ - "import warnings\n", - "\n", - "from numba.core.errors import NumbaDeprecationWarning\n", - "\n", - "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], "source": [ "%matplotlib inline\n", - "\n", "import datetime as dt\n", + "import warnings\n", "\n", + "import xarray as xr\n", "import xclim\n", "from matplotlib import pyplot as plt\n", + "from numba.core.errors import NumbaDeprecationWarning\n", + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", "\n", - "from ravenpy.utilities.testdata import get_file, open_dataset" + "yangtze = yangtze()\n", + "\n", + "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)" ] }, { "cell_type": "markdown", - "id": "3", + "id": "2", "metadata": {}, "source": [ "Perform the time series analysis on observed data for the catchment using the frequency analysis WPS capabilities." @@ -52,13 +45,13 @@ { "cell_type": "code", "execution_count": null, - "id": "4", + "id": "3", "metadata": {}, "outputs": [], "source": [ "# Get the data that we will be using for the demonstration.\n", "file = \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", - "ts = open_dataset(file).qobs\n", + "ts = xr.open_dataset(yangtze.fetch(file)).qobs\n", "\n", "# Perform the frequency analysis for various return periods. 
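For the frequency analysis performed in the cell above, the call involved looks roughly like the following; the `frequency_analysis` helper and its keywords are assumed from xclim's generic statistics API, not copied from the notebook:

    import xarray as xr
    from xclim.indices.stats import frequency_analysis

    from ravenpy.testing.utils import yangtze

    file = "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc"
    ts = xr.open_dataset(yangtze().fetch(file)).qobs

    # Annual maxima fitted with a Gumbel distribution at the return periods
    # named in the comment above (2 through 100 years).
    return_levels = frequency_analysis(
        ts, mode="max", t=[2, 5, 10, 25, 50, 100], dist="gumbel_r", freq="YS"
    )
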
We compute 2, 5, 10, 25, 50 and 100 year return\n", "# periods, but later on we will only compare the forecasts to the 2 year return period.\n", @@ -71,7 +64,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -96,7 +89,7 @@ }, { "cell_type": "markdown", - "id": "6", + "id": "5", "metadata": {}, "source": [ "## Probabilistic forecast\n", @@ -107,7 +100,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7", + "id": "6", "metadata": { "pycharm": { "is_executing": true @@ -157,7 +150,7 @@ " params=[0.529, -3.396, 407.29, 1.072, 16.9, 0.947],\n", " Gauge=[\n", " rc.Gauge.from_nc(\n", - " get_file(file),\n", + " yangtze.fetch(file),\n", " data_type=data_type,\n", " alt_names=alt_names,\n", " data_kwds=data_kwds,\n", @@ -172,7 +165,7 @@ }, { "cell_type": "markdown", - "id": "8", + "id": "7", "metadata": {}, "source": [ "Now that the configuration is ready, launch the ESP forecasting tool to generate an ensemble hydrological forecast:" @@ -181,7 +174,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "8", "metadata": { "pycharm": { "is_executing": true @@ -201,7 +194,7 @@ { "cell_type": "code", "execution_count": null, - "id": "10", + "id": "9", "metadata": { "pycharm": { "is_executing": true @@ -220,7 +213,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "10", "metadata": { "pycharm": { "is_executing": true @@ -247,7 +240,7 @@ }, { "cell_type": "markdown", - "id": "12", + "id": "11", "metadata": {}, "source": [ "### Results analysis\n", diff --git a/docs/notebooks/Comparing_hindcasts_and_ESP_forecasts.ipynb b/docs/notebooks/Comparing_hindcasts_and_ESP_forecasts.ipynb index 4f0e235a..b74dd54b 100644 --- a/docs/notebooks/Comparing_hindcasts_and_ESP_forecasts.ipynb +++ b/docs/notebooks/Comparing_hindcasts_and_ESP_forecasts.ipynb @@ -20,8 +20,7 @@ "outputs": [], "source": [ "%matplotlib inline\n", - "# This entire section is cookie-cutter template to allow calling the servers and instantiating the connection\n", - "# to the WPS server. Do not modify this block.\n", + "\n", "import datetime as dt\n", "\n", "import matplotlib.pyplot as plt\n", @@ -32,8 +31,12 @@ "from ravenpy.config import commands as rc\n", "from ravenpy.config.emulators import GR4JCN\n", "from ravenpy.extractors.forecasts import get_CASPAR_dataset\n", + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", "from ravenpy.utilities import forecasting\n", - "from ravenpy.utilities.testdata import get_file, open_dataset" + "\n", + "yangtze = yangtze()" ] }, { @@ -57,7 +60,7 @@ "\n", "# Define the catchment contour. Here we use the Salmon River file we previously generated using the Delineator\n", "# in Tutorial Notebook 01.\n", - "basin_contour = get_file(\"notebook_inputs/salmon_river.geojson\")\n", + "basin_contour = yangtze.fetch(\"notebook_inputs/salmon_river.geojson\")\n", "\n", "# Define some of the catchment properties. Could also be replaced by a call to the properties WPS as in\n", "# the Tutorial Notebook 02.\n", @@ -71,7 +74,7 @@ "\n", "# Observed weather data for the Salmon river. 
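The gauge-building pattern repeated across these notebooks, condensed into a self-contained sketch (the variable aliases below are illustrative, not the notebook's exact values):

    from ravenpy.config import commands as rc
    from ravenpy.testing.utils import yangtze

    ts = yangtze().fetch("notebook_inputs/ERA5_weather_data_Salmon.nc")

    # Map Raven data types onto this file's variable names (illustrative).
    alt_names = {"TEMP_AVE": "tas", "PRECIP": "pr"}
    gauge = rc.Gauge.from_nc(ts, data_type=list(alt_names), alt_names=alt_names)
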
We extracted this using Tutorial Notebook 03 and the\n", "# salmon_river.geojson file as the contour.\n", - "ts = get_file(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", + "ts = yangtze.fetch(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", "\n", "# Set alternative names for netCDF variables\n", "alt_names = {\n", @@ -117,7 +120,7 @@ "metadata": {}, "outputs": [], "source": [ - "dss = open_dataset(ts)\n", + "dss = xr.open_dataset(ts)\n", "dss" ] }, diff --git a/docs/notebooks/Distributed_hydrological_modelling.ipynb b/docs/notebooks/Distributed_hydrological_modelling.ipynb index 2517f2ab..6e968009 100644 --- a/docs/notebooks/Distributed_hydrological_modelling.ipynb +++ b/docs/notebooks/Distributed_hydrological_modelling.ipynb @@ -22,6 +22,7 @@ "from pathlib import Path\n", "\n", "import matplotlib.pyplot as plt\n", + "import xarray as xr\n", "\n", "from ravenpy import Emulator\n", "from ravenpy.config import commands as rc\n", @@ -31,7 +32,11 @@ " open_shapefile,\n", " upstream_from_coords,\n", ")\n", - "from ravenpy.utilities.testdata import get_file, open_dataset\n", + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", + "\n", + "yangtze = yangtze()\n", "\n", "tmp_path = Path(tempfile.mkdtemp())" ] @@ -50,7 +55,7 @@ "outputs": [], "source": [ "# Get path to pre-downloaded BasinMaker Routing product database for our catchment\n", - "shp_path = get_file(\"basinmaker/drainage_region_0175_v2-1/finalcat_info_v2-1.zip\")\n", + "shp_path = yangtze.fetch(\"basinmaker/drainage_region_0175_v2-1/finalcat_info_v2-1.zip\")\n", "\n", "# Note that for this to work, the coordinates must be in the small\n", "# BasinMaker example (drainage_region_0175)\n", @@ -85,7 +90,7 @@ "outputs": [], "source": [ "# Streamflow observations file\n", - "qobs_fn = get_file(\"matapedia/Qobs_Matapedia_01BD009.nc\")\n", + "qobs_fn = yangtze.fetch(\"matapedia/Qobs_Matapedia_01BD009.nc\")\n", "\n", "# Make an observation gauge from the observed streamflow\n", "qobs = rc.ObservationData.from_nc(qobs_fn, alt_names=(\"discharge\",))" @@ -105,7 +110,7 @@ "outputs": [], "source": [ "# Meteo observations file\n", - "meteo_grid_fn = get_file(\"matapedia/Matapedia_meteo_data_stations.nc\")\n", + "meteo_grid_fn = yangtze.fetch(\"matapedia/Matapedia_meteo_data_stations.nc\")\n", "\n", "# Alternate names for variables in the files\n", "alt_names = {\n", @@ -183,7 +188,7 @@ ")\n", "\n", "# Plot the observed streamflow\n", - "qobs_data = open_dataset(qobs_fn)\n", + "qobs_data = xr.open_dataset(qobs_fn)\n", "qobs_data.discharge.plot.line(x=\"time\", label=\"Observations\", color=\"red\", lw=1.5)\n", "\n", "plt.legend()" diff --git a/docs/notebooks/Hydrological_realtime_forecasting.ipynb b/docs/notebooks/Hydrological_realtime_forecasting.ipynb index c9e8ceb5..5da47b07 100644 --- a/docs/notebooks/Hydrological_realtime_forecasting.ipynb +++ b/docs/notebooks/Hydrological_realtime_forecasting.ipynb @@ -23,13 +23,18 @@ "\n", "import fiona\n", "import matplotlib.pyplot as plt\n", + "import xarray as xr\n", "\n", "from ravenpy import Emulator\n", "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", "from ravenpy.extractors.forecasts import get_recent_ECCC_forecast\n", + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", "from ravenpy.utilities import forecasting\n", - "from ravenpy.utilities.testdata import get_file, open_dataset" + 
"\n", + "yangtze = yangtze()" ] }, { @@ -40,7 +45,7 @@ "source": [ "# Define the catchment contour. Here we use the Salmon River file we previously generated using the Delineator\n", "# in Tutorial Notebook 01.\n", - "basin_contour = get_file(\"notebook_inputs/salmon_river.geojson\")\n", + "basin_contour = yangtze().fetch(\"notebook_inputs/salmon_river.geojson\")\n", "\n", "# Get the most recent ECCC forecast data from the Geomet extraction tool:\n", "forecast_data = get_recent_ECCC_forecast(\n", @@ -76,7 +81,7 @@ "\n", "# Observed weather data for the Salmon river. We extracted this using Tutorial Notebook 03 and the\n", "# salmon_river.geojson file as the contour. Used for the model warm-up.\n", - "ts = get_file(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", + "ts = yangtze().fetch(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", "\n", "# Set alternative names for netCDF variables\n", "alt_names = {\n", @@ -158,7 +163,7 @@ "}\n", "\n", "# ECCC forecast time format is a bit complex to work with, so we will use cftime to make it more manageable.\n", - "fcst_tmp = open_dataset(fname, use_cftime=True)\n", + "fcst_tmp = xr.open_dataset(fname, use_cftime=True)\n", "\n", "# Get the first timestep that will be used for the model simulation\n", "start_date = fcst_tmp.time.data[0] + dt.timedelta(days=1)\n", diff --git a/docs/notebooks/Perform_Regionalization.ipynb b/docs/notebooks/Perform_Regionalization.ipynb index 73c7d497..3b4a7034 100644 --- a/docs/notebooks/Perform_Regionalization.ipynb +++ b/docs/notebooks/Perform_Regionalization.ipynb @@ -25,12 +25,14 @@ "\n", "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", "from ravenpy.utilities.regionalization import (\n", " read_gauged_params,\n", " read_gauged_properties,\n", " regionalize,\n", - ")\n", - "from ravenpy.utilities.testdata import get_file" + ")" ] }, { @@ -47,7 +49,7 @@ "outputs": [], "source": [ "# Get the forcing dataset for the ungauged watershed\n", - "ts = get_file(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", + "ts = yangtze().fetch(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", "\n", "# Get HRUs of ungauged watershed\n", "hru = dict(\n", diff --git a/docs/notebooks/Running_HMETS_with_CANOPEX_dataset.ipynb b/docs/notebooks/Running_HMETS_with_CANOPEX_dataset.ipynb index a0fe45e6..ca1776fe 100644 --- a/docs/notebooks/Running_HMETS_with_CANOPEX_dataset.ipynb +++ b/docs/notebooks/Running_HMETS_with_CANOPEX_dataset.ipynb @@ -15,36 +15,27 @@ "metadata": {}, "outputs": [], "source": [ - "import warnings\n", - "\n", - "from numba.core.errors import NumbaDeprecationWarning\n", - "\n", - "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Cookie-cutter template necessary to provide the tools, packages and paths for the project. 
All notebooks\n", - "# need this template (or a slightly adjusted one depending on the required packages)\n", "import datetime as dt\n", "import tempfile\n", + "import warnings\n", "from pathlib import Path\n", "\n", "import pandas as pd\n", "import spotpy\n", "import xarray as xr\n", + "from numba.core.errors import NumbaDeprecationWarning\n", "\n", "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", "from ravenpy.utilities.calibration import SpotSetup\n", - "from ravenpy.utilities.testdata import get_file\n", "\n", "# Make a temporary folder\n", - "tmp = Path(tempfile.mkdtemp())" + "tmp = Path(tempfile.mkdtemp())\n", + "\n", + "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)" ] }, { @@ -96,7 +87,9 @@ "source": [ "# With this info, we can gather some properties from the CANOPEX database. This same database is used for\n", "# regionalization, so let's query it there where more information is available:\n", - "tmp = pd.read_csv(get_file(\"regionalisation_data/gauged_catchment_properties.csv\"))\n", + "tmp = pd.read_csv(\n", + " yangtze().fetch(\"regionalisation_data/gauged_catchment_properties.csv\")\n", + ")\n", "\n", "basin_area = float(tmp[\"area\"][watershedID])\n", "basin_latitude = float(tmp[\"latitude\"][watershedID])\n", diff --git a/docs/notebooks/Sensitivity_analysis.ipynb b/docs/notebooks/Sensitivity_analysis.ipynb index 5fc1e0a0..a3604a8c 100644 --- a/docs/notebooks/Sensitivity_analysis.ipynb +++ b/docs/notebooks/Sensitivity_analysis.ipynb @@ -41,10 +41,12 @@ "from ravenpy import OutputReader, run\n", "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", - "from ravenpy.utilities.testdata import get_file\n", + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", "\n", "# We get the netCDF from a server. You can replace the `get_file` function by a string containing the path to your own netCDF.\n", - "nc_file = get_file(\n", + "nc_file = yangtze(branch=\"new-system\").fetch(\n", " \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", ")\n", "\n", @@ -155,8 +157,8 @@ "# of folders with each run's data)\n", "workdir = Path(tempfile.mkdtemp())\n", "\n", - "# Pre-define the results matrix based on the number of parameters we will test (and thus how many runs we will need to do). We will test SA with\n", - "# two objective functions (NSE and AbsErr). Let's pre-define both vectors now.\n", + "# Pre-define the results-matrix based on the number of parameters we will test (and thus how many runs we will need to do).\n", + "# We will test SA with two objective functions (NSE and AbsErr). 
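The `branch` override used in the hunk above, shown in isolation; the keyword names match those passed to the fetcher in conftest.py later in this series:

    from ravenpy.testing.utils import yangtze

    # `repo`, `branch` and `cache_dir` can be overridden per call; defaults
    # come from the RAVEN_TESTDATA_* environment variables.
    nc_file = yangtze(branch="new-system").fetch(
        "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc"
    )
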
Let's pre-define both vectors now.\n", "Y_NSE = np.zeros([param_values.shape[0]])\n", "Y_ABS = np.zeros([param_values.shape[0]])\n", "\n", diff --git a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb index c232ef4f..709cae0a 100644 --- a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb +++ b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb @@ -78,8 +78,12 @@ "from ravenpy import Emulator\n", "from ravenpy.config import commands as rc\n", "from ravenpy.config.emulators import GR4JCN\n", + "\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", "from ravenpy.utilities.calibration import SpotSetup\n", - "from ravenpy.utilities.testdata import get_file" + "\n", + "yangtze = yangtze()" ] }, { @@ -131,12 +135,12 @@ "# Name of the watershed boundaries file that is uploaded to the server.\n", "# Note that this file contains the .shx, .shp and other associated files for shapefiles, all zipped into one file.\n", "# It will also be used later for extracting meteorological data.\n", - "basin_contour = get_file(\"paper/shapefile_basin_574_HYSETS.zip\")\n", + "basin_contour = yangtze.fetch(\"paper/shapefile_basin_574_HYSETS.zip\")\n", "\n", "# This file is an extraction of streamflow for catchment 574 in HYSETS.\n", "# Weather data will be gathered later from the ERA5 database, but could also be taken directly from HYSETS.\n", "# This is to show how the process could be linked together for your own applications using ERA5 data.\n", - "streamflow_file = get_file(\"paper/Qobs_574_HYSETS.nc\")" + "streamflow_file = yangtze.fetch(\"paper/Qobs_574_HYSETS.nc\")" ] }, { diff --git a/docs/notebooks/time_series_analysis.ipynb b/docs/notebooks/time_series_analysis.ipynb index 16949371..0c3ce76a 100644 --- a/docs/notebooks/time_series_analysis.ipynb +++ b/docs/notebooks/time_series_analysis.ipynb @@ -17,17 +17,17 @@ "source": [ "%matplotlib inline\n", "\n", - "import xarray as xr\n", "import xclim\n", "from pandas.plotting import register_matplotlib_converters\n", "\n", - "from ravenpy.utilities.testdata import get_file, open_dataset\n", + "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", "\n", "register_matplotlib_converters()\n", "\n", "# Get the file we will use to analyze flows\n", "file = \"hydro_simulations/raven-gr4j-cemaneige-sim_hmets-0_Hydrographs.nc\"\n", - "ds = open_dataset(file)" + "ds = yangtze().fetch(file)" ] }, { @@ -168,8 +168,6 @@ "metadata": {}, "outputs": [], "source": [ - "import json\n", - "\n", "with xclim.set_options(\n", " check_missing=\"pct\", missing_options={\"pct\": {\"tolerance\": 0.05}}\n", "):\n", From efbeb530937375df184688111629b62312b41260 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 9 Jun 2025 17:02:14 -0400 Subject: [PATCH 04/29] WIP - adjust tests to new fetch mechanics, no more tests package --- src/ravenpy/ravenpy.py | 5 +- tests/__init__.py | 1 - tests/conftest.py | 279 +++++++++----------------- tests/emulators.py | 12 +- tests/test_base.py | 2 - tests/test_bias_correction.py | 20 +- tests/test_cli.py | 52 ++--- tests/test_commands.py | 65 +++--- tests/test_coords.py | 3 +- tests/test_distributed_workflow.py | 10 +- tests/test_emulators.py | 30 ++- tests/test_ensemble.py | 4 +- 
tests/test_external_dataset_access.py | 84 +------- tests/test_extractor.py | 4 +- tests/test_forecasting.py | 8 +- tests/test_geo_utilities.py | 104 +++++----- tests/test_geoserver.py | 4 +- tests/test_graphs.py | 4 +- tests/test_hindcasting.py | 13 +- tests/test_nb_graphs.py | 16 +- tests/test_ravenpy.py | 3 +- tests/test_regionalisation.py | 2 +- tests/test_rvs.py | 12 +- tests/test_utils.py | 4 +- tox.ini | 2 +- 25 files changed, 281 insertions(+), 462 deletions(-) delete mode 100644 tests/__init__.py diff --git a/src/ravenpy/ravenpy.py b/src/ravenpy/ravenpy.py index 9c0f235f..5a5f751b 100644 --- a/src/ravenpy/ravenpy.py +++ b/src/ravenpy/ravenpy.py @@ -2,7 +2,6 @@ import collections import os -import shutil import subprocess # noqa: S404 import tempfile import warnings @@ -86,6 +85,7 @@ def output_path(self) -> Optional[Path]: if self._output_path is not None: return self._output_path warnings.warn("`output_path` not set. Model must be run first.") + return None @property def modelname(self) -> str: @@ -146,6 +146,7 @@ def solution(self) -> Optional[dict]: solution = self.files.get("solution") if solution: return parsers.parse_solution(solution) + return None @property def diagnostics(self) -> Optional[dict]: @@ -153,6 +154,7 @@ def diagnostics(self) -> Optional[dict]: diag = self.files.get("diagnostics") if diag: return parsers.parse_diagnostics(diag) + return None @property def hydrograph(self) -> xr.Dataset: @@ -179,6 +181,7 @@ def messages(self) -> Optional[str]: msg = self.files.get("messages") if msg: return msg.read_text() + return None @property def path(self) -> Path: diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index 398bb0a1..00000000 --- a/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Unit test package for ravenpy.""" diff --git a/tests/conftest.py b/tests/conftest.py index 92b94121..92e0f60d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,193 +1,93 @@ -import datetime as dt +from __future__ import annotations + +import logging import os -import shutil +from collections.abc import Callable from pathlib import Path -from typing import Optional, Union +from typing import Optional import pytest import xarray as xr -from filelock import FileLock -from xclim.indicators.generic import fit, stats - -from ravenpy.utilities.testdata import _default_cache_dir -from ravenpy.utilities.testdata import get_file as _get_file -from ravenpy.utilities.testdata import get_local_testdata as _get_local_testdata - -from .common import _convert_2d, _convert_3d # Additional pytest fixtures for emulators -from .emulators import ( # noqa: F401 +from emulators import ( # noqa: F401 config_rv, gr4jcn_config, minimal_emulator, numeric_config, symbolic_config, ) +from xclim.indicators.generic import fit, stats -RAVEN_TESTING_DATA_BRANCH = os.getenv("RAVEN_TESTING_DATA_BRANCH", "master") -SKIP_TEST_DATA = os.getenv("RAVENPY_SKIP_TEST_DATA") -DEFAULT_CACHE = Path(_default_cache_dir) - - -def populate_testing_data( - temp_folder: Optional[Path] = None, - branch: str = RAVEN_TESTING_DATA_BRANCH, - _local_cache: Path = DEFAULT_CACHE, -) -> None: - if _local_cache.joinpath(".data_written").exists(): - # This flag prevents multiple calls from re-attempting to download testing data in the same pytest run - return - - models = [ - "gr4j-cemaneige", - "hbvec", - "hmets", - "mohyse", - ] - - data_entries = list() - entries = [ - "raven-{model}/Salmon-River-Near-Prince-George_Qobs_daily.rvt", - "raven-{model}/Salmon-River-Near-Prince-George_meteo_daily.rvt", - 
"raven-{model}/raven-{model0}-salmon.rvc", - "raven-{model}/raven-{model0}-salmon.rvh", - "raven-{model}/raven-{model0}-salmon.rvi", - "raven-{model}/raven-{model0}-salmon.rvp", - "raven-{model}/raven-{model0}-salmon.rvt", - ] - for model in models: - for entry in entries: - data_entries.append(entry.format(model=model, model0=model.split("-")[0])) - - data_entries.extend( - [ - "caspar_eccc_hindcasts/geps_watershed.nc", - "clrh/mattawin/06FB002.rvh", - "clrh/mattawin/channel_properties.rvp", - "clrh/mattawin/Lakes.rvh", - "eccc_forecasts/geps_watershed.nc", - "cmip5/nasa_nex-gddp-1.0_day_inmcm4_historical+rcp45_nex-gddp_1971-1972_subset.nc", - "cmip5/nasa_nex-gddp-1.0_day_inmcm4_historical+rcp85_nex-gddp_2070-2071_subset.nc", - "cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200601-210012_subset.nc", - "famine/famine_input.nc", - "gr4j_cemaneige/solution.rvc", - "hydro_simulations/raven-gr4j-cemaneige-sim_hmets-0_Hydrographs.nc", - "nasa/Mars_MGS_MOLA_DEM_georeferenced_region_compressed.tiff", - "nrcan/NRCAN_1971-1972_subset.nc", - "nrcan/NRCAN_2006-2007_subset.nc", - "polygons/mars.geojson", - "polygons/mars.zip", - "polygons/Saskatoon.geojson", - "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc", - "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc", - "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc", - "raven-routing-sample/OTT_sub.zip", - "raven-routing-sample/VIC_streaminputs.nc", - "raven-routing-sample/VIC_streaminputs_weights.rvt", - "raven-routing-sample/VIC_temperatures.nc", - "raven-routing-sample/VIC_test_nodata.nc", - "raven-routing-sample/VIC_test_nodata_weights.rvt", - "raven-routing-sample/WSC02LE024.nc", - "raven-routing-sample/era5-test-dataset-crop.nc", - "raven-routing-sample/finalcat_hru_info.zip", - "raven-routing-sample/lievre_hrus_v21.zip", - "watershed_vector/LSJ_LL.zip", - "basinmaker/drainage_region_0175_v2-1/finalcat_info_v2-1.zip", - "matapedia/Qobs_Matapedia_01BD009.nc", - "matapedia/Matapedia_meteo_data_2D.nc", - "matapedia/Matapedia_meteo_data_stations.nc", - ] - ) - - data = dict() - for filepattern in data_entries: - if temp_folder is None: - try: - data[filepattern] = _get_file( - filepattern, branch=branch, cache_dir=_local_cache - ) - except FileNotFoundError: - continue - elif temp_folder: - try: - data[filepattern] = _get_local_testdata( - filepattern, - temp_folder=temp_folder, - branch=branch, - _local_cache=_local_cache, - ) - except FileNotFoundError: - continue - - return +from ravenpy.testing.helpers import convert_2d, convert_3d +from ravenpy.testing.utils import ( + TESTDATA_BRANCH, + TESTDATA_CACHE_DIR, + TESTDATA_REPO_URL, + default_testdata_cache, + gather_testing_data, +) +from ravenpy.testing.utils import open_dataset as _open_dataset +from ravenpy.testing.utils import ( + testing_setup_warnings, +) +from ravenpy.testing.utils import yangtze as _yangtze @pytest.fixture(scope="session") def threadsafe_data_dir(tmp_path_factory) -> Path: - """Constructor for worker-session temporary data folders.""" return Path(tmp_path_factory.getbasetemp().joinpath("data")) @pytest.fixture(scope="session") -def get_file(threadsafe_data_dir): - def _get_session_scoped_file(file: Union[str, Path]): - return _get_file( - file, cache_dir=threadsafe_data_dir, branch=RAVEN_TESTING_DATA_BRANCH - ) - - return _get_session_scoped_file +def yangtze(threadsafe_data_dir, worker_id): + return _yangtze( + repo=TESTDATA_REPO_URL, + branch=TESTDATA_BRANCH, + cache_dir=( + TESTDATA_CACHE_DIR if worker_id == "master" 
else threadsafe_data_dir
+        ),
+    )
 
 
 @pytest.fixture(scope="session")
-def get_local_testdata(threadsafe_data_dir):
-    def _get_session_scoped_local_testdata(file: Union[str, Path]):
-        return _get_local_testdata(
+def open_dataset(threadsafe_data_dir, worker_id):
+    def _open_session_scoped_file(file: str | os.PathLike, **xr_kwargs):
+        yangtze_kwargs = {
+            "branch": TESTDATA_BRANCH,
+            "repo": TESTDATA_REPO_URL,
+            "cache_dir": (
+                TESTDATA_CACHE_DIR if worker_id == "master" else threadsafe_data_dir
+            ),
+        }
+        xr_kwargs.setdefault("cache", True)
+        xr_kwargs.setdefault("engine", "h5netcdf")
+        return _open_dataset(
             file,
-            temp_folder=threadsafe_data_dir,
-            branch=RAVEN_TESTING_DATA_BRANCH,
-            _local_cache=DEFAULT_CACHE,
+            yangtze_kwargs=yangtze_kwargs,
+            **xr_kwargs,
         )
 
-    return _get_session_scoped_local_testdata
-
-
-@pytest.fixture(scope="session", autouse=True)
-def gather_session_data(threadsafe_data_dir, worker_id):
-    """Gather testing data on pytest run.
-
-    When running pytest with multiple workers, one worker will copy data remotely to DEFAULT_CACHE while
-    other workers wait using lockfile. Once the lock is released, all workers will copy data to their local
-    threadsafe_data_dir."""
-    if worker_id == "master":
-        if not SKIP_TEST_DATA:
-            populate_testing_data(branch=RAVEN_TESTING_DATA_BRANCH)
-    else:
-        if not SKIP_TEST_DATA:
-            DEFAULT_CACHE.mkdir(exist_ok=True)
-            test_data_being_written = FileLock(DEFAULT_CACHE.joinpath(".lock"))
-            with test_data_being_written as fl:
-                # This flag prevents multiple calls from re-attempting to download testing data in the same pytest run
-                populate_testing_data(branch=RAVEN_TESTING_DATA_BRANCH)
-                DEFAULT_CACHE.joinpath(".data_written").touch()
-            fl.acquire()
-            shutil.copytree(DEFAULT_CACHE, threadsafe_data_dir)
+    return _open_session_scoped_file
 
 
 @pytest.fixture(scope="session")
-def q_sim_1(threadsafe_data_dir, get_local_testdata):
+def q_sim_1(yangtze):
     """A file storing a Raven streamflow simulation over one basin."""
-    return get_local_testdata(
+    return yangtze.fetch(
         "hydro_simulations/raven-gr4j-cemaneige-sim_hmets-0_Hydrographs.nc",
     )
 
 
 @pytest.fixture(scope="session")
-def ts_stats(q_sim_1, tmp_path):
-    with xr.open_dataset(q_sim_1) as ds:
-        q = ds.q_sim
-        ts = stats(q, op="max")
-        fn = tmp_path / "ts_stats.nc"
-        ts.to_netcdf_(fn)
+def ts_stats(q_sim_1, threadsafe_data_dir):
+    fn = threadsafe_data_dir / "ts_stats.nc"
+
+    if not fn.exists():
+        with xr.open_dataset(q_sim_1) as ds:
+            q = ds.q_sim
+            ts = stats(q, op="max")
+            ts.to_netcdf(fn)
     return fn
 
 
@@ -201,40 +101,37 @@ def fit_params(ts_stats, threadsafe_data_dir):
         q = ds[name]
         p = fit(q, dist="gumbel_r")
         p.to_netcdf(fn)
-
-    yield fn
+    return fn
 
 
-@pytest.fixture(scope="session")
-def bad_netcdf(get_local_testdata, threadsafe_data_dir):
-    fn = threadsafe_data_dir / "bad_netcdf.nc"
+@pytest.fixture
+def bad_netcdf(yangtze, tmp_path):
+    fn = tmp_path / "bad_netcdf.nc"
 
-    salmon_file = get_local_testdata(
+    salmon_file = yangtze.fetch(
         "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc"
     )
 
-    if not fn.exists():
-        # Test with scalar elevation. Should normally have a station dimension, but not always the case.
-        with xr.open_dataset(salmon_file) as ds:
-            ds["station_name"] = ds["station_name"].astype("str")
-            ds["elevation"] = 1.0
-            ds.to_netcdf(fn)
-
+    # Test with scalar elevation. Should normally have a station dimension, but not always the case.
+ with xr.open_dataset(salmon_file) as ds: + ds["station_name"] = ds["station_name"].astype("str") + ds["elevation"] = 1.0 + ds.to_netcdf(fn) return fn @pytest.fixture(scope="session") def salmon_hru(): out = { - "land": dict( - area=4250.6, - elevation=843.0, - latitude=54.4848, - longitude=-123.3659, - hru_type="land", - ) + "land": { + "area": 4250.6, + "elevation": 843.0, + "latitude": 54.4848, + "longitude": -123.3659, + "hru_type": "land", + } } - yield out + return out # Used in test_emulators.py @@ -244,7 +141,7 @@ def input2d(salmon, threadsafe_data_dir): fn_out = threadsafe_data_dir / "input2d.nc" if not fn_out.exists(): - _convert_2d(salmon).to_netcdf(fn_out) + convert_2d(salmon).to_netcdf(fn_out) return fn_out @@ -255,7 +152,7 @@ def input3d(salmon, threadsafe_data_dir): fn_out = threadsafe_data_dir / "input3d.nc" if not fn_out.exists(): - ds = _convert_3d(salmon) + ds = convert_3d(salmon) ds = ds.drop_vars("qobs") ds.to_netcdf(fn_out) ds.close() @@ -280,25 +177,33 @@ class P(Params): class TestConfig(Config): params: P = P() calendar: o.Calendar = Field("JULIAN", alias="Calendar") - air_snow_coeff: Optional[Sym] = Field(1 - P.X1, alias="AirSnowCoeff") + air_snow_coeff: Sym | None = Field(1 - P.X1, alias="AirSnowCoeff") return TestConfig, P -@pytest.fixture(scope="session", autouse=True) -def cleanup(request): - """Cleanup a testing file once we are finished. +@pytest.fixture(autouse=True, scope="session") +def gather_session_data(request, yangtze, worker_id): + """ + Gather testing data on pytest run. - This flag prevents remote data from being downloaded multiple times in the same pytest run. + When running pytest with multiple workers, one worker will copy data remotely to the default cache dir while + other workers wait using a lockfile. Once the lock is released, all workers will then copy data to their local + threadsafe_data_dir. As this fixture is scoped to the session, it will only run once per pytest run. """ + testing_setup_warnings() + gather_testing_data(worker_cache_dir=yangtze.path, worker_id=worker_id) def remove_data_written_flag(): - flag = DEFAULT_CACHE.joinpath(".data_written") + """Clean up the cache folder once we are finished.""" + flag = default_testdata_cache.joinpath(".data_written") if flag.exists(): - flag.unlink() + try: + flag.unlink() + except FileNotFoundError: + logging.info( + "Teardown race condition occurred: .data_written flag already removed. Lucky!" 
+ ) + pass request.addfinalizer(remove_data_written_flag) - - -if __name__ == "__main__": - populate_testing_data(branch=RAVEN_TESTING_DATA_BRANCH) diff --git a/tests/emulators.py b/tests/emulators.py index f637d681..c2e4ae59 100644 --- a/tests/emulators.py +++ b/tests/emulators.py @@ -241,10 +241,10 @@ @pytest.fixture(scope="session") -def gr4jcn_config(get_local_testdata, salmon_hru) -> (GR4JCN, params): +def gr4jcn_config(yangtze, salmon_hru) -> (GR4JCN, params): """Return symbolic config and params for basic gr4jcn.""" - salmon_file = get_local_testdata( + salmon_file = yangtze.fetch( "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" ) @@ -266,7 +266,7 @@ def gr4jcn_config(get_local_testdata, salmon_hru) -> (GR4JCN, params): @pytest.fixture(scope="session", params=names) -def symbolic_config(get_local_testdata, salmon_hru, request): +def symbolic_config(yangtze, salmon_hru, request): """Emulator configuration instantiated with symbolic parameters.""" name = request.param cls = configs[name] @@ -275,7 +275,7 @@ def symbolic_config(get_local_testdata, salmon_hru, request): # Extra attributes for gauges gextras = {"ALL": {"elevation": salmon_hru["land"]["elevation"]}} - salmon_file = get_local_testdata( + salmon_file = yangtze.fetch( "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" ) @@ -331,12 +331,12 @@ def numeric_config(symbolic_config): @pytest.fixture(scope="session") -def minimal_emulator(get_local_testdata, salmon_hru): +def minimal_emulator(yangtze, salmon_hru): """Return the config for a single emulator.""" cls = configs["HMETS"] data_type = ["RAINFALL", "TEMP_MIN", "TEMP_MAX", "SNOWFALL", "PET"] - salmon_file = get_local_testdata( + salmon_file = yangtze.fetch( "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" ) diff --git a/tests/test_base.py b/tests/test_base.py index 444fd79b..fedcd4d6 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -1,7 +1,5 @@ from ravenpy.config import commands as rc -# def test_parse_symbolic() - def test_line_command(): c = rc.SBGroupPropertyMultiplier( diff --git a/tests/test_bias_correction.py b/tests/test_bias_correction.py index 7644492b..e90135e8 100644 --- a/tests/test_bias_correction.py +++ b/tests/test_bias_correction.py @@ -7,22 +7,18 @@ # FIXME: This doesn't test Raven functionality; Should we move it to xclim? 
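For context on what the (currently skipped) test below exercises, a sketch of the training step; the quantile-mapping API is assumed to follow the xclim.sdba conventions that xsdba inherited, and the `pr` variable name is an assumption, not copied from the test:

    import xsdba

    from ravenpy.testing.utils import open_dataset

    # Datasets named in the test body.
    ref = open_dataset(
        "cmip5/nasa_nex-gddp-1.0_day_inmcm4_historical+rcp45_nex-gddp_1971-1972_subset.nc"
    )
    hist = open_dataset("nrcan/NRCAN_1971-1972_subset.nc")

    # Train a monthly empirical quantile mapping (variable name assumed).
    group = xsdba.Grouper("time.month")
    eqm = xsdba.EmpiricalQuantileMapping.train(
        ref.pr, hist.pr, nquantiles=50, kind="+", group=group
    )
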
@pytest.mark.skip(reason="This test is not testing Raven functionality") class TestBiasCorrect: - def test_bias_correction(self, get_local_testdata): - ds_fut_sub = xr.open_dataset( - get_local_testdata( - "cmip5/nasa_nex-gddp-1.0_day_inmcm4_historical+rcp85_nex-gddp_2070-2071_subset.nc" - ) + def test_bias_correction(self, open_dataset): + ds_fut_sub = open_dataset( + "cmip5/nasa_nex-gddp-1.0_day_inmcm4_historical+rcp85_nex-gddp_2070-2071_subset.nc" ) - ds_ref_sub = xr.open_dataset( - get_local_testdata( - "cmip5/nasa_nex-gddp-1.0_day_inmcm4_historical+rcp45_nex-gddp_1971-1972_subset.nc" - ) + + ds_ref_sub = open_dataset( + "cmip5/nasa_nex-gddp-1.0_day_inmcm4_historical+rcp45_nex-gddp_1971-1972_subset.nc" ) ds_ref_sub = convert_calendar(ds_ref_sub, "noleap") - ds_his_sub = xr.open_dataset( - get_local_testdata("nrcan/NRCAN_1971-1972_subset.nc") - ) + ds_his_sub = open_dataset("nrcan/NRCAN_1971-1972_subset.nc") + ds_his_sub = convert_calendar(ds_his_sub, "noleap") group = xsdba.Grouper("time.month") # Train the model to find the correction factors diff --git a/tests/test_cli.py b/tests/test_cli.py index dd39a2f5..dbde1a2f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -9,18 +9,16 @@ class TestGenerateGridWeights: - def test_generate_grid_weights_with_nc_input_and_2d_coords( - self, tmp_path, get_local_testdata - ): + def test_generate_grid_weights_with_nc_input_and_2d_coords(self, tmp_path, yangtze): runner = CliRunner() output_path = tmp_path / "bla.rvt" copyfile( - get_local_testdata("raven-routing-sample/VIC_streaminputs.nc"), + yangtze.fetch("raven-routing-sample/VIC_streaminputs.nc"), tmp_path / "VIC_streaminputs.nc", ) copyfile( - get_local_testdata("raven-routing-sample/finalcat_hru_info.zip"), + yangtze.fetch("raven-routing-sample/finalcat_hru_info.zip"), tmp_path / "finalcat_hru_info.zip", ) @@ -52,20 +50,18 @@ def test_generate_grid_weights_with_nc_input_and_2d_coords( weight = float(re.search("1 52 (.+)", output).group(1)) assert abs(weight - 0.2610203097218425) < 1e-04 - def test_generate_grid_weights_with_multiple_subids( - self, tmp_path, get_local_testdata - ): + def test_generate_grid_weights_with_multiple_subids(self, tmp_path, yangtze): # currently exactly same output as "test_generate_grid_weights_with_nc_input_and_2d_coords" # needs a "routing-file-path" with multiple gauges runner = CliRunner() output_path = tmp_path / "bla.rvt" copyfile( - get_local_testdata("raven-routing-sample/VIC_streaminputs.nc"), + yangtze.fetch("raven-routing-sample/VIC_streaminputs.nc"), tmp_path / "VIC_streaminputs.nc", ) copyfile( - get_local_testdata("raven-routing-sample/finalcat_hru_info.zip"), + yangtze.fetch("raven-routing-sample/finalcat_hru_info.zip"), tmp_path / "finalcat_hru_info.zip", ) @@ -101,18 +97,16 @@ def test_generate_grid_weights_with_multiple_subids( weight = float(re.search("1 52 (.+)", output).group(1)) assert abs(weight - 0.2610203097218425) < 1e-04 - def test_generate_grid_weights_with_nc_input_and_1d_coords( - self, tmp_path, get_local_testdata - ): + def test_generate_grid_weights_with_nc_input_and_1d_coords(self, tmp_path, yangtze): runner = CliRunner() output_path = tmp_path / "bla.rvt" copyfile( - get_local_testdata("raven-routing-sample/era5-test-dataset-crop.nc"), + yangtze.fetch("raven-routing-sample/era5-test-dataset-crop.nc"), tmp_path / "era5-test-dataset-crop.nc", ) copyfile( - get_local_testdata("raven-routing-sample/finalcat_hru_info.zip"), + yangtze.fetch("raven-routing-sample/finalcat_hru_info.zip"), tmp_path / "finalcat_hru_info.zip", ) @@ 
-144,16 +138,16 @@ def test_generate_grid_weights_with_nc_input_and_1d_coords( weight = float(re.search("4 3731 (.+)", output).group(1)) assert abs(weight - 0.0034512752779023515) < 1e-04 - def test_generate_grid_weights_with_shp_input(self, tmp_path, get_local_testdata): + def test_generate_grid_weights_with_shp_input(self, tmp_path, yangtze): runner = CliRunner() output_path = tmp_path / "bla.rvt" copyfile( - get_local_testdata("raven-routing-sample/OTT_sub.zip"), + yangtze.fetch("raven-routing-sample/OTT_sub.zip"), tmp_path / "OTT_sub.zip", ) copyfile( - get_local_testdata("raven-routing-sample/finalcat_hru_info.zip"), + yangtze.fetch("raven-routing-sample/finalcat_hru_info.zip"), tmp_path / "finalcat_hru_info.zip", ) @@ -182,18 +176,16 @@ def test_generate_grid_weights_with_shp_input(self, tmp_path, get_local_testdata weight = float(re.search("13 238 (.+)", output).group(1)) assert abs(weight - 0.5761414847779369) < 1e-04 - def test_generate_grid_weights_with_weight_rescaling( - self, tmp_path, get_local_testdata - ): + def test_generate_grid_weights_with_weight_rescaling(self, tmp_path, yangtze): runner = CliRunner() output_path = tmp_path / "bla.rvt" copyfile( - get_local_testdata("raven-routing-sample/OTT_sub.zip"), + yangtze.fetch("raven-routing-sample/OTT_sub.zip"), tmp_path / "OTT_sub.zip", ) copyfile( - get_local_testdata("raven-routing-sample/finalcat_hru_info.zip"), + yangtze.fetch("raven-routing-sample/finalcat_hru_info.zip"), tmp_path / "finalcat_hru_info.zip", ) @@ -226,19 +218,19 @@ def test_generate_grid_weights_with_weight_rescaling( class TestAggregateForcingsToHRUs: - def test_aggregate_forcings_to_hrus(self, tmp_path, get_local_testdata): + def test_aggregate_forcings_to_hrus(self, tmp_path, yangtze): runner = CliRunner() output_nc_file_path = tmp_path / "aggreg.nc" output_weight_file_path = tmp_path / "weight_aggreg.rvt" copyfile( - get_local_testdata( + yangtze.fetch( "raven-routing-sample/VIC_streaminputs.nc", ), tmp_path / "VIC_streaminputs.nc", ) copyfile( - get_local_testdata( + yangtze.fetch( "raven-routing-sample/VIC_streaminputs_weights.rvt", ), tmp_path / "VIC_streaminputs_weights.rvt", @@ -280,7 +272,7 @@ def test_aggregate_forcings_to_hrus(self, tmp_path, get_local_testdata): assert new_weights[2][2] == 1.0 # All new_weights[:][2] need to be 1.0 assert new_weights[3][2] == 1.0 # All new_weights[:][2] need to be 1.0 - # check aggregated NetCDF file + # check the aggregated NetCDF file nc_in = nc4.Dataset(output_nc_file_path, "r") val = nc_in.variables["Streaminputs"][:] nc_in.close() @@ -290,17 +282,17 @@ def test_aggregate_forcings_to_hrus(self, tmp_path, get_local_testdata): assert abs(val[0, 50] - 0.010276) < 1e-04 assert abs(val[16071, 50] - 0.516639) < 1e-04 - def test_aggregate_forcings_to_hrus_with_nodata(self, tmp_path, get_local_testdata): + def test_aggregate_forcings_to_hrus_with_nodata(self, tmp_path, yangtze): runner = CliRunner() output_nc_file_path = tmp_path / "aggreg.nc" output_weight_file_path = tmp_path / "weight_aggreg.rvt" copyfile( - get_local_testdata("raven-routing-sample/VIC_test_nodata.nc"), + yangtze.fetch("raven-routing-sample/VIC_test_nodata.nc"), tmp_path / "VIC_test_nodata.nc", ) copyfile( - get_local_testdata("raven-routing-sample/VIC_test_nodata_weights.rvt"), + yangtze.fetch("raven-routing-sample/VIC_test_nodata_weights.rvt"), tmp_path / "VIC_test_nodata_weights.rvt", ) diff --git a/tests/test_commands.py b/tests/test_commands.py index 24053e0f..46eef5b9 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ 
-1,3 +1,4 @@ +import pathlib import re from collections.abc import Sequence from textwrap import dedent @@ -72,9 +73,9 @@ def test_hrus(): assert hrus[0].subbasin_id == 1 -def test_hrus_parse(get_local_testdata): - f = get_local_testdata("clrh/mattawin/06FB002.rvh") - hrus = rc.HRUs.parse(f.read_text()) +def test_hrus_parse(yangtze): + f = yangtze.fetch("clrh/mattawin/06FB002.rvh") + hrus = rc.HRUs.parse(pathlib.Path(f).read_text()) assert len(hrus) == 40 hru = hrus[0] @@ -363,10 +364,10 @@ def test_parse(self): assert sv[0].data["ATMOS_PRECIP"] == -0.16005 -def test_read_from_netcdf(get_local_testdata): +def test_read_from_netcdf(yangtze): from ravenpy.config.commands import ReadFromNetCDF - f = get_local_testdata( + f = yangtze.fetch( "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" ) c = ReadFromNetCDF.from_nc(f, "PRECIP", station_idx=1, alt_names=("rain",)) @@ -389,37 +390,35 @@ def test_read_from_netcdf(get_local_testdata): assert isinstance(c.da, xr.DataArray) -def test_station_forcing(get_local_testdata): - f = get_local_testdata( +def test_station_forcing(yangtze): + f = yangtze.fetch( "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc" ) c = rc.StationForcing.from_nc(f, "PRECIP", station_idx=1, alt_names="rain") dedent(c.to_rv()) -def test_gridded_forcing(get_local_testdata): - """TODO: Make sure dimensions are in the order x, y, t.""" - fn = get_local_testdata("raven-routing-sample/VIC_temperatures.nc") +def test_gridded_forcing(yangtze): + # TODO: Make sure dimensions are in the order x, y, t. + fn = yangtze.fetch("raven-routing-sample/VIC_temperatures.nc") rc.GriddedForcing.from_nc(fn, data_type="TEMP_AVE", alt_names=("Avg_temp",)) # assert gf.dim_names_nc == ("lon_dim", "lat_dim", "time") - fn = get_local_testdata("raven-routing-sample/VIC_streaminputs.nc") + fn = yangtze.fetch("raven-routing-sample/VIC_streaminputs.nc") rc.GriddedForcing.from_nc(fn, data_type="PRECIP", alt_names=("Streaminputs",)) # assert gf.dim_names_nc == ("lon_dim", "lat_dim", "time") - fn = get_local_testdata( - "cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200601-210012_subset.nc" - ) + fn = yangtze.fetch("cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200601-210012_subset.nc") gf = rc.GriddedForcing.from_nc(fn, data_type="TEMP_AVE", engine="netcdf4") assert gf.latitude_var_name_nc is None -def test_gauge(get_local_testdata, tmp_path): - salmon_file = get_local_testdata( +def test_gauge(yangtze, tmp_path): + salmon_file = yangtze.fetch( "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" ) salmon_file_tmp = tmp_path / "salmon_river_near_prince_george-tmp.nc" - salmon_file_tmp.write_bytes(salmon_file.read_bytes()) + salmon_file_tmp.write_bytes(pathlib.Path(salmon_file).read_bytes()) g = rc.Gauge.from_nc( salmon_file_tmp, @@ -433,8 +432,8 @@ def test_gauge(get_local_testdata, tmp_path): assert g.data[0].read_from_netcdf.deaccumulate -def test_gauge_raises(get_local_testdata): - f = get_local_testdata( +def test_gauge_raises(yangtze): + f = yangtze.fetch( "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" ) @@ -459,8 +458,8 @@ def test_grid_weights(): assert parsed.data == gw.data -def test_redirect_to_file(get_local_testdata): - f = get_local_testdata("raven-routing-sample/VIC_test_nodata_weights.rvt") +def test_redirect_to_file(yangtze): + f = yangtze.fetch("raven-routing-sample/VIC_test_nodata_weights.rvt") r = rc.RedirectToFile(f) assert re.match(r"^:RedirectToFile\s+(\S+)$", r.to_rv()) @@ -493,15 +492,15 @@ class Test(RV): Test().to_rv() -def 
test_subbasins_parse(get_local_testdata): - f = get_local_testdata("clrh/mattawin/06FB002.rvh") - sb = rc.SubBasins.parse(f.read_text()) +def test_subbasins_parse(yangtze): + f = yangtze.fetch("clrh/mattawin/06FB002.rvh") + sb = rc.SubBasins.parse(pathlib.Path(f).read_text()) assert len(sb) == 35 -def test_reservoir_parse(get_local_testdata): - f = get_local_testdata("clrh/mattawin/Lakes.rvh") - rs = rc.Reservoir.parse(f.read_text()) +def test_reservoir_parse(yangtze): + f = yangtze.fetch("clrh/mattawin/Lakes.rvh") + rs = rc.Reservoir.parse(pathlib.Path(f).read_text()) assert len(rs) == 5 r = rs[0] @@ -560,9 +559,9 @@ def test_hru_group(): ) -def test_subbasin_group_parse(get_local_testdata): - f = get_local_testdata("clrh/mattawin/06FB002.rvh") - sbgs = rc.SubBasinGroup.parse(f.read_text()) +def test_subbasin_group_parse(yangtze): + f = yangtze.fetch("clrh/mattawin/06FB002.rvh") + sbgs = rc.SubBasinGroup.parse(pathlib.Path(f).read_text()) assert len(sbgs) == 2 sbg = sbgs[0] assert sbg.name == "Allsubbasins" @@ -570,9 +569,9 @@ def test_subbasin_group_parse(get_local_testdata): sbg.sb_ids[0] == 23007946 -def test_channel_profile_parse(get_local_testdata): - f = get_local_testdata("clrh/mattawin/channel_properties.rvp") - cps = rc.ChannelProfile.parse(f.read_text()) +def test_channel_profile_parse(yangtze): + f = yangtze.fetch("clrh/mattawin/channel_properties.rvp") + cps = rc.ChannelProfile.parse(pathlib.Path(f).read_text()) assert len(cps) == 20 cp = cps[0] diff --git a/tests/test_coords.py b/tests/test_coords.py index 0c14e0d2..7429e53a 100644 --- a/tests/test_coords.py +++ b/tests/test_coords.py @@ -1,8 +1,7 @@ from ravenpy.utilities.coords import infer_scale_and_offset -from ravenpy.utilities.testdata import open_dataset -def test_infer_scale_and_offset(): +def test_infer_scale_and_offset(open_dataset): # ERA5 precip and tas ts = "era5/tas_pr_20180101-20180108.nc" diff --git a/tests/test_distributed_workflow.py b/tests/test_distributed_workflow.py index f9231827..aeafe930 100644 --- a/tests/test_distributed_workflow.py +++ b/tests/test_distributed_workflow.py @@ -13,8 +13,8 @@ ) -def test_simple_workflow(get_local_testdata, tmp_path): - shp_path = get_local_testdata( +def test_simple_workflow(tmp_path, yangtze): + shp_path = yangtze.fetch( "basinmaker/drainage_region_0175_v2-1/finalcat_info_v2-1.zip" ) @@ -38,7 +38,7 @@ def test_simple_workflow(get_local_testdata, tmp_path): rvh = bm.extract(hru_from_sb=True) # Streamflow obs - qobs_fn = get_local_testdata("matapedia/Qobs_Matapedia_01BD009.nc") + qobs_fn = yangtze.fetch("matapedia/Qobs_Matapedia_01BD009.nc") qobs = rc.ObservationData.from_nc( qobs_fn, @@ -47,7 +47,7 @@ def test_simple_workflow(get_local_testdata, tmp_path): ) # Meteo obs for GriddedForcing - does not work because subbasins do not overlap 100% with the ERA data - meteo_grid_fn = get_local_testdata("matapedia/Matapedia_meteo_data_2D.nc") + meteo_grid_fn = yangtze.fetch("matapedia/Matapedia_meteo_data_2D.nc") # Dict of GW attributes gw = GridWeightExtractor( @@ -77,7 +77,7 @@ def test_simple_workflow(get_local_testdata, tmp_path): # Weights for some HRUs do not sum to one. 
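A self-contained sketch of the BasinMaker extraction exercised by this test; the `BasinMakerExtractor(df)` construction is assumed from the surrounding usage rather than taken verbatim from the suite:

    from ravenpy.extractors.routing_product import BasinMakerExtractor, open_shapefile
    from ravenpy.testing.utils import yangtze

    shp_path = yangtze().fetch(
        "basinmaker/drainage_region_0175_v2-1/finalcat_info_v2-1.zip"
    )

    # Read the routing product and emit Raven HRU/subbasin structures.
    df = open_shapefile(shp_path)
    bm = BasinMakerExtractor(df)
    rvh = bm.extract(hru_from_sb=True)
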
    # Meteo forcing per station (virtual stations, since this is ERA5 data)
-    meteo_station = get_local_testdata("matapedia/Matapedia_meteo_data_stations.nc")
+    meteo_station = yangtze.fetch("matapedia/Matapedia_meteo_data_stations.nc")
 
     [
         rc.StationForcing.from_nc(meteo_station, dtyp, alt_names=(alias,))
diff --git a/tests/test_emulators.py b/tests/test_emulators.py
index 0781c7a9..4b39e258 100644
--- a/tests/test_emulators.py
+++ b/tests/test_emulators.py
@@ -206,16 +206,16 @@ def test_run_with_dap_link(minimal_emulator, tmp_path):
         Emulator(conf, workdir=tmp_path).run()
 
 
-def test_routing(get_local_testdata):
-    """We need at least 2 subbasins to activate routing."""
+def test_routing(yangtze):
+    """We need at least two subbasins to activate routing."""
     from ravenpy.config.emulators.gr4jcn import P
 
     # Salmon catchment is now split into land- and lake-part.
     # The areas do not sum up to overall area of 4250.6 [km2].
     # This is the reason the "test_routing" will give different
     # results compared to "test_run". The "salmon_land_hru"
-    # however is kept at the overall area of 4250.6 [km2] such
-    # that other tests still obtain same results as before.
+    # however, is kept at the overall area of 4250.6 [km2] such
+    # that other tests still get the same results as before.
 
     salmon_land_hru_1 = dict(
         area=4250.6,
@@ -235,10 +235,10 @@
         hru_type="land",
     )
 
-    salmon_river = get_local_testdata(
+    salmon_river = yangtze.fetch(
         "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc"
     )
-    ts_2d = get_local_testdata(
+    ts_2d = yangtze.fetch(
         "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc"
    )
 
@@ -438,7 +438,7 @@
 
     hds = out.hydrograph.q_sim
 
-    assert len(hds.nbasins) == 1  # number of "gauged" basins is 1
+    assert len(hds.nbasins) == 1  # the number of "gauged" basins is 1
 
     # We only have one SB with gauged=True, so the output has a single column.
# The number of time steps simulated between (2000, 1, 1) and @@ -480,7 +480,7 @@ def test_routing(get_local_testdata): @pytest.mark.slow @pytest.mark.xfail -def test_routing_lievre_tutorial(get_local_testdata, tmp_path): +def test_routing_lievre_tutorial(tmp_path, yangtze): from ravenpy.extractors.routing_product import ( BasinMakerExtractor, GridWeightExtractor, @@ -491,18 +491,12 @@ def test_routing_lievre_tutorial(get_local_testdata, tmp_path): # Input files # ############### - routing_product_shp_path = get_local_testdata( - "raven-routing-sample/lievre_hrus_v21.zip" - ) + routing_product_shp_path = yangtze.fetch("raven-routing-sample/lievre_hrus_v21.zip") - vic_streaminputs_nc_path = get_local_testdata( - "raven-routing-sample/VIC_streaminputs.nc" - ) - vic_temperatures_nc_path = get_local_testdata( - "raven-routing-sample/VIC_temperatures.nc" - ) + vic_streaminputs_nc_path = yangtze.fetch("raven-routing-sample/VIC_streaminputs.nc") + vic_temperatures_nc_path = yangtze.fetch("raven-routing-sample/VIC_temperatures.nc") - observation_data_nc_path = get_local_testdata("raven-routing-sample/WSC02LE024.nc") + observation_data_nc_path = yangtze.fetch("raven-routing-sample/WSC02LE024.nc") streaminputs = xr.open_dataset(vic_streaminputs_nc_path) diff --git a/tests/test_ensemble.py b/tests/test_ensemble.py index 1086ab7d..7f525fc4 100644 --- a/tests/test_ensemble.py +++ b/tests/test_ensemble.py @@ -15,13 +15,13 @@ # @pytest.mark.xfail() -def test_enkf(get_local_testdata, salmon_hru, tmp_path): +def test_enkf(salmon_hru, tmp_path, yangtze): """Test one run of Ensemble Kalman Filter data assimilation.""" cls = GR4JCN # name = "GR4JCN" data_type = ["RAINFALL", "TEMP_MIN", "TEMP_MAX", "SNOWFALL"] - salmon_file = get_local_testdata( + salmon_file = yangtze.fetch( "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" ) diff --git a/tests/test_external_dataset_access.py b/tests/test_external_dataset_access.py index fb1ee1a1..b29fa5a9 100644 --- a/tests/test_external_dataset_access.py +++ b/tests/test_external_dataset_access.py @@ -7,12 +7,7 @@ import xarray from ravenpy.extractors.forecasts import get_CASPAR_dataset, get_ECCC_dataset -from ravenpy.utilities.testdata import ( - _default_cache_dir, - get_file, - open_dataset, - query_folder, -) +from ravenpy.testing.utils import default_testdata_cache, open_dataset, yangtze @pytest.mark.online @@ -34,7 +29,7 @@ def test_get_ECCC_dataset(self): class TestRemoteFileAccess: dap_url = "http://test.opendap.org:80/opendap/data/nc/" git_url = "https://github.com/Ouranosinc/raven-testdata" - branch = "master" + branch = "main" @pytest.mark.xfail( raises=urllib.error.URLError, @@ -42,22 +37,24 @@ class TestRemoteFileAccess: strict=False, ) def test_get_file_default_cache(self): - file = get_file(name="ostrich-hbvec/raven-hbvec-salmon.rvi", branch=self.branch) + file = yangtze(branch=self.branch).fetch( + fname="ostrich-hbvec/raven-hbvec-salmon.rvi" + ) - assert Path(_default_cache_dir).exists() - assert file.is_file() - with file.open() as f: + assert Path(default_testdata_cache).exists() + assert Path(file).is_file() + with Path(file).open() as f: header = f.read() assert ":FileType rvi ASCII Raven 2.8.2" in header def test_open_dataset(self): ds = open_dataset( name="raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc", - branch=self.branch, + yangtze_kwargs={"branch": self.branch}, ) assert ( - Path(_default_cache_dir) + Path(default_testdata_cache) .joinpath( self.branch, "raven-gr4j-cemaneige", @@ -66,64 +63,3 @@ def 
test_open_dataset(self): .exists() ) assert isinstance(ds, xarray.Dataset) - - def test_open_dataset_false_cache(self): - ds = open_dataset( - name="raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc", - branch=self.branch, - cache=False, - ) - - assert ( - not Path(_default_cache_dir) - .joinpath( - "raven-gr4j-cemaneige", - "Salmon-River-Near-Prince-George_meteo_daily_3d.nc", - ) - .exists() - ) - assert isinstance(ds, xarray.Dataset) - - @pytest.mark.xfail( - raises=OSError, reason="test.opendap.org is offline", strict=False - ) - def test_dap_access(self): - ds = open_dataset( - name="20070917-MODIS_A-JPL-L2P-A2007260000000.L2_LAC_GHRSST-v01.nc", - dap_url=self.dap_url, - ) - - assert isinstance(ds, xarray.Dataset) - - -@pytest.mark.online -class TestQueryFolder: - git_url = "https://github.com/Ouranosinc/raven-testdata" - branch = "master" - - @pytest.mark.xfail(reason="Query folder is API rate limited", strict=False) - def test_query_specific_folder(self): - folder = query_folder(folder="raven-gr4j-cemaneige", branch=self.branch) - assert len(folder) == 8 - - @pytest.mark.xfail(reason="Query folder is API rate limited", strict=False) - def test_query_folder_patterns(self): - mohyse = query_folder( - folder="/regionalisation_data/tests/", pattern="MOHYSE", branch=self.branch - ) - assert len(mohyse) == 1 - assert mohyse[0] == str( - Path("regionalisation_data", "tests", "MOHYSE_parameters.csv") - ) - - @pytest.mark.xfail(reason="Query folder is API rate limited", strict=False) - def test_query_folder_patterns_excessive_slashes(self): - mohyse = query_folder( - folder="///regionalisation_data/////tests///", - pattern="MOHYSE", - branch=self.branch, - ) - assert len(mohyse) == 1 - assert mohyse[0] == str( - Path("regionalisation_data", "tests", "MOHYSE_parameters.csv") - ) diff --git a/tests/test_extractor.py b/tests/test_extractor.py index b822e8bc..0d65ff85 100644 --- a/tests/test_extractor.py +++ b/tests/test_extractor.py @@ -2,8 +2,8 @@ from ravenpy.extractors.routing_product import BasinMakerExtractor, open_shapefile -def test_basinmaker_extractor(get_local_testdata, tmp_path): - routing_product_shp_path = get_local_testdata( +def test_basinmaker_extractor(tmp_path, yangtze): + routing_product_shp_path = yangtze.fetch( "basinmaker/drainage_region_0175_v2-1/finalcat_info_v2-1.zip" ) df = open_shapefile( diff --git a/tests/test_forecasting.py b/tests/test_forecasting.py index 407a20eb..14ba5c82 100644 --- a/tests/test_forecasting.py +++ b/tests/test_forecasting.py @@ -30,7 +30,7 @@ def test_climatology_esp(minimal_emulator, tmp_path): assert len(esp.hydrograph.time) == minimal_emulator.duration + 1 -def test_hindcast_climatology_esp(minimal_emulator, tmp_path, get_local_testdata): +def test_hindcast_climatology_esp(minimal_emulator, tmp_path, yangtze): config = minimal_emulator.model_copy(deep=True) hc = hindcast_climatology_esp( config, @@ -47,7 +47,7 @@ def test_hindcast_climatology_esp(minimal_emulator, tmp_path, get_local_testdata } # Construct climpred HindcastEnsemble - salmon_file = get_local_testdata( + salmon_file = yangtze.fetch( "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" ) qobs = xr.open_dataset(salmon_file).qobs @@ -64,13 +64,13 @@ def test_hindcast_climatology_esp(minimal_emulator, tmp_path, get_local_testdata assert rank_histo_verif.q_sim.shape[0] == config.duration + 1 -def test_forecasting_GEPS(numeric_config, get_local_testdata): +def test_forecasting_GEPS(numeric_config, yangtze): """Test to perform a forecast using 
auto-queried ECCC data aggregated on THREDDS.""" name, wup = numeric_config if name != "GR4JCN": pytest.skip("Test only for GR4JCN model.") - geps = get_local_testdata("eccc_forecasts/geps_watershed.nc") + geps = yangtze.fetch("eccc_forecasts/geps_watershed.nc") # Prepare a RAVEN model run using historical data, GR4JCN in this case. # This is a dummy run to get initial states. In a real forecast situation, diff --git a/tests/test_geo_utilities.py b/tests/test_geo_utilities.py index eeb1fa18..169bf832 100644 --- a/tests/test_geo_utilities.py +++ b/tests/test_geo_utilities.py @@ -28,33 +28,31 @@ def test_circular_mean_aspect(self): self.analysis.circular_mean_aspect(southwest_angles), 191.88055987 ) - def test_address_append(self, get_local_testdata): + def test_address_append(self, yangtze): non_existing_tarred_file = "polygons.tar" - assert "zip://" in self.io.address_append( - get_local_testdata(zipped_geojson_file) - ) + assert "zip://" in self.io.address_append(yangtze.fetch(zipped_geojson_file)) assert "tar://" in self.io.address_append(non_existing_tarred_file) - assert not self.io.address_append(get_local_testdata(geojson_file)).startswith( + assert not self.io.address_append(yangtze.fetch(geojson_file)).startswith( ("zip://", "tar://") ) - def test_archive_sniffer(self, tmp_path, get_local_testdata): - probable_shp = self.io.archive_sniffer(get_local_testdata(zipped_geojson_file)) + def test_archive_sniffer(self, tmp_path, yangtze): + probable_shp = self.io.archive_sniffer(yangtze.fetch(zipped_geojson_file)) assert Path(probable_shp[0]).name == "mars.shp" probable_shp = self.io.archive_sniffer( - get_local_testdata(zipped_geojson_file), + yangtze.fetch(zipped_geojson_file), working_dir=tmp_path, ) assert Path(probable_shp[0]).name == "mars.shp" - def test_archive_extract(self, tmp_path, get_local_testdata): - zipped_file = get_local_testdata(zipped_geojson_file) + def test_archive_extract(self, tmp_path, yangtze): + zipped_file = yangtze.fetch(zipped_geojson_file) - assert zipped_file.exists() + assert Path(zipped_file).exists() - files = list() + files = [] with tempfile.TemporaryDirectory(dir=tmp_path) as tdir: files.extend(self.io.generic_extract_archive(zipped_file, output_dir=tdir)) assert len(files) == 5 @@ -72,16 +70,16 @@ class TestFileInfoFuncs: non_existing_file = "unreal.zip" - def test_raster_datatype_sniffer(self, get_local_testdata): - datatype = self.io.raster_datatype_sniffer(get_local_testdata(raster_file)) + def test_raster_datatype_sniffer(self, yangtze): + datatype = self.io.raster_datatype_sniffer(yangtze.fetch(raster_file)) assert datatype.lower() == "uint8" - def test_crs_sniffer(self, get_local_testdata): - assert self.io.crs_sniffer(get_local_testdata(zipped_geojson_file)) == 4326 + def test_crs_sniffer(self, yangtze): + assert self.io.crs_sniffer(yangtze.fetch(zipped_geojson_file)) == 4326 assert set( self.io.crs_sniffer( - get_local_testdata(geojson_file), - get_local_testdata(raster_file), + yangtze.fetch(geojson_file), + yangtze.fetch(raster_file), ) ) == {4326} @@ -98,10 +96,10 @@ def test_single_file_check(self): with pytest.raises(NotImplementedError): self.checks.single_file_check(three) - def test_boundary_check(self, recwarn, get_local_testdata): + def test_boundary_check(self, recwarn, yangtze): # NOTE: does not presently accept zipped files. 
- geojson = get_local_testdata(geojson_file) - raster = get_local_testdata(raster_file) + geojson = yangtze.fetch(geojson_file) + raster = yangtze.fetch(raster_file) self.checks.boundary_check(raster, max_y=85.5) assert len(recwarn) == 0 @@ -122,10 +120,8 @@ class TestGdalOgrFunctions: fiona = pytest.importorskip("fiona") sgeo = pytest.importorskip("shapely.geometry") - def test_gdal_aspect_not_projected(self, tmp_path, get_local_testdata): - aspect_grid = self.analysis.gdal_aspect_analysis( - get_local_testdata(raster_file) - ) + def test_gdal_aspect_not_projected(self, tmp_path, yangtze): + aspect_grid = self.analysis.gdal_aspect_analysis(yangtze.fetch(raster_file)) np.testing.assert_almost_equal( self.analysis.circular_mean_aspect(aspect_grid), 10.91190, decimal=5 ) @@ -135,7 +131,7 @@ def test_gdal_aspect_not_projected(self, tmp_path, get_local_testdata): prefix="aspect_", suffix=".tiff", delete=False, dir=tmp_path ).name aspect_grid = self.analysis.gdal_aspect_analysis( - get_local_testdata(raster_file), + yangtze.fetch(raster_file), set_output=aspect_tempfile, ) np.testing.assert_almost_equal( @@ -147,8 +143,8 @@ def test_gdal_aspect_not_projected(self, tmp_path, get_local_testdata): @pytest.mark.xfail( reason="Console commands have been modified in GDAL 3.11+", strict=False ) - def test_gdal_slope_not_projected(self, tmp_path, get_local_testdata): - slope_grid = self.analysis.gdal_slope_analysis(get_local_testdata(raster_file)) + def test_gdal_slope_not_projected(self, tmp_path, yangtze): + slope_grid = self.analysis.gdal_slope_analysis(yangtze.fetch(raster_file)) np.testing.assert_almost_equal(slope_grid.min(), 0.0) np.testing.assert_almost_equal(slope_grid.mean(), 64.43654, decimal=5) np.testing.assert_almost_equal(slope_grid.max(), 89.71747, decimal=5) @@ -157,7 +153,7 @@ def test_gdal_slope_not_projected(self, tmp_path, get_local_testdata): prefix="slope_", suffix=".tiff", delete=False, dir=tmp_path ).name slope_grid = self.analysis.gdal_slope_analysis( - get_local_testdata(raster_file), + yangtze.fetch(raster_file), set_output=slope_tempfile, ) np.testing.assert_almost_equal(slope_grid.mean(), 64.4365427) @@ -167,18 +163,18 @@ def test_gdal_slope_not_projected(self, tmp_path, get_local_testdata): @pytest.mark.xfail( reason="Console commands have been modified in GDAL 3.11+", strict=False ) - def test_dem_properties(self, get_local_testdata): - dem_properties = self.analysis.dem_prop(get_local_testdata(raster_file)) + def test_dem_properties(self, yangtze): + dem_properties = self.analysis.dem_prop(yangtze.fetch(raster_file)) np.testing.assert_almost_equal(dem_properties["aspect"], 10.91190, decimal=5) np.testing.assert_almost_equal(dem_properties["elevation"], 79.0341, decimal=4) np.testing.assert_almost_equal(dem_properties["slope"], 64.43654, decimal=5) - with self.fiona.open(get_local_testdata(geojson_file)) as gj: + with self.fiona.open(yangtze.fetch(geojson_file)) as gj: feature = next(iter(gj)) geom = self.sgeo.shape(feature["geometry"]) region_dem_properties = self.analysis.dem_prop( - get_local_testdata(raster_file), geom=geom + yangtze.fetch(raster_file), geom=geom ) np.testing.assert_almost_equal( region_dem_properties["aspect"], 280.681, decimal=3 @@ -191,8 +187,8 @@ def test_dem_properties(self, get_local_testdata): ) # Slope values are high due to data values using Geographic CRS - def test_geom_properties(self, get_local_testdata): - with self.fiona.open(get_local_testdata(geojson_file)) as gj: + def test_geom_properties(self, yangtze): + with 
self.fiona.open(yangtze.fetch(geojson_file)) as gj: iterable = iter(gj) feature_1 = next(iterable) feature_2 = next(iterable) @@ -224,13 +220,13 @@ class TestGenericGeoOperations: rasterio = pytest.importorskip("rasterio") sgeo = pytest.importorskip("shapely.geometry") - def test_vector_reprojection(self, tmp_path, get_local_testdata): + def test_vector_reprojection(self, tmp_path, yangtze): # TODO: It would be awesome if this returned a temporary filepath if no file given. reproj_file = tempfile.NamedTemporaryFile( prefix="reproj_", suffix=".geojson", delete=False, dir=tmp_path ).name self.geo.generic_vector_reproject( - get_local_testdata(geojson_file), + yangtze.fetch(geojson_file), projected=reproj_file, target_crs="EPSG:3348", ) @@ -250,14 +246,14 @@ def test_vector_reprojection(self, tmp_path, get_local_testdata): np.testing.assert_almost_equal(geom_properties["perimeter"], 9194343.1759303) np.testing.assert_almost_equal(geom_properties["gravelius"], 1.0212589) - def test_raster_warp(self, tmp_path, get_local_testdata): + def test_raster_warp(self, tmp_path, yangtze): # TODO: It would be awesome if this returned a temporary filepath if no file given. # TODO: either use `output` or `reprojected/warped` for these functions. reproj_file = tempfile.NamedTemporaryFile( prefix="reproj_", suffix=".tiff", delete=False, dir=tmp_path ).name self.geo.generic_raster_warp( - get_local_testdata(raster_file), + yangtze.fetch(raster_file), output=reproj_file, target_crs="EPSG:3348", ) @@ -275,12 +271,12 @@ def test_raster_warp(self, tmp_path, get_local_testdata): assert data.max() == 255 np.testing.assert_almost_equal(data.mean(), 60.747, 3) - def test_warped_raster_slope(self, tmp_path, get_local_testdata): + def test_warped_raster_slope(self, tmp_path, yangtze): reproj_file = tempfile.NamedTemporaryFile( prefix="reproj_", suffix=".tiff", delete=False, dir=tmp_path ).name self.geo.generic_raster_warp( - get_local_testdata(raster_file), + yangtze.fetch(raster_file), output=reproj_file, target_crs="EPSG:3348", ) @@ -290,12 +286,12 @@ def test_warped_raster_slope(self, tmp_path, get_local_testdata): np.testing.assert_almost_equal(slope_grid.mean(), 0.0035, 2) np.testing.assert_almost_equal(slope_grid.max(), 0.35, 2) - def test_warped_raster_aspect(self, tmp_path, get_local_testdata): + def test_warped_raster_aspect(self, tmp_path, yangtze): reproj_file = tempfile.NamedTemporaryFile( prefix="reproj_", suffix=".tiff", delete=False, dir=tmp_path ).name self.geo.generic_raster_warp( - get_local_testdata(raster_file), + yangtze.fetch(raster_file), output=reproj_file, target_crs="EPSG:3348", ) @@ -305,8 +301,8 @@ def test_warped_raster_aspect(self, tmp_path, get_local_testdata): self.analysis.circular_mean_aspect(aspect_grid), 7.7397, decimal=3 ) - def test_raster_clip(self, tmp_path, get_local_testdata): - with self.fiona.open(get_local_testdata(geojson_file)) as gj: + def test_raster_clip(self, tmp_path, yangtze): + with self.fiona.open(yangtze.fetch(geojson_file)) as gj: feature = next(iter(gj)) geom = self.sgeo.shape(feature["geometry"]) @@ -314,7 +310,7 @@ def test_raster_clip(self, tmp_path, get_local_testdata): prefix="reproj_", suffix=".tiff", delete=False, dir=tmp_path ).name self.geo.generic_raster_clip( - get_local_testdata(raster_file), + yangtze.fetch(raster_file), clipped_file, geometry=geom, ) @@ -327,8 +323,8 @@ def test_raster_clip(self, tmp_path, get_local_testdata): assert data.max() == 255 np.testing.assert_almost_equal(data.mean(), 102.8222965) - def test_shapely_pyproj_transform(self, 
get_local_testdata): - with self.fiona.open(get_local_testdata(geojson_file)) as gj: + def test_shapely_pyproj_transform(self, yangtze): + with self.fiona.open(yangtze.fetch(geojson_file)) as gj: feature = next(iter(gj)) geom = self.sgeo.shape(feature["geometry"]) @@ -347,8 +343,8 @@ class TestGIS: io = pytest.importorskip("ravenpy.utilities.io") sgeo = pytest.importorskip("shapely.geometry") - def test_get_bbox_single(self, get_local_testdata): - vector = get_local_testdata(geojson_file) + def test_get_bbox_single(self, yangtze): + vector = yangtze.fetch(geojson_file) w, s, n, e = self.io.get_bbox(vector, all_features=False) np.testing.assert_almost_equal(w, -139.8514262) @@ -356,8 +352,8 @@ def test_get_bbox_single(self, get_local_testdata): np.testing.assert_almost_equal(n, -117.4753973) np.testing.assert_almost_equal(e, 29.6327068) - def test_get_bbox_all(self, get_local_testdata): - vector = get_local_testdata(geojson_file) + def test_get_bbox_all(self, yangtze): + vector = yangtze.fetch(geojson_file) w, s, n, e = self.io.get_bbox(vector) np.testing.assert_almost_equal(w, -139.8514262) @@ -365,8 +361,8 @@ def test_get_bbox_all(self, get_local_testdata): np.testing.assert_almost_equal(n, -38.7397456) np.testing.assert_almost_equal(e, 64.1757015) - def test_feature_contains(self, get_local_testdata): - vector = get_local_testdata(geojson_file) + def test_feature_contains(self, yangtze): + vector = yangtze.fetch(geojson_file) point = -69.0, 45 assert isinstance(self.checks.feature_contains(point, vector), dict) diff --git a/tests/test_geoserver.py b/tests/test_geoserver.py index 42b7222f..40502e23 100644 --- a/tests/test_geoserver.py +++ b/tests/test_geoserver.py @@ -178,7 +178,7 @@ class TestWCS: saskatoon = "polygons/Saskatoon.geojson" - def test_get_raster_wcs(self, tmp_path, get_local_testdata): + def test_get_raster_wcs(self, tmp_path, yangtze): # TODO: This CRS needs to be redefined using modern pyproj-compatible strings. 
nalcms_crs = "+proj=laea +lat_0=45 +lon_0=-100 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs=True" @@ -186,7 +186,7 @@ def test_get_raster_wcs(self, tmp_path, get_local_testdata): prefix="reprojected_", suffix=".json", dir=tmp_path ) as projected: self.geo.generic_vector_reproject( - get_local_testdata(self.saskatoon), + yangtze.fetch(self.saskatoon), projected.name, target_crs=nalcms_crs, ) diff --git a/tests/test_graphs.py b/tests/test_graphs.py index cfff710a..555e1ab4 100644 --- a/tests/test_graphs.py +++ b/tests/test_graphs.py @@ -9,8 +9,8 @@ class TestGraph: - def test_ts_fit_graph(self, get_local_testdata, tmp_path): - raven_hydrograph = get_local_testdata( + def test_ts_fit_graph(self, tmp_path, yangtze): + raven_hydrograph = yangtze.fetch( "hydro_simulations/raven-gr4j-cemaneige-sim_hmets-0_Hydrographs.nc", ) file = tmp_path / "raven-gr4j-cemaneige-sim_hmets-0_Hydrographs.nc" diff --git a/tests/test_hindcasting.py b/tests/test_hindcasting.py index a7f980bc..3c31b2f4 100644 --- a/tests/test_hindcasting.py +++ b/tests/test_hindcasting.py @@ -1,4 +1,5 @@ import datetime as dt +import pathlib import sys import pytest @@ -22,8 +23,8 @@ class TestHindcasting: - def test_hindcasting_GEPS(self, get_local_testdata, salmon_hru, tmp_path): - ts20 = get_local_testdata("caspar_eccc_hindcasts/geps_watershed.nc") + def test_hindcasting_GEPS(self, salmon_hru, tmp_path, yangtze): + ts20 = yangtze.fetch("caspar_eccc_hindcasts/geps_watershed.nc") hru = salmon_hru["land"] data_kwds = { @@ -91,9 +92,11 @@ def test_hindcasting_GEPS(self, get_local_testdata, salmon_hru, tmp_path): (3, 11) > sys.version_info >= (3, 10), reason="climpred is unstable in Python 3.10", ) - def test_climpred_hindcast_verif(self, get_local_testdata, salmon_hru, tmp_path): - ts = get_local_testdata( - "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" + def test_climpred_hindcast_verif(self, salmon_hru, tmp_path, yangtze): + ts = pathlib.Path( + yangtze.fetch( + "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" + ) ) # Make a local copy to evade double-ownership of file - first file ts_tmp1 = tmp_path / "salmon_river_near_prince_george-tmp1.nc" diff --git a/tests/test_nb_graphs.py b/tests/test_nb_graphs.py index 2cf697f0..10e6e1d9 100644 --- a/tests/test_nb_graphs.py +++ b/tests/test_nb_graphs.py @@ -8,22 +8,22 @@ class TestNBGraphs: nbg = pytest.importorskip("ravenpy.utilities.nb_graphs") - def test_hydrograph(self, get_local_testdata): - with xr.open_dataset(get_local_testdata(self.hydrographs)) as ds: + def test_hydrograph(self, yangtze): + with xr.open_dataset(yangtze.fetch(self.hydrographs)) as ds: self.nbg.hydrographs(ds) - def test_mean_annual_hydrograph(self, get_local_testdata): - with xr.open_dataset(get_local_testdata(self.hydrographs)) as ds: + def test_mean_annual_hydrograph(self, yangtze): + with xr.open_dataset(yangtze.fetch(self.hydrographs)) as ds: self.nbg.mean_annual_hydrograph(ds) - def test_spaghetti_annual_hydrograph(self, get_local_testdata): - with xr.open_dataset(get_local_testdata(self.hydrographs)) as ds: + def test_spaghetti_annual_hydrograph(self, yangtze): + with xr.open_dataset(yangtze.fetch(self.hydrographs)) as ds: self.nbg.spaghetti_annual_hydrograph(ds) - def test_ts_fit_graph(self, get_local_testdata): + def test_ts_fit_graph(self, yangtze): from xclim.indicators.generic import fit, stats - with xr.open_dataset(get_local_testdata(self.hydrographs)) as ds: + with xr.open_dataset(yangtze.fetch(self.hydrographs)) as ds: ts = stats(ds.q_sim.load(), op="max", 
freq="ME") with set_options(check_missing="skip"): params = fit(ts, dist="gamma") diff --git a/tests/test_ravenpy.py b/tests/test_ravenpy.py index bd798ebd..ccec5cf4 100644 --- a/tests/test_ravenpy.py +++ b/tests/test_ravenpy.py @@ -26,7 +26,7 @@ def test_ensemble_reader(gr4jcn_config, tmp_path): ens = EnsembleReader(runs=runs, dim="parameters") assert len(ens.hydrograph.parameters) == 2 - # Create list of output paths using glob + # Create a list of output paths using glob paths = p.glob("**/output") ens = EnsembleReader(paths=paths, dim="parameters") @@ -39,6 +39,7 @@ def test_package_metadata(): assert project is not None assert project.submodule_search_locations is not None + location = project.submodule_search_locations[0] metadata = pathlib.Path(location).resolve().joinpath("__init__.py") diff --git a/tests/test_regionalisation.py b/tests/test_regionalisation.py index 5f8ab81a..7f2897c9 100644 --- a/tests/test_regionalisation.py +++ b/tests/test_regionalisation.py @@ -11,7 +11,7 @@ class TestRegionalization: - def test_full_example(self, symbolic_config, get_local_testdata): + def test_full_example(self, symbolic_config): name, config = symbolic_config method = "SP_IDW" diff --git a/tests/test_rvs.py b/tests/test_rvs.py index d142cbb0..d6d57195 100644 --- a/tests/test_rvs.py +++ b/tests/test_rvs.py @@ -1,5 +1,5 @@ import datetime as dt -from collections.abc import Sequence +import pathlib from typing import Union import cftime @@ -83,8 +83,8 @@ class MySymbolicEmulator(Config): assert num.rain_snow_transition.temp == 0.5 -def test_solution(get_local_testdata): - sol = get_local_testdata("gr4j_cemaneige/solution.rvc") +def test_solution(yangtze): + sol = pathlib.Path(yangtze.fetch("gr4j_cemaneige/solution.rvc")) conf = Config().set_solution(sol) assert len(conf.hru_state_variable_table) == 1 assert conf.hru_state_variable_table[0].data["ATMOSPHERE"] == 821.98274 @@ -97,12 +97,10 @@ def test_solution(get_local_testdata): assert ":BasinIndex 1 watershed" in conf.rvc -def test_rvh_from_extractor(get_local_testdata): +def test_rvh_from_extractor(yangtze): from ravenpy.extractors import BasinMakerExtractor, open_shapefile - shp = get_local_testdata( - "basinmaker/drainage_region_0175_v2-1/finalcat_info_v2-1.zip" - ) + shp = yangtze.fetch("basinmaker/drainage_region_0175_v2-1/finalcat_info_v2-1.zip") bm = BasinMakerExtractor(open_shapefile(shp)) # Smoke test diff --git a/tests/test_utils.py b/tests/test_utils.py index 9e8ff7aa..1c24f40b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,8 +3,8 @@ from ravenpy.config.utils import nc_specs -def test_nc_specs(get_local_testdata): - f = get_local_testdata( +def test_nc_specs(yangtze): + f = yangtze.fetch( "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" ) attrs = nc_specs(f, "PRECIP", station_idx=1, alt_names=("rain",)) diff --git a/tox.ini b/tox.ini index ec1c41a2..dccf2d87 100644 --- a/tox.ini +++ b/tox.ini @@ -53,7 +53,7 @@ passenv = GDAL_VERSION GITHUB_* LD_LIBRARY_PATH - RAVENPY_* + RAVEN_* UPSTREAM_BRANCH extras = dev From 69ac0670917d27bfcd1cd2cc54aded098d7754df Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 10 Jun 2025 13:46:34 -0400 Subject: [PATCH 05/29] test adjustments, add pooch --- environment-dev.yml | 1 + pyproject.toml | 1 + tests/test_commands.py | 2 +- tests/test_external_dataset_access.py | 5 +++++ tests/test_utils.py | 9 +++++++++ 5 files changed, 17 insertions(+), 1 deletion(-) diff --git a/environment-dev.yml 
b/environment-dev.yml
index c9d2605a..aa82e9d2 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -57,6 +57,7 @@ dependencies:
   - isort ==6.0.0
   - mypy >=1.14.1
   - numpydoc >=1.8.0
+  - pooch >=1.8.0
   - pre-commit >=3.5.0
   - pylint >=3.3.0
   - pytest >=8.2.2
diff --git a/pyproject.toml b/pyproject.toml
index 88bd1795..6f00cf4c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,6 +80,7 @@ dev = [
   "nbval",
   "numpydoc >=1.8.0",
   "pip >=25.0",
+  "pooch >=1.8.0",
   "pre-commit >=3.5.0",
   "pylint >=3.3.0",
   "pytest >=8.3.2",
diff --git a/tests/test_commands.py b/tests/test_commands.py
index 46eef5b9..ce30fa0f 100644
--- a/tests/test_commands.py
+++ b/tests/test_commands.py
@@ -469,7 +469,7 @@ class Test(RV):
     )
 
     t = Test(gw=f)
-    assert t.gw.root == f
+    assert t.gw.root == pathlib.Path(f)
 
 
 def test_subbasin_properties():
diff --git a/tests/test_external_dataset_access.py b/tests/test_external_dataset_access.py
index b29fa5a9..e97f7eee 100644
--- a/tests/test_external_dataset_access.py
+++ b/tests/test_external_dataset_access.py
@@ -47,6 +47,11 @@ def test_get_file_default_cache(self):
             header = f.read()
             assert ":FileType rvi ASCII Raven 2.8.2" in header
 
+    @pytest.mark.xfail(
+        raises=urllib.error.HTTPError,
+        reason="Transitory failures expected",
+        strict=False,
+    )
     def test_open_dataset(self):
         ds = open_dataset(
             name="raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc",
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 1c24f40b..16f4017b 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,7 +1,13 @@
 import pytest
+from packaging.version import Version
+from pydap import __version__ as __pydap_version__
 
 from ravenpy.config.utils import nc_specs
 
+older_pydap = False
+if Version(__pydap_version__) < Version("3.5.5"):
+    older_pydap = True
+
 
 def test_nc_specs(yangtze):
     f = yangtze.fetch(
@@ -18,6 +24,9 @@ def test_nc_specs_bad(bad_netcdf):
 
 
 @pytest.mark.online
+@pytest.mark.skipif(
+    older_pydap, reason="pydap version 3.5.5 is required for this test"
+)
 def test_dap_specs():
     # Link to THREDDS Data Server netCDF testdata
     tds = "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/raven"

From 677bcf7feaf25fa7dd94fa71e02f364fc2acc2a2 Mon Sep 17 00:00:00 2001
From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com>
Date: Tue, 10 Jun 2025 13:58:55 -0400
Subject: [PATCH 06/29] update pre-commit, fix pydantic regression

---
 .github/workflows/main.yml |  2 +-
 .pre-commit-config.yaml    | 13 +++++++------
 tests/conftest.py          |  2 +-
 tests/emulators.py         | 22 ----------------------
 4 files changed, 9 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 3e82d3b8..924e40df 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -7,7 +7,7 @@ on:
   pull_request:
 
 env:
-  RAVEN_TESTING_DATA_BRANCH: master
+  RAVEN_TESTDATA_BRANCH: new-system
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0a98647a..577fd043 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,6 +7,7 @@ repos:
     hooks:
       - id: pyupgrade
         args: [ '--py39-plus' ]
+        exclude: ^tests/conftest\.py$
  - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v5.0.0
     hooks:
@@ -50,9 +51,9 @@ repos:
     hooks:
       - id: isort
  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.11.12
+    rev: v0.11.13
     hooks:
-      - id: ruff
+      - id: ruff-check
         args: [ '--fix' ]
 #      - id: ruff-format
  - repo: 
https://github.com/pycqa/flake8 @@ -74,11 +75,11 @@ repos: hooks: - id: nbqa-pyupgrade args: [ '--py39-plus' ] - additional_dependencies: [ 'pyupgrade==3.19.1' ] + additional_dependencies: [ 'pyupgrade==3.20.0' ] - id: nbqa-black additional_dependencies: [ 'black==25.1.0' ] - id: nbqa-isort - additional_dependencies: [ 'isort==6.0.0' ] + additional_dependencies: [ 'isort==6.0.1' ] - repo: https://github.com/kynan/nbstripout rev: 0.8.1 hooks: @@ -86,7 +87,7 @@ repos: files: ".ipynb" args: [ '--extra-keys=metadata.kernelspec' ] - repo: https://github.com/keewis/blackdoc - rev: v0.3.9 + rev: v0.3.10 hooks: - id: blackdoc additional_dependencies: [ 'black==25.1.0' ] @@ -113,7 +114,7 @@ repos: - id: zizmor args: [ '--config=.zizmor.yml' ] - repo: https://github.com/gitleaks/gitleaks - rev: v8.27.0 + rev: v8.27.2 hooks: - id: gitleaks - repo: meta diff --git a/tests/conftest.py b/tests/conftest.py index 92e0f60d..391bc858 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -177,7 +177,7 @@ class P(Params): class TestConfig(Config): params: P = P() calendar: o.Calendar = Field("JULIAN", alias="Calendar") - air_snow_coeff: Sym | None = Field(1 - P.X1, alias="AirSnowCoeff") + air_snow_coeff: Optional[Sym] = Field(1 - P.X1, alias="AirSnowCoeff") return TestConfig, P diff --git a/tests/emulators.py b/tests/emulators.py index c2e4ae59..643935e4 100644 --- a/tests/emulators.py +++ b/tests/emulators.py @@ -363,25 +363,3 @@ def config_rv(tmp_path_factory, numeric_config): out = tmp_path_factory.mktemp(name) / "config" conf.write_rv(out) yield name, out - - -@pytest.fixture -def dummy_config(): - """Return an almost empty config class and the parameter dataclass.""" - from pydantic import Field - from pydantic.dataclasses import dataclass - - from ravenpy.config import options as o - from ravenpy.config.base import Sym, SymConfig, Variable - from ravenpy.config.rvs import Config - - @dataclass(config=SymConfig) - class P: - X1: Sym = Variable("X1") - - class TestConfig(Config): - params: P = P() - calendar: o.Calendar = Field("JULIAN", alias="Calendar") - air_snow_coeff: Sym = Field(1 - P.X1, alias="AirSnowCoeff") - - return TestConfig, P From f1c38865f3b7776954adf25c89585e9a3fe5fcdc Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 10 Jun 2025 14:28:31 -0400 Subject: [PATCH 07/29] rebuild dataclass --- tests/conftest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 391bc858..4f48df18 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -164,7 +164,7 @@ def input3d(salmon, threadsafe_data_dir): def dummy_config(): """Return an almost empty config class and the parameter dataclass.""" from pydantic import Field - from pydantic.dataclasses import dataclass + from pydantic.dataclasses import dataclass, rebuild_dataclass from ravenpy.config import options as o from ravenpy.config.base import Params, Sym, SymConfig, Variable @@ -174,10 +174,12 @@ def dummy_config(): class P(Params): X1: Sym = Variable("X1") + rebuild_dataclass(P) + class TestConfig(Config): params: P = P() calendar: o.Calendar = Field("JULIAN", alias="Calendar") - air_snow_coeff: Optional[Sym] = Field(1 - P.X1, alias="AirSnowCoeff") + air_snow_coeff: Sym = Field(1 - P.X1, alias="AirSnowCoeff") return TestConfig, P From a5f09547af8b4321104b44ca727e9dc5cf84d70f Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 10 Jun 2025 15:10:47 -0400 Subject: [PATCH 
08/29] try adding caching --- .github/workflows/main.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 924e40df..ae133474 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -117,6 +117,12 @@ jobs: - name: Install CI libraries run: | python3 -m pip install --require-hashes -r CI/requirements_ci.txt + - name: Testing data caching + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: | + + key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }} - name: Test with tox and report coverage run: | if [ "${{ matrix.tox-env }}" != "false" ]; then @@ -187,6 +193,12 @@ jobs: run: | micromamba list python -m pip check || true + - name: Testing data caching + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: | + .tox + key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }} - name: Test RavenPy run: | python -m pytest --numprocesses=logical --cov=src/ravenpy --cov-report=lcov From 1d726d38dfb0ff6433a511ea88af2e4a4a9893da Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 10 Jun 2025 15:17:13 -0400 Subject: [PATCH 09/29] use proper location --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ae133474..b6069310 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -121,7 +121,7 @@ jobs: uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: path: | - + ${{ matrix.os == 'macos-latest' && 'Library/Caches/raven-testdata' || '~/.cache/raven-testdata' }} key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }} - name: Test with tox and report coverage run: | @@ -197,7 +197,7 @@ jobs: uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: path: | - .tox + ${{ matrix.os == 'macos-latest' && 'Library/Caches/raven-testdata' || '~/.cache/raven-testdata' }} key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }} - name: Test RavenPy run: | From c28490c00bde8bf2b49668f8f9b093d6bc196795 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 10 Jun 2025 15:51:00 -0400 Subject: [PATCH 10/29] add retry logic --- src/ravenpy/testing/utils.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/ravenpy/testing/utils.py b/src/ravenpy/testing/utils.py index 2dc389a2..e5c996ed 100644 --- a/src/ravenpy/testing/utils.py +++ b/src/ravenpy/testing/utils.py @@ -404,6 +404,7 @@ def populate_testing_data( temp_folder: Path | None = None, repo: str = TESTDATA_REPO_URL, branch: str = TESTDATA_BRANCH, + retry: int = 3, local_cache: Path = TESTDATA_CACHE_DIR, ) -> None: """ @@ -417,6 +418,8 @@ def populate_testing_data( URL of the repository to use when fetching testing datasets. branch : str, optional Branch of ravenpy-testdata to use when fetching testing datasets. + retry : int + Number of times to retry downloading the files in case of failure. Defaults to 3. local_cache : Path The path to the local cache. Defaults to the location set by the platformdirs library. The testing data will be downloaded to this local cache. 
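
For context, the retry behaviour introduced in the hunk below relies on Python's for/else pattern: the `else` clause runs only when the loop exhausts every attempt without reaching `break`. A minimal standalone sketch of the same logic, with a hypothetical `fetch` callable standing in for the `pooch.Pooch.fetch` method used by this module:

    import logging
    from urllib.error import HTTPError

    def fetch_with_retry(fetch, file: str, retry: int = 3) -> bool:
        """Try to download `file` up to `retry` times; report success."""
        for attempt in range(retry):
            try:
                fetch(file)
            except HTTPError:
                logging.info("Failed to download `%s` on attempt %d.", file, attempt + 1)
            else:
                logging.info("File was downloaded successfully.")
                return True
        # Every attempt raised HTTPError; the caller records the file as errored.
        logging.error("Failed to download `%s` after %d attempts.", file, retry)
        return False
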
@@ -427,14 +430,21 @@ def populate_testing_data( # Download the files errored_files = [] for file in load_registry(): - try: - n.fetch(file) - except HTTPError: # noqa: PERF203 - msg = f"File `{file}` not accessible in remote repository." + msg = f"Downloading file `{file}` from remote repository..." + logging.info(msg) + for attempt in range(retry): + try: + n.fetch(file) + except HTTPError: # noqa: PERF203 + msg = f"Failed to download file `{file}` on attempt {attempt + 1}." + logging.info(msg) + else: + logging.info("File was downloaded successfully.") + break + else: + msg = f"Failed to download file `{file}` after {retry} attempts." logging.error(msg) errored_files.append(file) - else: - logging.info("Files were downloaded successfully.") if errored_files: logging.error( From cc7c156dda833b7e0cd408a8ab07eb505f9dc40e Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 10 Jun 2025 15:59:45 -0400 Subject: [PATCH 11/29] separate cache folders --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b6069310..1fcf4955 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -122,7 +122,7 @@ jobs: with: path: | ${{ matrix.os == 'macos-latest' && 'Library/Caches/raven-testdata' || '~/.cache/raven-testdata' }} - key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }} + key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }}-pip-${{ matrix.os }} - name: Test with tox and report coverage run: | if [ "${{ matrix.tox-env }}" != "false" ]; then @@ -198,7 +198,7 @@ jobs: with: path: | ${{ matrix.os == 'macos-latest' && 'Library/Caches/raven-testdata' || '~/.cache/raven-testdata' }} - key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }} + key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }}-conda-${{ matrix.os }} - name: Test RavenPy run: | python -m pytest --numprocesses=logical --cov=src/ravenpy --cov-report=lcov From cb51dadd307e25ccd7aaca79ea788a4bae6aaa4f Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 10 Jun 2025 16:10:10 -0400 Subject: [PATCH 12/29] explicit caching --- .github/workflows/main.yml | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1fcf4955..a5246c7d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -117,12 +117,20 @@ jobs: - name: Install CI libraries run: | python3 -m pip install --require-hashes -r CI/requirements_ci.txt - - name: Testing data caching + + - name: Cache test data (macOS) + if: matrix.os == 'macos-latest' uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: - path: | - ${{ matrix.os == 'macos-latest' && 'Library/Caches/raven-testdata' || '~/.cache/raven-testdata' }} - key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }}-pip-${{ matrix.os }} + path: ~/Library/Caches/raven-testdata + key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }}-${{ matrix.os }} + - name: Cache test data (Ubuntu) + if: matrix.os == 'ubuntu-latest' + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: ~/.cache/raven-testdata + key: ${{ 
hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }}-${{ matrix.os }} + - name: Test with tox and report coverage run: | if [ "${{ matrix.tox-env }}" != "false" ]; then @@ -193,12 +201,20 @@ jobs: run: | micromamba list python -m pip check || true - - name: Testing data caching + + - name: Cache test data (macOS) + if: matrix.os == 'macos-latest' uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: - path: | - ${{ matrix.os == 'macos-latest' && 'Library/Caches/raven-testdata' || '~/.cache/raven-testdata' }} + path: ~/Library/Caches/raven-testdata key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }}-conda-${{ matrix.os }} + - name: Cache test data (Ubuntu) + if: matrix.os == 'ubuntu-latest' + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: ~/.cache/raven-testdata + key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }}-conda-${{ matrix.os }} + - name: Test RavenPy run: | python -m pytest --numprocesses=logical --cov=src/ravenpy --cov-report=lcov From ca035f20cdce0f1a029067b895a2d3216ec75572 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 10 Jun 2025 16:31:02 -0400 Subject: [PATCH 13/29] add testdata-version.yml checker --- .github/workflows/testdata-version.yml | 95 ++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 .github/workflows/testdata-version.yml diff --git a/.github/workflows/testdata-version.yml b/.github/workflows/testdata-version.yml new file mode 100644 index 00000000..835ad5e2 --- /dev/null +++ b/.github/workflows/testdata-version.yml @@ -0,0 +1,95 @@ +name: Verify Testing Data + +on: + pull_request: + types: + - opened + - reopened + - synchronize + paths: + - .github/workflows/main.yml + +permissions: + contents: read + +jobs: + use-latest-tag: + name: Check Latest raven-testdata Tag + runs-on: ubuntu-latest + if: | + (github.event.pull_request.head.repo.full_name == github.event.pull_request.base.repo.full_name) + permissions: + pull-requests: write + steps: + - name: Harden Runner + uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0 + with: + disable-sudo: true + egress-policy: block + allowed-endpoints: > + api.github.com:443 + github.com:443 + - name: Checkout Repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + - name: Find raven-testdata Tag and CI Testing Branch + run: | + RAVEN_TESTDATA_TAG="$( \ + git -c 'versionsort.suffix=-' \ + ls-remote --exit-code --refs --sort='version:refname' --tags https://github.com/Ouranosinc/raven-testdata '*.*.*' \ + | tail --lines=1 \ + | cut --delimiter='/' --fields=3)" + echo "RAVEN_TESTDATA_TAG=${RAVEN_TESTDATA_TAG}" >> $GITHUB_ENV + RAVEN_TESTDATA_BRANCH="$(grep -E "RAVEN_TESTDATA_BRANCH" .github/workflows/main.yml | cut -d ' ' -f4)" + echo "RAVEN_TESTDATA_BRANCH=${RAVEN_TESTDATA_BRANCH}" >> $GITHUB_ENV + - name: Report Versions Found + run: | + echo "Latest raven-testdata tag: ${RAVEN_TESTDATA_TAG}" + echo "Tag for raven-testdata in CI: ${RAVEN_TESTDATA_BRANCH}" + env: + RAVEN_TESTDATA_TAG: ${{ env.RAVEN_TESTDATA_TAG }} + RAVEN_TESTDATA_BRANCH: ${{ env.RAVEN_TESTDATA_BRANCH }} + - name: Find Comment + uses: peter-evans/find-comment@3eae4d37986fb5a8592848f6a574fdf654e61f9e # v3.1.0 + id: fc + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: 
'github-actions[bot]' + body-includes: It appears that this Pull Request modifies the `main.yml` workflow. + - name: Compare Versions + if: ${{( env.RAVEN_TESTDATA_TAG != env.RAVEN_TESTDATA_BRANCH )}} + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + with: + script: | + core.setFailed('Configured `raven-testdata` tag is not `latest`.') + - name: Update Failure Comment + if: ${{ failure() }} + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0 + with: + comment-id: ${{ steps.fc.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + body: | + > [!WARNING] + > It appears that this Pull Request modifies the `main.yml` workflow. + + On inspection, it seems that the `RAVEN_TESTDATA_BRANCH` environment variable is set to a tag that is not the latest in the `Ouranosinc/raven-testdata` repository. + + This value must match the most recent tag (`${{ env.RAVEN_TESTDATA_TAG }}`) in order to merge this Pull Request. + + If this PR depends on changes in a new testing dataset branch, be sure to tag a new version of `Ouranosinc/raven-testdata` once your changes have been merged to its `main` branch. + edit-mode: replace + - name: Update Success Comment + if: ${{ success() }} + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0 + with: + comment-id: ${{ steps.fc.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + body: | + > [!NOTE] + > It appears that this Pull Request modifies the `main.yml` workflow. + + On inspection, the `RAVEN_TESTDATA_BRANCH` environment variable is set to the most recent tag (`${{ env.RAVEN_TESTDATA_TAG }}`). + + No further action is required. + edit-mode: replace From 7c1fef8dc98289ad64b2143b2f16d7dce21c7cde Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 11 Jun 2025 09:46:31 -0400 Subject: [PATCH 14/29] prefetch testing data, better caching --- .github/workflows/main.yml | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a5246c7d..828eb63d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -85,12 +85,10 @@ jobs: pypi.org:443 raw.githubusercontent.com:443 test.opendap.org:80 - - name: Checkout Repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - - name: Set up Python${{ matrix.python-version }} uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: @@ -118,19 +116,27 @@ jobs: run: | python3 -m pip install --require-hashes -r CI/requirements_ci.txt - - name: Cache test data (macOS) + - name: Environment caching (macOS) if: matrix.os == 'macos-latest' uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: - path: ~/Library/Caches/raven-testdata + path: | + .tox + ~/Library/Caches/raven-testdata key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }}-${{ matrix.os }} - - name: Cache test data (Ubuntu) + - name: Environment caching (Ubuntu) if: matrix.os == 'ubuntu-latest' uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: - path: ~/.cache/raven-testdata + path: | + .tox + ~/.cache/raven-testdata key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }}-${{ matrix.os }} + - name: Prefetch RavenPy test data + run: | + python -c "import 
ravenpy.testing.utils as rtu; rtu.populate_testing_data()" + - name: Test with tox and report coverage run: | if [ "${{ matrix.tox-env }}" != "false" ]; then @@ -206,15 +212,21 @@ jobs: if: matrix.os == 'macos-latest' uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: - path: ~/Library/Caches/raven-testdata + path: | + ~/Library/Caches/raven-testdata key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }}-conda-${{ matrix.os }} - name: Cache test data (Ubuntu) if: matrix.os == 'ubuntu-latest' uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: - path: ~/.cache/raven-testdata + path: | + ~/.cache/raven-testdata key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }}-conda-${{ matrix.os }} + - name: Prefetch RavenPy test data + run: | + python -c "import ravenpy.testing.utils as rtu; rtu.populate_testing_data()" + - name: Test RavenPy run: | python -m pytest --numprocesses=logical --cov=src/ravenpy --cov-report=lcov From 629822814c5c167aec8a5aa422bf96246968f46a Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 11 Jun 2025 09:53:16 -0400 Subject: [PATCH 15/29] move caching to tox --- .github/workflows/main.yml | 4 ---- tox.ini | 10 ++++++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 828eb63d..5804ded8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -133,10 +133,6 @@ jobs: ~/.cache/raven-testdata key: ${{ hashFiles('src/ravenpy/testing/registry.txt') }}-${{ env.RAVEN_TESTDATA_BRANCH }}-${{ matrix.os }} - - name: Prefetch RavenPy test data - run: | - python -c "import ravenpy.testing.utils as rtu; rtu.populate_testing_data()" - - name: Test with tox and report coverage run: | if [ "${{ matrix.tox-env }}" != "false" ]; then diff --git a/tox.ini b/tox.ini index dccf2d87..35621092 100644 --- a/tox.ini +++ b/tox.ini @@ -13,10 +13,10 @@ opts = [gh] python = - 3.10 = py3.10-coverage-upstream - 3.11 = py3.11-coverage - 3.12 = py3.12-coverage - 3.13 = py3.13 # coveralls not supported on 3.13 + 3.10 = py3.10-coverage-upstream-prefetch + 3.11 = py3.11-coverage-prefetch + 3.12 = py3.12-coverage-prefetch + 3.13 = py3.13-prefetch # coveralls not supported on 3.13 [testenv:lint] skip_install = True @@ -75,6 +75,8 @@ commands = python -m pip install --upgrade --force-reinstall --no-deps --no-cache-dir --no-build-isolation gdal[numpy]=={env:GDAL_VERSION}.* ; Install raven-hydro from the upstream branch upstream: python -m pip install --upgrade --force-reinstall --no-deps --no-cache-dir git+https://github.com/Ouranosinc/raven-hydro.git@{env:UPSTREAM_BRANCH} + ; Prefetch testing data + prefetch: python -c "import ravenpy.testing.utils as rtu; rtu.populate_testing_data(branch='{env:RAVEN_TESTDATA_BRANCH}')" ; Run tests pytest {posargs} coverage: - coveralls From 0ee4add7491ff10c9fa818b1b311eea49bbc0f9c Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 11 Jun 2025 13:45:17 -0400 Subject: [PATCH 16/29] add prefetch --- .github/workflows/main.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5804ded8..3b4cbb08 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -136,11 +136,11 @@ jobs: - name: Test with tox and report coverage run: | if [ "${{ matrix.tox-env }}" != "false" ]; then - 
python3 -m tox -e ${{ matrix.tox-env }}
+            python3 -m tox -e ${{ matrix.tox-env }}-prefetch
           elif [ "${{ matrix.python-version }}" != "3.13" ]; then
-            python3 -m tox -e py${{ matrix.python-version }}-coverage
+            python3 -m tox -e py${{ matrix.python-version }}-prefetch-coverage
           else
-            python3 -m tox -e py${{ matrix.python-version }}
+            python3 -m tox -e py${{ matrix.python-version }}-prefetch
           fi
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

From 7dfcf43662f9c1e86e8ebcabe20c06bbefe1c61c Mon Sep 17 00:00:00 2001
From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com>
Date: Thu, 12 Jun 2025 10:17:12 -0400
Subject: [PATCH 17/29] avoid race condition when downloading registry.txt

---
 src/ravenpy/testing/utils.py | 50 ++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/src/ravenpy/testing/utils.py b/src/ravenpy/testing/utils.py
index e5c996ed..af513014 100644
--- a/src/ravenpy/testing/utils.py
+++ b/src/ravenpy/testing/utils.py
@@ -7,6 +7,7 @@
 import logging
 import os
 import re
+import tempfile
 import time
 import warnings
 from collections.abc import Callable
@@ -208,7 +209,9 @@ def testing_setup_warnings():
 
 
 def load_registry(
-    branch: str = TESTDATA_BRANCH, repo: str = TESTDATA_REPO_URL
+    branch: str = TESTDATA_BRANCH,
+    repo: str = TESTDATA_REPO_URL,
+    force_download: bool = False,
 ) -> dict[str, str]:
     """
     Load the registry file for the test data.
@@ -219,12 +222,22 @@ def load_registry(
         Branch of the repository to use when fetching testing datasets.
     repo : str
         URL of the repository to use when fetching testing datasets.
+    force_download : bool
+        If True, force the download of the registry file even if it already exists.
 
     Returns
     -------
     dict
         Dictionary of filenames and hashes.
     """
+
+    def load_registry_from_file(
+        _registry_file: str | Path,
+    ) -> dict[str, str]:
+        """Load the registry from a file."""
+        with Path(_registry_file).open(encoding="utf-8") as f:
+            return {line.split()[0]: line.split()[1] for line in f}
+
     if not repo.endswith("/"):
         repo = f"{repo}/"
     remote_registry = audit_url(
@@ -247,23 +260,34 @@ def load_registry(
         urlretrieve(remote_registry, registry_file)  # noqa: S310
 
     elif branch != default_testdata_version:
-        custom_registry_folder = Path(
-            str(ilr.files("ravenpy").joinpath(f"testing/{branch}"))
-        )
-        custom_registry_folder.mkdir(parents=True, exist_ok=True)
-        registry_file = custom_registry_folder.joinpath("registry.txt")
-        urlretrieve(remote_registry, registry_file)  # noqa: S310
+        if force_download:
+            # When force_download is requested, always fetch a fresh registry file into a temporary directory
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                custom_registry_folder = Path(tmp_dir).joinpath("testing", branch)
+                custom_registry_folder.mkdir(parents=True, exist_ok=True)
+                registry_file = custom_registry_folder.joinpath("registry.txt")
+                urlretrieve(remote_registry, registry_file)  # noqa: S310
+                return load_registry_from_file(registry_file)
+        else:
+            # Otherwise, download the registry file for the branch only if it is missing, holding a file lock to avoid concurrent downloads
+            custom_registry_folder = Path(
+                str(ilr.files("ravenpy").joinpath("testing", branch))
+            )
+            custom_registry_folder.mkdir(parents=True, exist_ok=True)
+            registry_file = custom_registry_folder.joinpath("registry.txt")
+            with FileLock(custom_registry_folder.joinpath(".lock")):
+                if not registry_file.exists():
+                    urlretrieve(remote_registry, registry_file)  # noqa: S310
+            return load_registry_from_file(registry_file)
 
     else:
         registry_file = 
Path(str(ilr.files("ravenpy").joinpath("testing/registry.txt"))) if not registry_file.exists(): - raise FileNotFoundError(f"Registry file not found: {registry_file}") + msg = f"Registry file not found: {registry_file}" + raise FileNotFoundError(msg) - # Load the registry file - with registry_file.open(encoding="utf-8") as f: - registry = {line.split()[0]: line.split()[1] for line in f} - return registry + return load_registry_from_file(registry_file) def yangtze( @@ -419,7 +443,7 @@ def populate_testing_data( branch : str, optional Branch of ravenpy-testdata to use when fetching testing datasets. retry : int - Number of times to retry downloading the files in case of failure. Defaults to 3. + Number of times to retry downloading the files in case of failure. Default: 3. local_cache : Path The path to the local cache. Defaults to the location set by the platformdirs library. The testing data will be downloaded to this local cache. From 0233553a047d1798352b02e942fbd86a25fd9182 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 12 Jun 2025 10:48:57 -0400 Subject: [PATCH 18/29] handle case where raven binary is installed via conda and test is run using tox --- tests/test_missing.py | 9 ++++++++- tox.ini | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/test_missing.py b/tests/test_missing.py index 055c65eb..7330bdca 100644 --- a/tests/test_missing.py +++ b/tests/test_missing.py @@ -1,3 +1,4 @@ +import logging import os import shutil import sys @@ -53,7 +54,13 @@ def test_missing_raven_binary(self, monkeypatch, tmpdir, hide_module): del sys.modules["ravenpy.config.defaults"] # Now the tool should be "missing" - assert shutil.which("raven") is None + # if running from tox in a conda environment, the raven binary is likely to be present + if os.environ.get("CONDA_PREFIX") and os.environ.get("TOX"): + logging.info( + "Running in a development conda environment with tox. Raven is expected to be present." 
+ ) + else: + assert shutil.which("raven") is None # Loading the module should raise a RuntimeError with pytest.raises(RuntimeError): diff --git a/tox.ini b/tox.ini index 35621092..47973e8d 100644 --- a/tox.ini +++ b/tox.ini @@ -47,6 +47,7 @@ setenv = PYTEST_ADDOPTS = --numprocesses=logical --durations=10 --cov=ravenpy PYTHONPATH = {toxinidir} UPSTREAM_BRANCH = main + TOX = {envname} passenv = CI COVERALLS_* @@ -76,7 +77,7 @@ commands = ; Install raven-hydro from the upstream branch upstream: python -m pip install --upgrade --force-reinstall --no-deps --no-cache-dir git+https://github.com/Ouranosinc/raven-hydro.git@{env:UPSTREAM_BRANCH} ; Prefetch testing data - prefetch: python -c "import ravenpy.testing.utils as rtu; rtu.populate_testing_data(branch='{env:RAVEN_TESTDATA_BRANCH}')" + prefetch: python -c "import ravenpy.testing.utils as rtu; rtu.populate_testing_data(branch={env:RAVEN_TESTDATA_BRANCH})" ; Run tests pytest {posargs} coverage: - coveralls From f9bd93b857c6c291a69f971d0d6e235a1863e4ee Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 12 Jun 2025 10:49:19 -0400 Subject: [PATCH 19/29] audit coveralls connection --- .github/workflows/main.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3b4cbb08..a2f103ab 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -243,11 +243,7 @@ jobs: uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0 with: disable-sudo: true - egress-policy: block - allowed-endpoints: > - coveralls.io:443 - github.com:443 - objects.githubusercontent.com:443 + egress-policy: audit - name: Coveralls Finished uses: coverallsapp/github-action@648a8eb78e6d50909eff900e4ec85cab4524a45b # v2.3.6 with: From 168854546bfe8774721f10d81f1ce4efd1b7a095 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 12 Jun 2025 10:49:58 -0400 Subject: [PATCH 20/29] support force_download option and use locking more --- src/ravenpy/testing/utils.py | 22 +++++++++++++--------- tests/test_external_dataset_access.py | 2 +- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/ravenpy/testing/utils.py b/src/ravenpy/testing/utils.py index af513014..c0cc9a95 100644 --- a/src/ravenpy/testing/utils.py +++ b/src/ravenpy/testing/utils.py @@ -250,14 +250,15 @@ def load_registry_from_file( if repo != default_testdata_repo_url: external_repo_name = urlparse(repo).path.split("/")[-2] external_branch_name = branch.split("/")[-1] - registry_file = Path( - str( - ilr.files("ravenpy").joinpath( - f"testing/registry.{external_repo_name}.{external_branch_name}.txt" - ) - ) + testing_folder = Path(str(ilr.files("ravenpy").joinpath("testing"))) + registry_file = testing_folder.joinpath( + f"registry.{external_repo_name}.{external_branch_name}.txt" ) - urlretrieve(remote_registry, registry_file) # noqa: S310 + lockfile = testing_folder.joinpath(".lock") + with FileLock(lockfile): + if not registry_file.exists(): + urlretrieve(remote_registry, registry_file) # noqa: S310 + lockfile.unlink(missing_ok=True) elif branch != default_testdata_version: if force_download: @@ -295,6 +296,7 @@ def yangtze( branch: str = TESTDATA_BRANCH, cache_dir: str | Path = TESTDATA_CACHE_DIR, allow_updates: bool = True, + force_download: bool = False, ): """ Pooch registry instance for RavenPy test data. 
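
For context, `yangtze` is the public entry point for fetching testing files; a minimal usage sketch, mirroring the calls made in the test suite and assuming the default repository, branch, and cache settings:

    from pathlib import Path

    from ravenpy.testing.utils import open_dataset, yangtze

    # Fetch a registered file; the return value is a path into the local cache.
    salmon = Path(
        yangtze().fetch(
            "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc"
        )
    )
    assert salmon.is_file()

    # Open a registered netCDF dataset directly, forwarding options to yangtze().
    ds = open_dataset(
        name="raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc",
        yangtze_kwargs={"branch": "main", "force_download": True},
    )
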
@@ -309,6 +311,8 @@ def yangtze( The path to the directory where the data files are stored. allow_updates : bool If True, allow updates to the data files. Default is True. + force_download : bool + If True, force the download of the registry file even if it already exists. Returns ------- @@ -320,7 +324,7 @@ def yangtze( There are three environment variables that can be used to control the behaviour of this registry: - ``RAVENPY_TESTDATA_CACHE_DIR``: If this environment variable is set, it will be used as the base directory to store the data files. - The directory should be an absolute path (i.e., it should start with ``/``). + The directory should be an absolute path (i.e. it should start with ``/``). Otherwise, the default location will be used (based on ``platformdirs``, see :py:func:`pooch.os_cache`). - ``RAVENPY_TESTDATA_REPO_URL``: If this environment variable is set, it will be used as the URL of the repository to use when fetching datasets. Otherwise, the default repository will be used. @@ -356,7 +360,7 @@ def yangtze( version=default_testdata_version, version_dev=branch, allow_updates=allow_updates, - registry=load_registry(branch=branch, repo=repo), + registry=load_registry(branch=branch, repo=repo, force_download=force_download), ) # Add a custom fetch method to the Pooch instance diff --git a/tests/test_external_dataset_access.py b/tests/test_external_dataset_access.py index e97f7eee..d778daf6 100644 --- a/tests/test_external_dataset_access.py +++ b/tests/test_external_dataset_access.py @@ -55,7 +55,7 @@ def test_get_file_default_cache(self): def test_open_dataset(self): ds = open_dataset( name="raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc", - yangtze_kwargs={"branch": self.branch}, + yangtze_kwargs={"branch": self.branch, "force_download": True}, ) assert ( From 6be0289346690bba9bf44483121cb43a54aa9544 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 12 Jun 2025 10:55:12 -0400 Subject: [PATCH 21/29] quotation marks --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 47973e8d..77cab174 100644 --- a/tox.ini +++ b/tox.ini @@ -77,7 +77,7 @@ commands = ; Install raven-hydro from the upstream branch upstream: python -m pip install --upgrade --force-reinstall --no-deps --no-cache-dir git+https://github.com/Ouranosinc/raven-hydro.git@{env:UPSTREAM_BRANCH} ; Prefetch testing data - prefetch: python -c "import ravenpy.testing.utils as rtu; rtu.populate_testing_data(branch={env:RAVEN_TESTDATA_BRANCH})" + prefetch: python -c "import ravenpy.testing.utils as rtu; rtu.populate_testing_data(branch=\"{env:RAVEN_TESTDATA_BRANCH})\"" ; Run tests pytest {posargs} coverage: - coveralls From 9c064abc21548104651e36224ba177a49001ffc4 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 12 Jun 2025 11:04:56 -0400 Subject: [PATCH 22/29] typo fix --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 77cab174..341c68d8 100644 --- a/tox.ini +++ b/tox.ini @@ -77,7 +77,7 @@ commands = ; Install raven-hydro from the upstream branch upstream: python -m pip install --upgrade --force-reinstall --no-deps --no-cache-dir git+https://github.com/Ouranosinc/raven-hydro.git@{env:UPSTREAM_BRANCH} ; Prefetch testing data - prefetch: python -c "import ravenpy.testing.utils as rtu; rtu.populate_testing_data(branch=\"{env:RAVEN_TESTDATA_BRANCH})\"" + prefetch: python -c "import ravenpy.testing.utils as 
rtu; rtu.populate_testing_data(branch=\"{env:RAVEN_TESTDATA_BRANCH}\")" ; Run tests pytest {posargs} coverage: - coveralls From 0d33c722c21c58f7d4163c2aac9bf2870593d17f Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 12 Jun 2025 11:08:17 -0400 Subject: [PATCH 23/29] egress policy audit --- .github/workflows/main.yml | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a2f103ab..794f68a2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -70,21 +70,8 @@ jobs: - name: Harden Runner uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0 with: - egress-policy: block - allowed-endpoints: > - api.github.com:443 - azure.archive.ubuntu.com:80 - coveralls.io:443 - esm.ubuntu.com:443 - files.pythonhosted.org:443 - github.com:443 - motd.ubuntu.com:443 - objects.githubusercontent.com:443 - packages.microsoft.com:443 - pavics.ouranos.ca:443 - pypi.org:443 - raw.githubusercontent.com:443 - test.opendap.org:80 + disable-sudo: false + egress-policy: audit - name: Checkout Repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: @@ -167,18 +154,7 @@ jobs: uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0 with: disable-sudo: true - egress-policy: block - allowed-endpoints: > - api.github.com:443 - conda.anaconda.org:443 - coveralls.io:443 - files.pythonhosted.org:443 - github.com:443 - objects.githubusercontent.com:443 - pavics.ouranos.ca:443 - pypi.org:443 - raw.githubusercontent.com:443 - test.opendap.org:80 + egress-policy: audit - name: Checkout Repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: @@ -203,7 +179,6 @@ jobs: run: | micromamba list python -m pip check || true - - name: Cache test data (macOS) if: matrix.os == 'macos-latest' uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 From fee9fb81092955edeb6346e368a7e83263eb8b19 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 12 Jun 2025 12:14:14 -0400 Subject: [PATCH 24/29] update CHANGELOG.rst, fix some typos and remove unneeded imports --- CHANGELOG.rst | 11 ++++++++++- tests/conftest.py | 2 -- tox.ini | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c287bae6..63d004dc 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,7 +7,15 @@ v0.18.3 (unreleased) New features ^^^^^^^^^^^^ -* Added `parsers.parse_rv` to extract a Command value from an RV file. +* Added `parsers.parse_rv` to extract a Command value from an RV file. (PR #503) +* New module `ravenpy.testing` has been added to provide utility functions and support for testing and testing data management. (PR #513) + +Breaking changes +^^^^^^^^^^^^^^^^ +* `ravenpy` now requires `pooch>=1.8.0` for downloading and caching remote testing data. (PR #513) +* `ravenpy.utilities.testdata` has been refactored to new module `ravenpy.testing`. The `publish_release_notes` function is now located in `ravenpy.utilities.publishing`. (PR #513) +* The `ravenpy.testing.utils` module now provides a `yangtze()` class for fetching and caching the `raven-testdata` testing data. This replaces the previous `get_local_testdata` and `get_file` functions. (PR #513) +* The `ravenpy.testing.utils.open_dataset` function no longer supports OPeNDAP URLs or local file paths. 
Instead, it uses the `yangtze()` class to fetch datasets from the testing data repository or the local cache. Users should now use `xarray.open_dataset()` directly for OPeNDAP URLs or local files. (PR #513)
 
 Bug fixes
 ^^^^^^^^^
@@ -17,6 +25,7 @@ Bug fixes
 Internal changes
 ^^^^^^^^^^^^^^^^
 * `ravenpy` now requires `xclim>=0.57.0` and `xsdba` (v0.4.0+). (PR #511)
+* The `tests` folder no longer contains and `__init__.py` file. As such, it is no longer treated as a package. `pytest` fixtures from `emulators.py` are now directly imported into `conftest.py` for use in tests and existing fixtures have been modified to use the new `yangtze()` class for fetching testing data. (PR #513)
 
 v0.18.2 (2025-05-05)
 --------------------
diff --git a/tests/conftest.py b/tests/conftest.py
index 4f48df18..d2103f97 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -2,9 +2,7 @@
 
 import logging
 import os
-from collections.abc import Callable
 from pathlib import Path
-from typing import Optional
 
 import pytest
 import xarray as xr
diff --git a/tox.ini b/tox.ini
index 341c68d8..24abdaf8 100644
--- a/tox.ini
+++ b/tox.ini
@@ -77,7 +77,7 @@ commands =
     ; Install raven-hydro from the upstream branch
    upstream: python -m pip install --upgrade --force-reinstall --no-deps --no-cache-dir git+https://github.com/Ouranosinc/raven-hydro.git@{env:UPSTREAM_BRANCH}
     ; Prefetch testing data
-    prefetch: python -c "import ravenpy.testing.utils as rtu; rtu.populate_testing_data(branch=\"{env:RAVEN_TESTDATA_BRANCH}\")"
+    prefetch: python -c "import ravenpy.testing.utils as rtu; rtu.populate_testing_data(branch=\"{env:RAVEN_TESTDATA_BRANCH}\")"
     ; Run tests
     pytest {posargs}
     coverage: - coveralls

From 3e6d96dc6e335ead68ed49fa8b8d0541a677d259 Mon Sep 17 00:00:00 2001
From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com>
Date: Thu, 12 Jun 2025 14:11:07 -0400
Subject: [PATCH 25/29] use new raven-testdata tag, adjust registry.txt, adjust test expectations

---
 .github/workflows/main.yml            |  2 +-
 CHANGELOG.rst                         |  2 +-
 pyproject.toml                        |  1 +
 src/ravenpy/testing/registry.txt      | 47 ++++++++++++++-------------
 src/ravenpy/testing/utils.py          |  4 +--
 tests/test_external_dataset_access.py | 28 ++++++++++------
 6 files changed, 48 insertions(+), 36 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 794f68a2..23dfb628 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -7,7 +7,7 @@ on:
   pull_request:
 
 env:
-  RAVEN_TESTDATA_BRANCH: new-system
+  RAVEN_TESTDATA_BRANCH: v2025.6.12
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 63d004dc..c33924a5 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -25,7 +25,7 @@ Bug fixes
 Internal changes
 ^^^^^^^^^^^^^^^^
 * `ravenpy` now requires `xclim>=0.57.0` and `xsdba` (v0.4.0+). (PR #511)
-* The `tests` folder no longer contains and `__init__.py` file. As such, it is no longer treated as a package. `pytest` fixtures from `emulators.py` are now directly imported into `conftest.py` for use in tests and existing fixtures have been modified to use the new `yangtze()` class for fetching testing data. (PR #513)
+* The `tests` folder no longer contains an `__init__.py` file and is no longer treated as a package. `pytest` fixtures from `emulators.py` are now directly imported into `conftest.py` for use in tests, and existing `pytest` fixtures have been modified to use the new `yangtze()` class for fetching testing data.
(PR #513) v0.18.2 (2025-05-05) -------------------- diff --git a/pyproject.toml b/pyproject.toml index 54f3ab55..2933d192 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -243,6 +243,7 @@ include = [ "src/ravenpy/**/*.py", "src/ravenpy/data/**/*.csv", "src/ravenpy/data/**/*.zip", + "src/ravenpy/testing/registry.txt", "setup.cfg", "tests/*.py", "tests/test.cfg", diff --git a/src/ravenpy/testing/registry.txt b/src/ravenpy/testing/registry.txt index e7f08fe1..5c03c34d 100644 --- a/src/ravenpy/testing/registry.txt +++ b/src/ravenpy/testing/registry.txt @@ -1,13 +1,16 @@ basinmaker/drainage_region_0175_v2-1/finalcat_info_v2-1.zip sha256:a761d4a2284d728ff1c846db84d22dbb37afb3acc293f8b3110a56a99bc283c9 caspar_eccc_hindcasts/geps_watershed.nc sha256:a7eddbdca773b0c66140fbfe042f1bb889a3cb1f368c0393d0598be58bea5208 cec_nalcms2010_30m/cec_nalcms_subQC.tiff sha256:31143499c1368b7e7d73a5f33b9184aabb2eb869337352f0905853d39eb5675f -clrh/mattawin/06FB002.rvh sha256:9ab58a048a358b159f681e90fbb129084232ba58a467d148b8b0e7f154222204 -clrh/mattawin/Lakes.rvh sha256:5d39d2dbaa96089b38952436583da76ba70a6ab587da5cadcb8d9034a684c7ef -clrh/mattawin/channel_properties.rvp sha256:6b48f275e0fc5f1f323346502fb9a11f6c8869620f1ab89d9c24a6fef7d4633a +clrh/mattawin/06FB002.rvh sha256:a20ea1c6cbe59d296808e12a92d0dd54475e60c29cb64c5d98b705da726d111d +clrh/mattawin/Lakes.rvh sha256:ee731b88e1037c14fc04d7a8fdfa6832e0e897a3e5046545e08ec7d39a13af58 +clrh/mattawin/channel_properties.rvp sha256:e9de55b9232c90a5cca1d7ffc11ec35cbf077098d72078a64c3e66a2a1d3c23a +cmip5/nasa_nex-gddp-1.0_day_inmcm4_historical+rcp45_nex-gddp_1971-1972_subset.nc sha256:74dcdf179c21d35bef6184b67945b3c75de69e452bbae944911c277baded845f +cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200601-210012_subset.nc sha256:dadf0f786e02f260a43d7b38fccf2baa846cb1e94e872e006e18bd8b04a1cdd7 donneesqc_mrc_poly/mrc_subset.gml sha256:89a7a47a008a04b5821d6fbd9d11a84b163b341e23798e2375bc0cf7cd29043e donneesqc_mrc_poly/mrc_subset.zip sha256:021bebe8abdea2ca0f4c605d6d38981d46bf4ec0c642dadf1be3aa34522cfd0d earthenv_dem_90m/earthenv_dem90_southernQuebec.tiff sha256:310f38da0439300f22751de434a9dbcff23d20376db19aff4e260573b526beff eccc_forecasts/geps_watershed.nc sha256:f63cab6543e86bdd6b96599b16cd5fecf9da2e022dd08b6351b6852fec598d21 +era5/tas_pr_20180101-20180108.nc sha256:40689a8bf36f0750fdc76c1b4139e0a405418291bce03ab69eaed8c728bbf6bc famine/famine_input.nc sha256:0026f20c141dc007f9041d1964e831c767a72206eb3690ce1ae328d940e1e6a4 gr4j_cemaneige/solution.rvc sha256:99b25947c22a99ccdd5a738923ab17af100cee8d61c3e73969e07ee2472b457c hydro_simulations/raven-gr4j-cemaneige-sim_hmets-0_Hydrographs.nc sha256:a1763cb78ab81cae8080792cf99c52a42b0dff1f72f6b7b4862c421811599371 @@ -18,32 +21,32 @@ nasa/Mars_MGS_MOLA_DEM_georeferenced_region_compressed.tiff sha256:3190c6799724b nrcan/NRCAN_1971-1972_subset.nc sha256:7de9def968a7f3d804255be2b86976e47b0edc3a80e5bf4ad608c1adf2f01d40 nrcan/NRCAN_2006-2007_subset.nc sha256:a029261f1b74cd688e0b7f70ab3a770e9e9e4f5f005b3a370707803550c1c646 polygons/Basin_10.zip sha256:d611ec4d082fc7a26fbcfbcd843c04c155e66f2bc044896374218a19af4fc6d9 -polygons/Saskatoon.geojson sha256:5277ccb26affb3d9a7697d2b605fd0367b5621627f409ea17003b36c735e762b -polygons/mars.geojson sha256:4eb7c1d20f43cf41d886026dd6b4601d087d352fc8988d5f43fb44402d07af14 +polygons/Saskatoon.geojson sha256:3f3e6cff46cc82df1954024fb2e2d2185ddcf2e08cd69bf53d0f810a6b95e1c1 +polygons/mars.geojson sha256:1bcb4625dfc37be4ca20a410495f2489ce0acccc5a210bc831e6f7d86187bf2e polygons/mars.zip 
sha256:504c509f4d7e6a6344e23a76831cba78491017b168f7c4b28752a3ad13d1f8cc polygons/mississippi.geojson sha256:089445dc9db5c93d851a2a654523871fae65996dd6123b40bfc4b0cbef32916e raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc sha256:68d234ae5a14051f8d0849ab204aa31c8d65dab4eaec65764306a23540cd8e9d raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc sha256:2171a33280e7d2deff582bc2f7bb903a6dd520848be0c09495ae938f7c214f61 raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc sha256:fb61ca9fa69b3c2ccb56af9dd92211bed72aa386ef2aa941013ed2df238b21b7 -raven-gr4j-cemaneige/raven-gr4j-salmon.rvc sha256:465e617460e41b1ca80423a7722f080374407e5aa120be2121ab6a8f84b58bf6 -raven-gr4j-cemaneige/raven-gr4j-salmon.rvh sha256:8695bcc319ff833999e7914315e7cbc07edcc4d18e6be451ff570db23619b89b -raven-gr4j-cemaneige/raven-gr4j-salmon.rvi sha256:1c1e087a746fab027cf11d348760422fdcb589e9fb70703fd2fb218781d21f35 -raven-gr4j-cemaneige/raven-gr4j-salmon.rvp sha256:a066e32951dd6cf248e753f2850e7146539f777f501de9f4f1a33def97ff4120 -raven-gr4j-cemaneige/raven-gr4j-salmon.rvt sha256:b0a6bf6835d8d43830f8477ca09c5542622d0db4d0f9e3eb6f663527f5606ef3 -raven-hmets/Salmon-River-Near-Prince-George_Qobs_daily.rvt sha256:ad4a92d6eb48f9501bc10bd88626135d8018392ecbeb99908e9f64ce76467d90 +raven-gr4j-cemaneige/raven-gr4j-salmon.rvc sha256:f8fc7bfdec65c66ffc1f801eb9df9c61b7210b15f485ac272021a5700fb3c91e +raven-gr4j-cemaneige/raven-gr4j-salmon.rvh sha256:3e6ddd95a1d7e12a018eb750ced4f4b8e03c66cce356c84f066818ca67675e50 +raven-gr4j-cemaneige/raven-gr4j-salmon.rvi sha256:671cdb8f8da4c8799a21c84e87964949760d9b719d432d29ebc2bc2005742d2b +raven-gr4j-cemaneige/raven-gr4j-salmon.rvp sha256:cad0642498ce6f6719a11dbeb0bedde9242f7848be10c825d42d20a869434674 +raven-gr4j-cemaneige/raven-gr4j-salmon.rvt sha256:42ddc92bdaaa438ac3c35af90f5cd17656d7c02a880166a55ec627e4f884acf5 +raven-hmets/Salmon-River-Near-Prince-George_Qobs_daily.rvt sha256:eabb541d1e25655d7b1ae56ac5c3d780e82524d113b0bfe7bfb93aae5f841c88 raven-hmets/Salmon-River-Near-Prince-George_meteo_daily.rvt sha256:e1b47934873caa9b9ac44c8d3069e73520432b5e245124c255b3f5c52f940c17 -raven-hmets/raven-hmets-salmon.rvc sha256:8a79de8c19ff43a21688f494d1e5c6cb07227e8a83b9560c72b934db56eaa17f -raven-hmets/raven-hmets-salmon.rvh sha256:b602fc0e523ef87888a0985f06b27c6d391ef7bfcef94f1104832b2a25b59ac4 -raven-hmets/raven-hmets-salmon.rvi sha256:f40e4db30a2569790cabc435568ee92fb528166fe95f5265988b3608de3327d2 -raven-hmets/raven-hmets-salmon.rvp sha256:4066a35d2638adab8b608a0721e41ed620f9ab9d7aa0837134b51aee560e3835 -raven-hmets/raven-hmets-salmon.rvt sha256:a32125fc1557cd4d460205a42cea0761203febf3cc7bc6ca9e4c0447292df018 -raven-mohyse/Salmon-River-Near-Prince-George_Qobs_daily.rvt sha256:ad4a92d6eb48f9501bc10bd88626135d8018392ecbeb99908e9f64ce76467d90 +raven-hmets/raven-hmets-salmon.rvc sha256:d32af7a2c46f819a4dd2b12ac2eaee8895b00f6f21935864cbd09b5fcd3b506c +raven-hmets/raven-hmets-salmon.rvh sha256:f513b6721970a66934689f725fa7cf20722990ce3af73c27a9c3fb663a87b7bd +raven-hmets/raven-hmets-salmon.rvi sha256:dd6a5922d2e6177ffe720d37672f8401acc3c6aaa64e2512d75e1031b2d72399 +raven-hmets/raven-hmets-salmon.rvp sha256:ef0a55ee7b7aebbb065ac4ff183fd5e36c00d3232599cced2374ffe4ca1657c4 +raven-hmets/raven-hmets-salmon.rvt sha256:ba01918bfeb2b9b983a14aa7c99b8d784974d7d88a539e6ad8d6226b0e179330 +raven-mohyse/Salmon-River-Near-Prince-George_Qobs_daily.rvt sha256:eabb541d1e25655d7b1ae56ac5c3d780e82524d113b0bfe7bfb93aae5f841c88 
raven-mohyse/Salmon-River-Near-Prince-George_meteo_daily.rvt sha256:e1b47934873caa9b9ac44c8d3069e73520432b5e245124c255b3f5c52f940c17 -raven-mohyse/raven-mohyse-salmon.rvc sha256:282a3ad02ffecb16f3f9ee5c4357a809254499039a4ab51d6b537a2ca9f2224f -raven-mohyse/raven-mohyse-salmon.rvh sha256:80373ec8a80ee1aba9e8e5c04ea68d1f44de5fd693706dc5ebdeec6783548151 -raven-mohyse/raven-mohyse-salmon.rvi sha256:f6414972f4d170f9aa8532f5dcab45518bbd6938b2e2e952631381e05c6bcf1b -raven-mohyse/raven-mohyse-salmon.rvp sha256:6f6440be3205407974479c9f7a36527fcb3973e074e3b8ab314a12d13befa7b1 -raven-mohyse/raven-mohyse-salmon.rvt sha256:4420c6db1f981b283fd82d8227efa8a88604ae01140afb35350b2a0fe9e3ab18 +raven-mohyse/raven-mohyse-salmon.rvc sha256:0c50193578e68d00dc9dd2a3dec727504bcc0051efd033b9f8ec9c2d6af7f1ab +raven-mohyse/raven-mohyse-salmon.rvh sha256:e3463b9c36d913e479f169d06370f686f99a11ae75139130289111e5cd497ec9 +raven-mohyse/raven-mohyse-salmon.rvi sha256:0aed624bfc4e75d1aa8132e48ae4b6796067ffa9d6614568966be391a5c6cbf3 +raven-mohyse/raven-mohyse-salmon.rvp sha256:3a1a937649c2294319e3bb727d1df312d2bd3c46b71d97497d558da9dad3810a +raven-mohyse/raven-mohyse-salmon.rvt sha256:6a7ca7ff4a45a1670beb7f344b4363cc97c27f657f6bac88a2b40e5d86c48fd8 raven-routing-sample/OTT_sub.zip sha256:f90de4ad5467dae7bcd061156c147cd09ec7d9b3182ac86cefaa1816a541db9b raven-routing-sample/VIC_streaminputs.nc sha256:ffea0e6b1095eabb417e048d3cc807d7964036ffda3e9cbe478dadb20908d7e9 raven-routing-sample/VIC_streaminputs_weights.rvt sha256:2912c3604a5461dca27255bbdca5d319afd91a4534257391264a14f8c95bffb8 diff --git a/src/ravenpy/testing/utils.py b/src/ravenpy/testing/utils.py index c0cc9a95..92d9b362 100644 --- a/src/ravenpy/testing/utils.py +++ b/src/ravenpy/testing/utils.py @@ -55,7 +55,7 @@ "yangtze", ] -default_testdata_version = "v2025.5.16" +default_testdata_version = "v2025.6.12" """Default version of the testing data to use when fetching datasets.""" default_testdata_repo_url = ( @@ -261,8 +261,8 @@ def load_registry_from_file( lockfile.unlink(missing_ok=True) elif branch != default_testdata_version: + # If force_download is True, download to a transient directory for testing purposes if force_download: - # If the registry file does not exist, download it, or if force_download is True, download it again with tempfile.TemporaryDirectory() as tmp_dir: custom_registry_folder = Path(tmp_dir).joinpath("testing", branch) custom_registry_folder.mkdir(parents=True, exist_ok=True) diff --git a/tests/test_external_dataset_access.py b/tests/test_external_dataset_access.py index d778daf6..bc7f96a3 100644 --- a/tests/test_external_dataset_access.py +++ b/tests/test_external_dataset_access.py @@ -7,7 +7,12 @@ import xarray from ravenpy.extractors.forecasts import get_CASPAR_dataset, get_ECCC_dataset -from ravenpy.testing.utils import default_testdata_cache, open_dataset, yangtze +from ravenpy.testing.utils import ( + default_testdata_cache, + default_testdata_version, + open_dataset, + yangtze, +) @pytest.mark.online @@ -47,21 +52,24 @@ def test_get_file_default_cache(self): header = f.read() assert ":FileType rvi ASCII Raven 2.8.2" in header - @pytest.mark.xfail( - raises=urllib.error.HTTPError, - reason="Transitory failures expected", - strict=False, - ) - def test_open_dataset(self): + def test_open_dataset( + self, + tmp_path, + ): + cache_dir = tmp_path / "yangtze_cache" ds = open_dataset( name="raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc", - yangtze_kwargs={"branch": self.branch, "force_download": True}, + yangtze_kwargs={ 
+ "branch": self.branch, + "cache_dir": cache_dir, + "force_download": True, + }, ) assert ( - Path(default_testdata_cache) + Path(cache_dir) .joinpath( - self.branch, + default_testdata_version, "raven-gr4j-cemaneige", "Salmon-River-Near-Prince-George_meteo_daily.nc", ) From 9cc2b6c8930ff6df6a382bf874a20163566bb868 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 12 Jun 2025 17:39:22 -0400 Subject: [PATCH 26/29] update notebooks to be more user friendly, add missing registry entries, remove explicit platformdirs installation --- ...ct_geographical_watershed_properties.ipynb | 6 +++-- .../03_Extracting_forcing_data.ipynb | 8 ++++--- .../04_Emulating_hydrological_models.ipynb | 6 +++-- .../05_Advanced_RavenPy_configuration.ipynb | 2 +- docs/notebooks/06_Raven_calibration.ipynb | 8 ++++--- .../07_Making_and_using_hotstart_files.ipynb | 8 ++++--- ...tting_and_bias_correcting_CMIP6_data.ipynb | 5 ++-- ...drological_impacts_of_climate_change.ipynb | 8 +++---- docs/notebooks/10_Data_assimilation.ipynb | 22 ++++++++++-------- .../11_Climatological_ESP_forecasting.ipynb | 8 +++---- ...2_Performing_hindcasting_experiments.ipynb | 8 +++---- .../Assess_probabilistic_flood_risk.ipynb | 6 ++--- ...omparing_hindcasts_and_ESP_forecasts.ipynb | 8 +++---- .../Distributed_hydrological_modelling.ipynb | 14 +++++------ .../Hydrological_realtime_forecasting.ipynb | 8 +++---- docs/notebooks/Perform_Regionalization.ipynb | 8 ++++--- .../Running_HMETS_with_CANOPEX_dataset.ipynb | 21 ++++++++--------- docs/notebooks/Sensitivity_analysis.ipynb | 19 +++++++++++---- docs/notebooks/time_series_analysis.ipynb | 23 +++++++++++-------- environment-dev.yml | 1 - pyproject.toml | 1 - src/ravenpy/testing/registry.txt | 5 ++++ 22 files changed, 114 insertions(+), 89 deletions(-) diff --git a/docs/notebooks/02_Extract_geographical_watershed_properties.ipynb b/docs/notebooks/02_Extract_geographical_watershed_properties.ipynb index f2363a0d..8fb13cb3 100644 --- a/docs/notebooks/02_Extract_geographical_watershed_properties.ipynb +++ b/docs/notebooks/02_Extract_geographical_watershed_properties.ipynb @@ -39,9 +39,11 @@ "import rasterio\n", "from birdy import WPSClient\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", "\n", + "get_file = yangtze().fetch\n", + "\n", "# This is the URL of the Geoserver that will perform the computations for us.\n", "url = os.environ.get(\n", " \"WPS_URL\", \"https://pavics.ouranos.ca/twitcher/ows/proxy/raven/wps\"\n", @@ -75,7 +77,7 @@ "feature_url = \"input.geojson\"\n", "\"\"\"\n", "# However, to keep things tidy, we have also prepared a version that can be accessed easily for demonstration purposes:\n", - "feature_url = yangtze().fetch(\"notebook_inputs/input.geojson\")\n", + "feature_url = get_file(\"notebook_inputs/input.geojson\")\n", "df = gpd.read_file(feature_url)\n", "display(df)\n", "df.plot()" diff --git a/docs/notebooks/03_Extracting_forcing_data.ipynb b/docs/notebooks/03_Extracting_forcing_data.ipynb index 064812ed..f834d25c 100644 --- a/docs/notebooks/03_Extracting_forcing_data.ipynb +++ b/docs/notebooks/03_Extracting_forcing_data.ipynb @@ -31,8 +31,10 @@ "import xarray as xr\n", "from clisops.core import subset\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze" + "# Utility that simplifies 
working with test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", + "\n", + "get_file = yangtze().fetch" ] }, { @@ -64,7 +66,7 @@ "\"\"\"\n", "# However, to keep things tidy, we have also prepared a version that can be accessed easily for\n", "# demonstration purposes:\n", - "basin_contour = yangtze().fetch(\"notebook_inputs/input.geojson\")\n", + "basin_contour = get_file(\"notebook_inputs/input.geojson\")\n", "\n", "# Also, we can specify which timeframe we want to extract. Here let's focus on a 10-year period\n", "reference_start_day = dt.datetime(1985, 12, 31)\n", diff --git a/docs/notebooks/04_Emulating_hydrological_models.ipynb b/docs/notebooks/04_Emulating_hydrological_models.ipynb index b09f0fa2..d935ab25 100644 --- a/docs/notebooks/04_Emulating_hydrological_models.ipynb +++ b/docs/notebooks/04_Emulating_hydrological_models.ipynb @@ -45,7 +45,9 @@ "from ravenpy.config import commands as rc\n", "\n", "# Utility that simplifies fetching and caching test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze" + "from ravenpy.testing.utils import yangtze\n", + "\n", + "get_file = yangtze().fetch" ] }, { @@ -150,7 +152,7 @@ "\"\"\"\n", "\n", "# In our case, we will prefer to link to existing, pre-computed and locally stored files to keep things tidy:\n", - "ERA5_full = yangtze().fetch(\"notebook_inputs/ERA5_weather_data.nc\")\n", + "ERA5_full = get_file(\"notebook_inputs/ERA5_weather_data.nc\")\n", "\n", "\n", "# We need to define some configuration options that all models will need. See each line for more details on their use.\n", diff --git a/docs/notebooks/05_Advanced_RavenPy_configuration.ipynb b/docs/notebooks/05_Advanced_RavenPy_configuration.ipynb index 14612554..28a552f1 100644 --- a/docs/notebooks/05_Advanced_RavenPy_configuration.ipynb +++ b/docs/notebooks/05_Advanced_RavenPy_configuration.ipynb @@ -29,7 +29,7 @@ "# Utility that simplifies fetching and caching data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", "\n", - "yangtze = yangtze()" + "get_file = yangtze()" ] }, { diff --git a/docs/notebooks/06_Raven_calibration.ipynb b/docs/notebooks/06_Raven_calibration.ipynb index eb253164..75414499 100644 --- a/docs/notebooks/06_Raven_calibration.ipynb +++ b/docs/notebooks/06_Raven_calibration.ipynb @@ -34,9 +34,11 @@ "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", - "from ravenpy.utilities.calibration import SpotSetup" + "from ravenpy.utilities.calibration import SpotSetup\n", + "\n", + "get_file = yangtze().fetch" ] }, { @@ -56,7 +58,7 @@ "outputs": [], "source": [ "# We get the netCDF for testing on a server. 
You can replace the yangtze method with a string containing the absolute or relative path to your own netCDF\n", - "nc_file = yangtze().fetch(\n", + "nc_file = get_file(\n", " \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", ")\n", "\n", diff --git a/docs/notebooks/07_Making_and_using_hotstart_files.ipynb b/docs/notebooks/07_Making_and_using_hotstart_files.ipynb index 903c7207..9f54d69c 100644 --- a/docs/notebooks/07_Making_and_using_hotstart_files.ipynb +++ b/docs/notebooks/07_Making_and_using_hotstart_files.ipynb @@ -42,8 +42,10 @@ "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze" + "# Utility that simplifies working with test data hosted on GitHub\n", + "from ravenpy.testing.utils import yangtze\n", + "\n", + "get_file = yangtze().fetch" ] }, { @@ -67,7 +69,7 @@ ")\n", "\n", "# Get dataset:\n", - "ERA5_full = yangtze().fetch(\"notebook_inputs/ERA5_weather_data.nc\")\n", + "ERA5_full = get_file(\"notebook_inputs/ERA5_weather_data.nc\")\n", "\n", "# Set alternative names for netCDF variables\n", "alt_names = {\n", diff --git a/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb b/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb index 42e8f21d..a064e0df 100644 --- a/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb +++ b/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb @@ -40,9 +40,10 @@ "from clisops.core import average, subset\n", "from numba.core.errors import NumbaDeprecationWarning\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", "\n", + "get_file = yangtze().fetch\n", "tmp = Path(tempfile.mkdtemp())\n", "\n", "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)" @@ -78,7 +79,7 @@ "# You can replace the getfile method by a string containing the path to your own geojson.\n", "\n", "# Get basin contour.\n", - "basin_contour = yangtze().fetch(\"notebook_inputs/input.geojson\")\n", + "basin_contour = get_file(\"notebook_inputs/input.geojson\")\n", "\n", "reference_start_day = dt.datetime(1980, 12, 31)\n", "reference_end_day = dt.datetime(1991, 1, 1)\n", diff --git a/docs/notebooks/09_Hydrological_impacts_of_climate_change.ipynb b/docs/notebooks/09_Hydrological_impacts_of_climate_change.ipynb index 4ca5467c..ca55a9f2 100644 --- a/docs/notebooks/09_Hydrological_impacts_of_climate_change.ipynb +++ b/docs/notebooks/09_Hydrological_impacts_of_climate_change.ipynb @@ -34,10 +34,10 @@ "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", "\n", - "yangtze = yangtze()\n", + "get_file = yangtze().fetch\n", "\n", "warnings.filterwarnings(\"ignore\")" ] @@ -74,7 +74,7 @@ "# We get the netCDF for testing on a server. 
You can replace the getfile method by a string containing the path\n", "# to your own netCDF\n", "\n", - "reference_nc = yangtze.fetch(\"notebook_inputs/reference_dataset.nc\")\n", + "reference_nc = get_file(\"notebook_inputs/reference_dataset.nc\")\n", "\n", "# Alternate names for the data in the climate data NetCDF files\n", "alt_names = {\n", @@ -138,7 +138,7 @@ "end_date = dt.datetime(2090, 12, 31)\n", "\n", "# Get the future period dataset (path)\n", - "future_nc = yangtze.fetch(\"notebook_inputs/future_dataset.nc\")\n", + "future_nc = get_file(\"notebook_inputs/future_dataset.nc\")\n", "\n", "# Start a new model instance, again in this case a GR4JCN model emulator.\n", "m = emulators.GR4JCN(\n", diff --git a/docs/notebooks/10_Data_assimilation.ipynb b/docs/notebooks/10_Data_assimilation.ipynb index ea8dd42f..1dea9c17 100644 --- a/docs/notebooks/10_Data_assimilation.ipynb +++ b/docs/notebooks/10_Data_assimilation.ipynb @@ -18,13 +18,6 @@ "We will first start by importing important packages, gathering important datasets and configuration settings as we have seen previously." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, @@ -46,13 +39,22 @@ "from ravenpy.config import emulators\n", "from ravenpy.config import options as o\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", "\n", - "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)\n", + "get_file = yangtze().fetch\n", "\n", + "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "# Import hydrometeorological data\n", - "salmon_meteo = yangtze().fetch(\n", + "salmon_meteo = get_file(\n", " \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", ")\n", "\n", diff --git a/docs/notebooks/11_Climatological_ESP_forecasting.ipynb b/docs/notebooks/11_Climatological_ESP_forecasting.ipynb index e4a7b568..20a17599 100644 --- a/docs/notebooks/11_Climatological_ESP_forecasting.ipynb +++ b/docs/notebooks/11_Climatological_ESP_forecasting.ipynb @@ -31,11 +31,11 @@ "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", "from ravenpy.utilities import forecasting\n", "\n", - "yangtze = yangtze()" + "get_file = yangtze().fetch" ] }, { @@ -57,9 +57,7 @@ "source": [ "# Get the selected watershed's time series. 
You can use your own time-series for your catchment by replacing\n", "# this line with the name / path of your input file.\n", - "ts = yangtze.fetch(\n", - " \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", - ")\n", + "ts = get_file(\"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\")\n", "\n", "# This is the forecast start date, on which the forecasts will be launched.\n", "start_date = dt.datetime(1980, 6, 1)\n", diff --git a/docs/notebooks/12_Performing_hindcasting_experiments.ipynb b/docs/notebooks/12_Performing_hindcasting_experiments.ipynb index 3664a183..6370a45e 100644 --- a/docs/notebooks/12_Performing_hindcasting_experiments.ipynb +++ b/docs/notebooks/12_Performing_hindcasting_experiments.ipynb @@ -33,11 +33,11 @@ "from ravenpy.config.emulators import GR4JCN\n", "from ravenpy.extractors.forecasts import get_CASPAR_dataset\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", "from ravenpy.utilities import forecasting\n", "\n", - "yangtze = yangtze()\n", + "get_file = yangtze().fetch\n", "\n", "tmp = Path(tempfile.mkdtemp())" ] @@ -64,7 +64,7 @@ "ts_hindcast, _ = get_CASPAR_dataset(\"GEPS\", hdate)\n", "\n", "# Get basin contour\n", - "basin_contour = yangtze.fetch(\"notebook_inputs/salmon_river.geojson\")\n", + "basin_contour = get_file(\"notebook_inputs/salmon_river.geojson\")\n", "\n", "# Subset the data for the region of interest and take the mean to get a single vector\n", "with xr.set_options(keep_attrs=True):\n", @@ -109,7 +109,7 @@ "# )\n", "\n", "# TODO: We will use ERA5 data for Salmon River because it covers the correct period.\n", - "ts = yangtze.fetch(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", + "ts = get_file(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", "\n", "# This is the model start date, on which the simulation will be launched for a certain duration\n", "# to set up the initial states. We will then save the final states as a launching point for the\n", diff --git a/docs/notebooks/Assess_probabilistic_flood_risk.ipynb b/docs/notebooks/Assess_probabilistic_flood_risk.ipynb index e0d654cf..f98e2b82 100644 --- a/docs/notebooks/Assess_probabilistic_flood_risk.ipynb +++ b/docs/notebooks/Assess_probabilistic_flood_risk.ipynb @@ -26,10 +26,10 @@ "from matplotlib import pyplot as plt\n", "from numba.core.errors import NumbaDeprecationWarning\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", "\n", - "yangtze = yangtze()\n", + "get_file = yangtze().fetch\n", "\n", "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)" ] @@ -51,7 +51,7 @@ "source": [ "# Get the data that we will be using for the demonstration.\n", "file = \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", - "ts = xr.open_dataset(yangtze.fetch(file)).qobs\n", + "ts = xr.open_dataset(get_file(file)).qobs\n", "\n", "# Perform the frequency analysis for various return periods. 
We compute 2, 5, 10, 25, 50 and 100 year return\n", "# periods, but later on we will only compare the forecasts to the 2 year return period.\n", diff --git a/docs/notebooks/Comparing_hindcasts_and_ESP_forecasts.ipynb b/docs/notebooks/Comparing_hindcasts_and_ESP_forecasts.ipynb index b74dd54b..7d9d9249 100644 --- a/docs/notebooks/Comparing_hindcasts_and_ESP_forecasts.ipynb +++ b/docs/notebooks/Comparing_hindcasts_and_ESP_forecasts.ipynb @@ -32,11 +32,11 @@ "from ravenpy.config.emulators import GR4JCN\n", "from ravenpy.extractors.forecasts import get_CASPAR_dataset\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", "from ravenpy.utilities import forecasting\n", "\n", - "yangtze = yangtze()" + "get_file = yangtze().fetch" ] }, { @@ -60,7 +60,7 @@ "\n", "# Define the catchment contour. Here we use the Salmon River file we previously generated using the Delineator\n", "# in Tutorial Notebook 01.\n", - "basin_contour = yangtze.fetch(\"notebook_inputs/salmon_river.geojson\")\n", + "basin_contour = get_file(\"notebook_inputs/salmon_river.geojson\")\n", "\n", "# Define some of the catchment properties. Could also be replaced by a call to the properties WPS as in\n", "# the Tutorial Notebook 02.\n", @@ -74,7 +74,7 @@ "\n", "# Observed weather data for the Salmon river. We extracted this using Tutorial Notebook 03 and the\n", "# salmon_river.geojson file as the contour.\n", - "ts = yangtze.fetch(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", + "ts = get_file(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", "\n", "# Set alternative names for netCDF variables\n", "alt_names = {\n", diff --git a/docs/notebooks/Distributed_hydrological_modelling.ipynb b/docs/notebooks/Distributed_hydrological_modelling.ipynb index 6e968009..bc38c852 100644 --- a/docs/notebooks/Distributed_hydrological_modelling.ipynb +++ b/docs/notebooks/Distributed_hydrological_modelling.ipynb @@ -33,10 +33,10 @@ " upstream_from_coords,\n", ")\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", "\n", - "yangtze = yangtze()\n", + "get_file = yangtze().fetch\n", "\n", "tmp_path = Path(tempfile.mkdtemp())" ] @@ -54,8 +54,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Get path to pre-downloaded BasinMaker Routing product database for our catchment\n", - "shp_path = yangtze.fetch(\"basinmaker/drainage_region_0175_v2-1/finalcat_info_v2-1.zip\")\n", + "# Get path to a pre-downloaded BasinMaker Routing product database for our catchment\n", + "shp_path = get_file(\"basinmaker/drainage_region_0175_v2-1/finalcat_info_v2-1.zip\")\n", "\n", "# Note that for this to work, the coordinates must be in the small\n", "# BasinMaker example (drainage_region_0175)\n", @@ -90,7 +90,7 @@ "outputs": [], "source": [ "# Streamflow observations file\n", - "qobs_fn = yangtze.fetch(\"matapedia/Qobs_Matapedia_01BD009.nc\")\n", + "qobs_fn = get_file(\"matapedia/Qobs_Matapedia_01BD009.nc\")\n", "\n", "# Make an observation gauge from the observed streamflow\n", "qobs = rc.ObservationData.from_nc(qobs_fn, alt_names=(\"discharge\",))" @@ -110,7 +110,7 @@ "outputs": [], "source": [ "# Meteo observations file\n", - "meteo_grid_fn = yangtze.fetch(\"matapedia/Matapedia_meteo_data_stations.nc\")\n", + "meteo_grid_fn = 
get_file(\"matapedia/Matapedia_meteo_data_stations.nc\")\n", "\n", "# Alternate names for variables in the files\n", "alt_names = {\n", @@ -172,7 +172,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Show the hydrographs object\n", + "# Show the hydrograph object\n", "display(distributed_outputs.hydrograph)" ] }, diff --git a/docs/notebooks/Hydrological_realtime_forecasting.ipynb b/docs/notebooks/Hydrological_realtime_forecasting.ipynb index 5da47b07..8b148ce6 100644 --- a/docs/notebooks/Hydrological_realtime_forecasting.ipynb +++ b/docs/notebooks/Hydrological_realtime_forecasting.ipynb @@ -30,11 +30,11 @@ "from ravenpy.config import emulators\n", "from ravenpy.extractors.forecasts import get_recent_ECCC_forecast\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", "from ravenpy.utilities import forecasting\n", "\n", - "yangtze = yangtze()" + "get_file = yangtze().fetch" ] }, { @@ -45,7 +45,7 @@ "source": [ "# Define the catchment contour. Here we use the Salmon River file we previously generated using the Delineator\n", "# in Tutorial Notebook 01.\n", - "basin_contour = yangtze().fetch(\"notebook_inputs/salmon_river.geojson\")\n", + "basin_contour = get_file(\"notebook_inputs/salmon_river.geojson\")\n", "\n", "# Get the most recent ECCC forecast data from the Geomet extraction tool:\n", "forecast_data = get_recent_ECCC_forecast(\n", @@ -81,7 +81,7 @@ "\n", "# Observed weather data for the Salmon river. We extracted this using Tutorial Notebook 03 and the\n", "# salmon_river.geojson file as the contour. Used for the model warm-up.\n", - "ts = yangtze().fetch(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", + "ts = get_file(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", "\n", "# Set alternative names for netCDF variables\n", "alt_names = {\n", diff --git a/docs/notebooks/Perform_Regionalization.ipynb b/docs/notebooks/Perform_Regionalization.ipynb index 3b4a7034..6ac40d05 100644 --- a/docs/notebooks/Perform_Regionalization.ipynb +++ b/docs/notebooks/Perform_Regionalization.ipynb @@ -26,13 +26,15 @@ "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", "from ravenpy.utilities.regionalization import (\n", " read_gauged_params,\n", " read_gauged_properties,\n", " regionalize,\n", - ")" + ")\n", + "\n", + "get_file = yangtze().fetch" ] }, { @@ -49,7 +51,7 @@ "outputs": [], "source": [ "# Get the forcing dataset for the ungauged watershed\n", - "ts = yangtze().fetch(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", + "ts = get_file(\"notebook_inputs/ERA5_weather_data_Salmon.nc\")\n", "\n", "# Get HRUs of ungauged watershed\n", "hru = dict(\n", diff --git a/docs/notebooks/Running_HMETS_with_CANOPEX_dataset.ipynb b/docs/notebooks/Running_HMETS_with_CANOPEX_dataset.ipynb index ca1776fe..6e20d989 100644 --- a/docs/notebooks/Running_HMETS_with_CANOPEX_dataset.ipynb +++ b/docs/notebooks/Running_HMETS_with_CANOPEX_dataset.ipynb @@ -28,10 +28,12 @@ "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", 
"from ravenpy.testing.utils import yangtze\n", "from ravenpy.utilities.calibration import SpotSetup\n", "\n", + "get_file = yangtze().fetch\n", + "\n", "# Make a temporary folder\n", "tmp = Path(tempfile.mkdtemp())\n", "\n", @@ -87,9 +89,7 @@ "source": [ "# With this info, we can gather some properties from the CANOPEX database. This same database is used for\n", "# regionalization, so let's query it there where more information is available:\n", - "tmp = pd.read_csv(\n", - " yangtze().fetch(\"regionalisation_data/gauged_catchment_properties.csv\")\n", - ")\n", + "tmp = pd.read_csv(get_file(\"regionalisation_data/gauged_catchment_properties.csv\"))\n", "\n", "basin_area = float(tmp[\"area\"][watershedID])\n", "basin_latitude = float(tmp[\"latitude\"][watershedID])\n", @@ -275,12 +275,12 @@ "diag = spot_setup.diagnostics\n", "\n", "# Print the NSE and the parameter set in 2 different ways:\n", - "print(\"Nash-Sutcliffe value is: \" + str(diag[\"DIAG_NASH_SUTCLIFFE\"]))\n", + "print(f\"Nash-Sutcliffe value is: {diag['DIAG_NASH_SUTCLIFFE']}\")\n", "\n", "# Get all the values of each iteration\n", "results = sampler.getdata()\n", "\n", - "# Get the raw resutlts directly in an array\n", + "# Get the raw results directly in an array\n", "params = spotpy.analyser.get_best_parameterset(results)[0]\n", "params" ] @@ -328,12 +328,9 @@ "outputs": [], "source": [ "# You can also get statistics from the data directly.\n", - "print(\"Max: \", q.max().values)\n", - "print(\"Mean: \", q.mean().values)\n", - "print(\n", - " \"Monthly means: \",\n", - " q.groupby(\"time.month\").mean(dim=\"time\").values,\n", - ")" + "print(f\"Max: {q.max().values}\")\n", + "print(f\"Mean: {q.mean().values}\")\n", + "print(f\"Monthly means: {q.groupby('time.month').mean(dim='time').values}\")" ] }, { diff --git a/docs/notebooks/Sensitivity_analysis.ipynb b/docs/notebooks/Sensitivity_analysis.ipynb index a3604a8c..c7ed8421 100644 --- a/docs/notebooks/Sensitivity_analysis.ipynb +++ b/docs/notebooks/Sensitivity_analysis.ipynb @@ -23,9 +23,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "# Import required packages:\n", @@ -42,11 +40,22 @@ "from ravenpy.config import commands as rc\n", "from ravenpy.config import emulators\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", "\n", + "get_file = yangtze().fetch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ "# We get the netCDF from a server. 
You can replace the `get_file` function by a string containing the path to your own netCDF.\n", - "nc_file = yangtze(branch=\"new-system\").fetch(\n", + "nc_file = get_file(\n", " \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", ")\n", "\n", diff --git a/docs/notebooks/time_series_analysis.ipynb b/docs/notebooks/time_series_analysis.ipynb index 0c3ce76a..d002b8c9 100644 --- a/docs/notebooks/time_series_analysis.ipynb +++ b/docs/notebooks/time_series_analysis.ipynb @@ -20,14 +20,24 @@ "import xclim\n", "from pandas.plotting import register_matplotlib_converters\n", "\n", - "# Utility that simplifies fetching and caching test data hosted on GitHub\n", + "# Utility that simplifies working with test data hosted on GitHub\n", "from ravenpy.testing.utils import yangtze\n", "\n", - "register_matplotlib_converters()\n", + "get_file = yangtze().fetch\n", "\n", + "register_matplotlib_converters()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "# Get the file we will use to analyze flows\n", "file = \"hydro_simulations/raven-gr4j-cemaneige-sim_hmets-0_Hydrographs.nc\"\n", - "ds = yangtze().fetch(file)" + "ds = get_file(file)\n", + "ds" ] }, { @@ -39,13 +49,6 @@ "The base flow index is the minimum 7-day average flow divided by the mean flow." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, diff --git a/environment-dev.yml b/environment-dev.yml index aa82e9d2..52633221 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -22,7 +22,6 @@ dependencies: - owslib >=0.29.1 - pandas >=2.2.0 - pint >=0.24.4 - - platformdirs >=4.3.6 - pydantic >=2.11 - pydap >=3.4.0,<3.5.5 # pydap 3.5.5 is not currently supported by `xarray` (v2025.3.1) - pymetalink >=6.5.2 diff --git a/pyproject.toml b/pyproject.toml index 2933d192..fe461022 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,6 @@ dependencies = [ "owslib >=0.29.1", "pandas >=2.2.0", "pint >=0.24.4", - "platformdirs >=4.3.6", "pydantic >=2.11", "pydap >=3.4.0,<3.5.5", # pydap 3.5.5 is not currently supported by `xarray` (v2025.3.1) "pymbolic >=2024.2", diff --git a/src/ravenpy/testing/registry.txt b/src/ravenpy/testing/registry.txt index 5c03c34d..3fa00d8b 100644 --- a/src/ravenpy/testing/registry.txt +++ b/src/ravenpy/testing/registry.txt @@ -18,6 +18,10 @@ matapedia/Matapedia_meteo_data_2D.nc sha256:1c4a10f7d7964e7943838c9b7725723a8074 matapedia/Matapedia_meteo_data_stations.nc sha256:20892444705b504598cadb816694626a93d46a7edac89f6068bc4e476b21e451 matapedia/Qobs_Matapedia_01BD009.nc sha256:d94e6776954b4e4d05ce185889799b255db7e300f9e438129f53cfde6cb85b07 nasa/Mars_MGS_MOLA_DEM_georeferenced_region_compressed.tiff sha256:3190c6799724b37c11c326db25cf3cebf57d87e0133e818e6aba18da91d43453 +notebook_inputs/ERA5_weather_data.nc sha256:a9906c347717080ab5d1b1f3dfc38cfdfe7a1f0a7a5063f403db87b15d063d19 +notebook_inputs/future_dataset.nc sha256:bc1340d3c3b22d4cf32b70c6b2c7b336332b31c03428f8ad57ed570002cee7b5 +notebook_inputs/input.geojson sha256:fd6ed2fcc4c22ceeb3dfecd5439b6a1236e26785f141e67b6140d3e6bf88f22f +notebook_inputs/salmon_river.geojson sha256:93f7238ef44c0f15b7abce4c9e6cb6afdd1663530df6eae9b430c9770a4b1f52 nrcan/NRCAN_1971-1972_subset.nc sha256:7de9def968a7f3d804255be2b86976e47b0edc3a80e5bf4ad608c1adf2f01d40 nrcan/NRCAN_2006-2007_subset.nc sha256:a029261f1b74cd688e0b7f70ab3a770e9e9e4f5f005b3a370707803550c1c646 polygons/Basin_10.zip 
sha256:d611ec4d082fc7a26fbcfbcd843c04c155e66f2bc044896374218a19af4fc6d9 @@ -57,5 +61,6 @@ raven-routing-sample/WSC02LE024.nc sha256:e76aa242b06eae78c6a9980592550abd154496 raven-routing-sample/era5-test-dataset-crop.nc sha256:9e3088282022372c4737b04d3360ea88982d749d234ca24ca07743e0f6b28bde raven-routing-sample/finalcat_hru_info.zip sha256:dd6818455d9e967d000d4fbc3f33c5b68af9d1babe75b093e2bb29847acaf59a raven-routing-sample/lievre_hrus_v21.zip sha256:2dc6d6ab21f5b009e437da4324438d3cea3934ca0cfd72a0ed69a1deb3c8b6e3 +regionalisation_data/tests/gauged_catchment_properties.csv sha256:3a5bc6bed7a1fab9a1870c59fd0f860beb9ecc09236fbe3c478c915143c8a04d watershed_vector/Basin_test.zip sha256:c5c02f7fe8be37c22813715cdf30e03eb163113a6610d862ce280a2203895a7e watershed_vector/LSJ_LL.zip sha256:036d4a607a6d1f11eb4e73915be68fcad11477fecc927742f5b53d21d105bc5b From 934a5b05c76641ef9d293adf713318f3ebe5400e Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Fri, 13 Jun 2025 09:43:20 -0400 Subject: [PATCH 27/29] use regex, hide yangtze_kwargs --- src/ravenpy/testing/utils.py | 36 +++++++++++++++--------------------- tests/conftest.py | 4 ++-- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/src/ravenpy/testing/utils.py b/src/ravenpy/testing/utils.py index 92d9b362..13f8c557 100644 --- a/src/ravenpy/testing/utils.py +++ b/src/ravenpy/testing/utils.py @@ -33,7 +33,8 @@ import pooch except ImportError: warnings.warn( - "The `pooch` library is not installed. The default cache directory for testing data will not be set." + "The `pooch` library is not installed. " + "The default cache directory for testing data will not be set." ) pooch = None @@ -153,14 +154,9 @@ def _get_ravenpy_dependencies(): ravenpy_metadata = ilm.metadata("ravenpy") requires = ravenpy_metadata.get_all("Requires-Dist") requires = [ - req.split("[")[0] - .split(";")[0] - .split(">")[0] - .split("<")[0] - .split("=")[0] - .split("!")[0] - .strip() + re.match(r"^[A-Za-z0-9_.\-]+", req).group(0) for req in requires + if re.match(r"^[A-Za-z0-9_.\-]+", req) ] sorted_deps = sorted(list(set(requires) - {"ravenpy"})) @@ -322,13 +318,13 @@ def yangtze( Notes ----- There are three environment variables that can be used to control the behaviour of this registry: - - ``RAVENPY_TESTDATA_CACHE_DIR``: If this environment variable is set, it will be used as the + - ``RAVEN_TESTDATA_CACHE_DIR``: If this environment variable is set, it will be used as the base directory to store the data files. The directory should be an absolute path (i.e. it should start with ``/``). Otherwise, the default location will be used (based on ``platformdirs``, see :py:func:`pooch.os_cache`). - - ``RAVENPY_TESTDATA_REPO_URL``: If this environment variable is set, it will be used as the URL of + - ``RAVEN_TESTDATA_REPO_URL``: If this environment variable is set, it will be used as the URL of the repository to use when fetching datasets. Otherwise, the default repository will be used. - - ``RAVENPY_TESTDATA_BRANCH``: If this environment variable is set, it will be used as the branch of + - ``RAVEN_TESTDATA_BRANCH``: If this environment variable is set, it will be used as the branch of the repository to use when fetching datasets. Otherwise, the default branch will be used. 
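To make the behaviour documented in these Notes concrete, here is a minimal sketch of fetching one registry entry. The branch tag and cache path are illustrative only; the keyword arguments mirror those exercised by `test_open_dataset` later in this series:

.. code-block:: python

    from pathlib import Path

    from ravenpy.testing.utils import yangtze

    # Fetch a single file from a pinned testing-data tag into a throwaway
    # cache directory, re-downloading the registry file first.
    fetcher = yangtze(
        branch="v2025.6.12",
        cache_dir=Path("/tmp/yangtze_cache"),  # illustrative location
        force_download=True,
    )
    local_path = fetcher.fetch(
        "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc"
    )
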
     Examples
@@ -396,7 +392,7 @@ def open_dataset(
 
 def open_dataset(
     name: str,
-    yangtze_kwargs: dict[str, Path | str | bool] | None = None,
+    _yangtze_kwargs: dict[str, Path | str | bool] | None = None,
     **xr_kwargs: Any,
 ) -> Dataset:
     r"""
@@ -408,7 +404,7 @@
     ----------
     name : str
         Name of the file containing the dataset.
-    yangtze_kwargs : dict
+    _yangtze_kwargs : dict
         Keyword arguments passed to the yangtze function.
     **xr_kwargs : Any
         Keyword arguments passed to xarray.open_dataset.
@@ -423,9 +419,9 @@
     xarray.open_dataset : Open and read a dataset from a file or file-like object.
     yangtze : Pooch wrapper for accessing the RavenPy testing data.
     """
-    if yangtze_kwargs is None:
-        yangtze_kwargs = {}
-    return _open_dataset(yangtze(**yangtze_kwargs).fetch(name), **xr_kwargs)
+    if _yangtze_kwargs is None:
+        _yangtze_kwargs = {}
+    return _open_dataset(yangtze(**_yangtze_kwargs).fetch(name), **xr_kwargs)
 
 
 def populate_testing_data(
@@ -445,7 +441,7 @@
     repo : str, optional
         URL of the repository to use when fetching testing datasets.
     branch : str, optional
-        Branch of ravenpy-testdata to use when fetching testing datasets.
+        Branch of raven-testdata to use when fetching testing datasets.
     retry : int
         Number of times to retry downloading the files in case of failure. Default: 3.
     local_cache : Path
@@ -475,10 +471,8 @@
             errored_files.append(file)
 
     if errored_files:
-        logging.error(
-            "The following files were unable to be downloaded: %s",
-            errored_files,
-        )
+        msg = f"The following files were unable to be downloaded: {errored_files}"
+        logging.error(msg)
 
 
 def gather_testing_data(
@@ -508,7 +502,7 @@
     if _cache_dir is None:
         raise ValueError(
             "The cache directory must be set. "
-            "Please set the `cache_dir` parameter or the `RAVENPY_DATA_DIR` environment variable."
+            "Please set the `cache_dir` parameter or the `RAVEN_TESTDATA_CACHE_DIR` environment variable."
) cache_dir = Path(_cache_dir) diff --git a/tests/conftest.py b/tests/conftest.py index d2103f97..574c641c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -51,7 +51,7 @@ def yangtze(threadsafe_data_dir, worker_id): @pytest.fixture(scope="session") def open_dataset(threadsafe_data_dir, worker_id): def _open_session_scoped_file(file: str | os.PathLike, **xr_kwargs): - yangtze_kwargs = { + _yangtze_kwargs = { "branch": TESTDATA_BRANCH, "repo": TESTDATA_REPO_URL, "cache_dir": ( @@ -62,7 +62,7 @@ def _open_session_scoped_file(file: str | os.PathLike, **xr_kwargs): xr_kwargs.setdefault("engine", "h5netcdf") return _open_dataset( file, - yangtze_kwargs=yangtze_kwargs, + _yangtze_kwargs=_yangtze_kwargs, **xr_kwargs, ) From 2ba6afd7e046caede4fc1fe47412f1de8e518a86 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Fri, 13 Jun 2025 11:31:32 -0400 Subject: [PATCH 28/29] move convert functions, fix test --- src/ravenpy/testing/helpers.py | 97 +++------------------------ tests/conftest.py | 74 +++++++++++++++++++- tests/test_external_dataset_access.py | 2 +- 3 files changed, 85 insertions(+), 88 deletions(-) diff --git a/src/ravenpy/testing/helpers.py b/src/ravenpy/testing/helpers.py index cc985c61..9ffc076d 100644 --- a/src/ravenpy/testing/helpers.py +++ b/src/ravenpy/testing/helpers.py @@ -5,8 +5,6 @@ import xarray as xr __all__ = [ - "convert_2d", - "convert_3d", "count_pixels", "make_bnds", "synthetic_gr4j_inputs", @@ -27,22 +25,6 @@ def count_pixels(stats: dict, numeric_categories=False) -> int: return category_counts -def synthetic_gr4j_inputs(path): - time = pd.date_range(start="2000-07-01", end="2002-07-01", freq="D") - - pr = 3 * np.ones(len(time)) - pr = xr.DataArray(pr, coords={"time": time}, dims="time", name="pr") - pr.to_netcdf(Path(path).joinpath("pr.nc")) - - tas = 280 + 20 * np.cos(np.arange(len(time)) * 2 * np.pi / 365.0) - tas = xr.DataArray(tas, coords={"time": time}, dims="time", name="tas") - tas.to_netcdf(Path(path).joinpath("tas.nc")) - - evap = 3 + 3 * np.cos(-30 + np.arange(len(time)) * 2 * np.pi / 365.0) - evap = xr.DataArray(evap, coords={"time": time}, dims="time", name="evap") - evap.to_netcdf(Path(path).joinpath("evap.nc")) - - def make_bnds(params, delta): """ Return low and high parameter bounds by subtracting and adding delta*params to params. @@ -64,74 +46,17 @@ def make_bnds(params, delta): return tuple(arr - d), tuple(arr + d) -def convert_2d(fn): - """Take the 1D Salmon time series and convert it to a 2D time series. - - Example - ------- - >>> fn = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" - >>> fn2 = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc" - >>> _convert_2d(fn).to_netcdf(fn2, "w") - """ - features = { - "name": "Salmon", - "area": 4250.6, - "elevation": 843.0, - "latitude": 54.4848, - "longitude": -123.3659, - } - ds = xr.open_dataset(fn, decode_times=False).rename({"nstations": "region"}) - - out = xr.Dataset( - coords={ - "lon": ds.lon.expand_dims("lon").squeeze("region"), - "lat": ds.lat.expand_dims("lat").squeeze("region"), - "time": ds.time, - } - ) - - for v in ds.data_vars: - if v not in ["lon", "lat"]: - out[v] = ds[v].expand_dims("region", axis=1) - - # Add geometry feature variables - for key, val in features.items(): - out[key] = xr.DataArray(name=key, data=[val], dims="region") - - return out - - -def convert_3d(fn): - """Take the 1D Salmon time series and convert it to a 3D time series. 
- - Example - ------- - >>> fn = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" - >>> fn3 = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc" - >>> _convert_3d(fn).to_netcdf(fn3, "w") - """ - elevation = [[843.0]] - ds = xr.open_dataset(fn, decode_times=False) - - out = xr.Dataset( - coords={ - "lon": ds.lon.expand_dims("lon").squeeze("nstations"), - "lat": ds.lat.expand_dims("lat").squeeze("nstations"), - "time": ds.time, - } - ) +def synthetic_gr4j_inputs(path): + time = pd.date_range(start="2000-07-01", end="2002-07-01", freq="D") - for v in ds.data_vars: - if v not in ["lon", "lat", "time"]: - out[v] = ds[v] - out[v] = out[v].expand_dims( - ["lon", "lat"] - ) # Needs to be in other step to keep attributes + pr = 3 * np.ones(len(time)) + pr = xr.DataArray(pr, coords={"time": time}, dims="time", name="pr") + pr.to_netcdf(Path(path).joinpath("pr.nc")) - out["elevation"] = xr.DataArray( - data=elevation, - dims=["lon", "lat"], - attrs={"units": "m", "standard_name": "altitude"}, - ) + tas = 280 + 20 * np.cos(np.arange(len(time)) * 2 * np.pi / 365.0) + tas = xr.DataArray(tas, coords={"time": time}, dims="time", name="tas") + tas.to_netcdf(Path(path).joinpath("tas.nc")) - return out + evap = 3 + 3 * np.cos(-30 + np.arange(len(time)) * 2 * np.pi / 365.0) + evap = xr.DataArray(evap, coords={"time": time}, dims="time", name="evap") + evap.to_netcdf(Path(path).joinpath("evap.nc")) diff --git a/tests/conftest.py b/tests/conftest.py index 574c641c..18c3e93f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,7 +17,6 @@ ) from xclim.indicators.generic import fit, stats -from ravenpy.testing.helpers import convert_2d, convert_3d from ravenpy.testing.utils import ( TESTDATA_BRANCH, TESTDATA_CACHE_DIR, @@ -32,6 +31,79 @@ from ravenpy.testing.utils import yangtze as _yangtze +def convert_2d(fn): + """Take the 1D Salmon time series and convert it to a 2D time series. + + Example + ------- + >>> fn = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" + >>> fn2 = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc" + >>> _convert_2d(fn).to_netcdf(fn2, "w") + """ + features = { + "name": "Salmon", + "area": 4250.6, + "elevation": 843.0, + "latitude": 54.4848, + "longitude": -123.3659, + } + ds = xr.open_dataset(fn, decode_times=False).rename({"nstations": "region"}) + + out = xr.Dataset( + coords={ + "lon": ds.lon.expand_dims("lon").squeeze("region"), + "lat": ds.lat.expand_dims("lat").squeeze("region"), + "time": ds.time, + } + ) + + for v in ds.data_vars: + if v not in ["lon", "lat"]: + out[v] = ds[v].expand_dims("region", axis=1) + + # Add geometry feature variables + for key, val in features.items(): + out[key] = xr.DataArray(name=key, data=[val], dims="region") + + return out + + +def convert_3d(fn): + """Take the 1D Salmon time series and convert it to a 3D time series. 
+ + Example + ------- + >>> fn = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" + >>> fn3 = "./testdata/raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc" + >>> _convert_3d(fn).to_netcdf(fn3, "w") + """ + elevation = [[843.0]] + ds = xr.open_dataset(fn, decode_times=False) + + out = xr.Dataset( + coords={ + "lon": ds.lon.expand_dims("lon").squeeze("nstations"), + "lat": ds.lat.expand_dims("lat").squeeze("nstations"), + "time": ds.time, + } + ) + + for v in ds.data_vars: + if v not in ["lon", "lat", "time"]: + out[v] = ds[v] + out[v] = out[v].expand_dims( + ["lon", "lat"] + ) # Needs to be in other step to keep attributes + + out["elevation"] = xr.DataArray( + data=elevation, + dims=["lon", "lat"], + attrs={"units": "m", "standard_name": "altitude"}, + ) + + return out + + @pytest.fixture(scope="session") def threadsafe_data_dir(tmp_path_factory) -> Path: return Path(tmp_path_factory.getbasetemp().joinpath("data")) diff --git a/tests/test_external_dataset_access.py b/tests/test_external_dataset_access.py index bc7f96a3..c55a1909 100644 --- a/tests/test_external_dataset_access.py +++ b/tests/test_external_dataset_access.py @@ -59,7 +59,7 @@ def test_open_dataset( cache_dir = tmp_path / "yangtze_cache" ds = open_dataset( name="raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc", - yangtze_kwargs={ + _yangtze_kwargs={ "branch": self.branch, "cache_dir": cache_dir, "force_download": True, From 55f0e9e41d63c535378e814cd41f1cff6727fd92 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 23 Jun 2025 13:16:19 -0400 Subject: [PATCH 29/29] use get_file --- CHANGELOG.rst | 2 +- ...ct_geographical_watershed_properties.ipynb | 4 +-- .../03_Extracting_forcing_data.ipynb | 4 +-- .../04_Emulating_hydrological_models.ipynb | 4 +-- .../05_Advanced_RavenPy_configuration.ipynb | 6 ++-- docs/notebooks/06_Raven_calibration.ipynb | 6 ++-- .../07_Making_and_using_hotstart_files.ipynb | 4 +-- ...tting_and_bias_correcting_CMIP6_data.ipynb | 3 +- ...drological_impacts_of_climate_change.ipynb | 4 +-- docs/notebooks/10_Data_assimilation.ipynb | 4 +-- .../11_Climatological_ESP_forecasting.ipynb | 6 ++-- ...2_Performing_hindcasting_experiments.ipynb | 4 +-- .../Assess_probabilistic_flood_risk.ipynb | 4 +-- ...omparing_hindcasts_and_ESP_forecasts.ipynb | 6 ++-- .../Distributed_hydrological_modelling.ipynb | 4 +-- .../Hydrological_realtime_forecasting.ipynb | 6 ++-- docs/notebooks/Perform_Regionalization.ipynb | 6 ++-- .../Running_HMETS_with_CANOPEX_dataset.ipynb | 4 +-- docs/notebooks/Sensitivity_analysis.ipynb | 4 +-- docs/notebooks/time_series_analysis.ipynb | 4 +-- src/ravenpy/testing/utils.py | 29 ++++++++++++++++++- 21 files changed, 54 insertions(+), 64 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c33924a5..370f4aa0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -14,7 +14,7 @@ Breaking changes ^^^^^^^^^^^^^^^^ * `ravenpy` now requires `pooch>=1.8.0` for downloading and caching remote testing data. (PR #513) * `ravenpy.utilities.testdata` has been refactored to new module `ravenpy.testing`. The `publish_release_notes` function is now located in `ravenpy.utilities.publishing`. (PR #513) -* The `ravenpy.testing.utils` module now provides a `yangtze()` class for fetching and caching the `raven-testdata` testing data. This replaces the previous `get_local_testdata` and `get_file` functions. 
From 55f0e9e41d63c535378e814cd41f1cff6727fd92 Mon Sep 17 00:00:00 2001
From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com>
Date: Mon, 23 Jun 2025 13:16:19 -0400
Subject: [PATCH 29/29] use get_file

---
 CHANGELOG.rst                                 |  2 +-
 ...ct_geographical_watershed_properties.ipynb |  4 +--
 .../03_Extracting_forcing_data.ipynb          |  4 +--
 .../04_Emulating_hydrological_models.ipynb    |  4 +--
 .../05_Advanced_RavenPy_configuration.ipynb   |  6 ++--
 docs/notebooks/06_Raven_calibration.ipynb     |  6 ++--
 .../07_Making_and_using_hotstart_files.ipynb  |  4 +--
 ...tting_and_bias_correcting_CMIP6_data.ipynb |  3 +-
 ...drological_impacts_of_climate_change.ipynb |  4 +--
 docs/notebooks/10_Data_assimilation.ipynb     |  4 +--
 .../11_Climatological_ESP_forecasting.ipynb   |  6 ++--
 ...2_Performing_hindcasting_experiments.ipynb |  4 +--
 .../Assess_probabilistic_flood_risk.ipynb     |  4 +--
 ...omparing_hindcasts_and_ESP_forecasts.ipynb |  6 ++--
 .../Distributed_hydrological_modelling.ipynb  |  4 +--
 .../Hydrological_realtime_forecasting.ipynb   |  6 ++--
 docs/notebooks/Perform_Regionalization.ipynb  |  6 ++--
 .../Running_HMETS_with_CANOPEX_dataset.ipynb  |  4 +--
 docs/notebooks/Sensitivity_analysis.ipynb     |  4 +--
 docs/notebooks/time_series_analysis.ipynb     |  4 +--
 src/ravenpy/testing/utils.py                  | 29 ++++++++++++++++++-
 21 files changed, 54 insertions(+), 64 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index c33924a5..370f4aa0 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -14,7 +14,7 @@ Breaking changes
 ^^^^^^^^^^^^^^^^
 * `ravenpy` now requires `pooch>=1.8.0` for downloading and caching remote testing data. (PR #513)
 * `ravenpy.utilities.testdata` has been refactored to new module `ravenpy.testing`. The `publish_release_notes` function is now located in `ravenpy.utilities.publishing`. (PR #513)
-* The `ravenpy.testing.utils` module now provides a `yangtze()` class for fetching and caching the `raven-testdata` testing data. This replaces the previous `get_local_testdata` and `get_file` functions. (PR #513)
+* The `ravenpy.testing.utils` module now provides a `yangtze()` class for fetching and caching the `raven-testdata` testing data. A convenience function (`get_file`) replaces the previous `get_local_testdata`. (PR #513)
 * The `ravenpy.testing.utils.open_dataset` function no longer supports OPeNDAP URLs or local file paths. Instead, it uses the `yangtze()` class to fetch datasets from the testing data repository or the local cache. Users should now use `xarray.open_dataset()` directly for OPeNDAP URLs or local files. (PR #513)

 Bug fixes
diff --git a/docs/notebooks/02_Extract_geographical_watershed_properties.ipynb b/docs/notebooks/02_Extract_geographical_watershed_properties.ipynb
index 8fb13cb3..9d02b8c2 100644
--- a/docs/notebooks/02_Extract_geographical_watershed_properties.ipynb
+++ b/docs/notebooks/02_Extract_geographical_watershed_properties.ipynb
@@ -40,9 +40,7 @@
 "from birdy import WPSClient\n",
 "\n",
 "# Utility that simplifies working with test data hosted on GitHub\n",
-"from ravenpy.testing.utils import yangtze\n",
-"\n",
-"get_file = yangtze().fetch\n",
+"from ravenpy.testing.utils import get_file\n",
 "\n",
 "# This is the URL of the Geoserver that will perform the computations for us.\n",
 "url = os.environ.get(\n",
diff --git a/docs/notebooks/03_Extracting_forcing_data.ipynb b/docs/notebooks/03_Extracting_forcing_data.ipynb
index f834d25c..6aad729f 100644
--- a/docs/notebooks/03_Extracting_forcing_data.ipynb
+++ b/docs/notebooks/03_Extracting_forcing_data.ipynb
@@ -32,9 +32,7 @@
 "from clisops.core import subset\n",
 "\n",
 "# Utility that simplifies working with test data hosted on GitHub\n",
-"from ravenpy.testing.utils import yangtze\n",
-"\n",
-"get_file = yangtze().fetch"
+"from ravenpy.testing.utils import get_file"
 ]
 },
 {
diff --git a/docs/notebooks/04_Emulating_hydrological_models.ipynb b/docs/notebooks/04_Emulating_hydrological_models.ipynb
index d935ab25..8de8c567 100644
--- a/docs/notebooks/04_Emulating_hydrological_models.ipynb
+++ b/docs/notebooks/04_Emulating_hydrological_models.ipynb
@@ -45,9 +45,7 @@
 "from ravenpy.config import commands as rc\n",
 "\n",
 "# Utility that simplifies fetching and caching test data hosted on GitHub\n",
-"from ravenpy.testing.utils import yangtze\n",
-"\n",
-"get_file = yangtze().fetch"
+"from ravenpy.testing.utils import get_file"
 ]
 },
 {
diff --git a/docs/notebooks/05_Advanced_RavenPy_configuration.ipynb b/docs/notebooks/05_Advanced_RavenPy_configuration.ipynb
index 28a552f1..7c388539 100644
--- a/docs/notebooks/05_Advanced_RavenPy_configuration.ipynb
+++ b/docs/notebooks/05_Advanced_RavenPy_configuration.ipynb
@@ -27,9 +27,7 @@
 "outputs": [],
 "source": [
 "# Utility that simplifies fetching and caching data hosted on GitHub\n",
-"from ravenpy.testing.utils import yangtze\n",
-"\n",
-"get_file = yangtze()"
+"from ravenpy.testing.utils import get_file"
 ]
 },
 {
@@ -51,7 +49,7 @@
 "# to make the calibration possible in the next notebook.
Note that these configuration files also include links to the\n", "# required hydrometeorological database (NetCDF file).\n", "config = [\n", - " yangtze.fetch(f\"raven-gr4j-cemaneige/raven-gr4j-salmon.{ext}\")\n", + " get_file(f\"raven-gr4j-cemaneige/raven-gr4j-salmon.{ext}\")\n", " for ext in [\"rvt\", \"rvc\", \"rvi\", \"rvh\", \"rvp\"]\n", "]\n", "config" diff --git a/docs/notebooks/06_Raven_calibration.ipynb b/docs/notebooks/06_Raven_calibration.ipynb index 75414499..3a24a51a 100644 --- a/docs/notebooks/06_Raven_calibration.ipynb +++ b/docs/notebooks/06_Raven_calibration.ipynb @@ -35,10 +35,8 @@ "from ravenpy.config import emulators\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", - "from ravenpy.utilities.calibration import SpotSetup\n", - "\n", - "get_file = yangtze().fetch" + "from ravenpy.testing.utils import get_file\n", + "from ravenpy.utilities.calibration import SpotSetup" ] }, { diff --git a/docs/notebooks/07_Making_and_using_hotstart_files.ipynb b/docs/notebooks/07_Making_and_using_hotstart_files.ipynb index 9f54d69c..fb0a6a04 100644 --- a/docs/notebooks/07_Making_and_using_hotstart_files.ipynb +++ b/docs/notebooks/07_Making_and_using_hotstart_files.ipynb @@ -43,9 +43,7 @@ "from ravenpy.config import emulators\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", - "\n", - "get_file = yangtze().fetch" + "from ravenpy.testing.utils import get_file" ] }, { diff --git a/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb b/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb index a064e0df..00c57f69 100644 --- a/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb +++ b/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb @@ -41,9 +41,8 @@ "from numba.core.errors import NumbaDeprecationWarning\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", + "from ravenpy.testing.utils import get_file\n", "\n", - "get_file = yangtze().fetch\n", "tmp = Path(tempfile.mkdtemp())\n", "\n", "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)" diff --git a/docs/notebooks/09_Hydrological_impacts_of_climate_change.ipynb b/docs/notebooks/09_Hydrological_impacts_of_climate_change.ipynb index ca55a9f2..8cdd93fa 100644 --- a/docs/notebooks/09_Hydrological_impacts_of_climate_change.ipynb +++ b/docs/notebooks/09_Hydrological_impacts_of_climate_change.ipynb @@ -35,9 +35,7 @@ "from ravenpy.config import emulators\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", - "\n", - "get_file = yangtze().fetch\n", + "from ravenpy.testing.utils import get_file\n", "\n", "warnings.filterwarnings(\"ignore\")" ] diff --git a/docs/notebooks/10_Data_assimilation.ipynb b/docs/notebooks/10_Data_assimilation.ipynb index 1dea9c17..1cde3cc8 100644 --- a/docs/notebooks/10_Data_assimilation.ipynb +++ b/docs/notebooks/10_Data_assimilation.ipynb @@ -40,9 +40,7 @@ "from ravenpy.config import options as o\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", - "\n", - "get_file = yangtze().fetch\n", + "from ravenpy.testing.utils import get_file\n", "\n", "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)" ] diff --git a/docs/notebooks/11_Climatological_ESP_forecasting.ipynb 
b/docs/notebooks/11_Climatological_ESP_forecasting.ipynb index 20a17599..e5df4ca7 100644 --- a/docs/notebooks/11_Climatological_ESP_forecasting.ipynb +++ b/docs/notebooks/11_Climatological_ESP_forecasting.ipynb @@ -32,10 +32,8 @@ "from ravenpy.config import emulators\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", - "from ravenpy.utilities import forecasting\n", - "\n", - "get_file = yangtze().fetch" + "from ravenpy.testing.utils import get_file\n", + "from ravenpy.utilities import forecasting" ] }, { diff --git a/docs/notebooks/12_Performing_hindcasting_experiments.ipynb b/docs/notebooks/12_Performing_hindcasting_experiments.ipynb index 6370a45e..93ec4c5d 100644 --- a/docs/notebooks/12_Performing_hindcasting_experiments.ipynb +++ b/docs/notebooks/12_Performing_hindcasting_experiments.ipynb @@ -34,11 +34,9 @@ "from ravenpy.extractors.forecasts import get_CASPAR_dataset\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", + "from ravenpy.testing.utils import get_file\n", "from ravenpy.utilities import forecasting\n", "\n", - "get_file = yangtze().fetch\n", - "\n", "tmp = Path(tempfile.mkdtemp())" ] }, diff --git a/docs/notebooks/Assess_probabilistic_flood_risk.ipynb b/docs/notebooks/Assess_probabilistic_flood_risk.ipynb index f98e2b82..d077dbcb 100644 --- a/docs/notebooks/Assess_probabilistic_flood_risk.ipynb +++ b/docs/notebooks/Assess_probabilistic_flood_risk.ipynb @@ -27,9 +27,7 @@ "from numba.core.errors import NumbaDeprecationWarning\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", - "\n", - "get_file = yangtze().fetch\n", + "from ravenpy.testing.utils import get_file\n", "\n", "warnings.simplefilter(\"ignore\", category=NumbaDeprecationWarning)" ] diff --git a/docs/notebooks/Comparing_hindcasts_and_ESP_forecasts.ipynb b/docs/notebooks/Comparing_hindcasts_and_ESP_forecasts.ipynb index 7d9d9249..5b59ba04 100644 --- a/docs/notebooks/Comparing_hindcasts_and_ESP_forecasts.ipynb +++ b/docs/notebooks/Comparing_hindcasts_and_ESP_forecasts.ipynb @@ -33,10 +33,8 @@ "from ravenpy.extractors.forecasts import get_CASPAR_dataset\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", - "from ravenpy.utilities import forecasting\n", - "\n", - "get_file = yangtze().fetch" + "from ravenpy.testing.utils import get_file\n", + "from ravenpy.utilities import forecasting" ] }, { diff --git a/docs/notebooks/Distributed_hydrological_modelling.ipynb b/docs/notebooks/Distributed_hydrological_modelling.ipynb index bc38c852..9be90e1c 100644 --- a/docs/notebooks/Distributed_hydrological_modelling.ipynb +++ b/docs/notebooks/Distributed_hydrological_modelling.ipynb @@ -34,9 +34,7 @@ ")\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", - "\n", - "get_file = yangtze().fetch\n", + "from ravenpy.testing.utils import get_file\n", "\n", "tmp_path = Path(tempfile.mkdtemp())" ] diff --git a/docs/notebooks/Hydrological_realtime_forecasting.ipynb b/docs/notebooks/Hydrological_realtime_forecasting.ipynb index 8b148ce6..cd7fedeb 100644 --- a/docs/notebooks/Hydrological_realtime_forecasting.ipynb +++ b/docs/notebooks/Hydrological_realtime_forecasting.ipynb @@ -31,10 +31,8 @@ "from ravenpy.extractors.forecasts import 
get_recent_ECCC_forecast\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", - "from ravenpy.utilities import forecasting\n", - "\n", - "get_file = yangtze().fetch" + "from ravenpy.testing.utils import get_file\n", + "from ravenpy.utilities import forecasting" ] }, { diff --git a/docs/notebooks/Perform_Regionalization.ipynb b/docs/notebooks/Perform_Regionalization.ipynb index 6ac40d05..69b6fe64 100644 --- a/docs/notebooks/Perform_Regionalization.ipynb +++ b/docs/notebooks/Perform_Regionalization.ipynb @@ -27,14 +27,12 @@ "from ravenpy.config import emulators\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", + "from ravenpy.testing.utils import get_file\n", "from ravenpy.utilities.regionalization import (\n", " read_gauged_params,\n", " read_gauged_properties,\n", " regionalize,\n", - ")\n", - "\n", - "get_file = yangtze().fetch" + ")" ] }, { diff --git a/docs/notebooks/Running_HMETS_with_CANOPEX_dataset.ipynb b/docs/notebooks/Running_HMETS_with_CANOPEX_dataset.ipynb index 6e20d989..f3ffb6ff 100644 --- a/docs/notebooks/Running_HMETS_with_CANOPEX_dataset.ipynb +++ b/docs/notebooks/Running_HMETS_with_CANOPEX_dataset.ipynb @@ -29,11 +29,9 @@ "from ravenpy.config import emulators\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", + "from ravenpy.testing.utils import get_file\n", "from ravenpy.utilities.calibration import SpotSetup\n", "\n", - "get_file = yangtze().fetch\n", - "\n", "# Make a temporary folder\n", "tmp = Path(tempfile.mkdtemp())\n", "\n", diff --git a/docs/notebooks/Sensitivity_analysis.ipynb b/docs/notebooks/Sensitivity_analysis.ipynb index c7ed8421..8113317b 100644 --- a/docs/notebooks/Sensitivity_analysis.ipynb +++ b/docs/notebooks/Sensitivity_analysis.ipynb @@ -41,9 +41,7 @@ "from ravenpy.config import emulators\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", - "\n", - "get_file = yangtze().fetch" + "from ravenpy.testing.utils import get_file" ] }, { diff --git a/docs/notebooks/time_series_analysis.ipynb b/docs/notebooks/time_series_analysis.ipynb index d002b8c9..f54eb4c2 100644 --- a/docs/notebooks/time_series_analysis.ipynb +++ b/docs/notebooks/time_series_analysis.ipynb @@ -21,9 +21,7 @@ "from pandas.plotting import register_matplotlib_converters\n", "\n", "# Utility that simplifies working with test data hosted on GitHub\n", - "from ravenpy.testing.utils import yangtze\n", - "\n", - "get_file = yangtze().fetch\n", + "from ravenpy.testing.utils import get_file\n", "\n", "register_matplotlib_converters()" ] diff --git a/src/ravenpy/testing/utils.py b/src/ravenpy/testing/utils.py index 13f8c557..405683b1 100644 --- a/src/ravenpy/testing/utils.py +++ b/src/ravenpy/testing/utils.py @@ -49,6 +49,7 @@ "default_testdata_repo_url", "default_testdata_version", "gather_testing_data", + "get_file", "open_dataset", "populate_testing_data", "show_versions", @@ -390,6 +391,32 @@ def _downloader( return _yangtze +def get_file( + name: str, + _yangtze_kwargs: dict[str, Path | str | bool] | None = None, +): + r""" + Convenience function to get a file from the RavenPy testing data using the `yangtze` class. + + This is a thin wrapper around the `yangtze` class to make it easier to access RavenPy testing datasets. 
+
+    Parameters
+    ----------
+    name : str
+        Name of the file containing the dataset.
+    _yangtze_kwargs : dict, optional
+        Keyword arguments passed to the `yangtze` function.
+
+    Returns
+    -------
+    str
+        The path to the file.
+    """
+    if _yangtze_kwargs is None:
+        _yangtze_kwargs = {}
+    return yangtze(**_yangtze_kwargs).fetch(name)
+
+
 def open_dataset(
     name: str,
     _yangtze_kwargs: dict[str, Path | str | bool] | None = None,
@@ -420,7 +447,7 @@
     yangtze : Pooch wrapper for accessing the RavenPy testing data.
     """
     if _yangtze_kwargs is None:
-        yangtze_kwargs = {}
+        _yangtze_kwargs = {}
     return _open_dataset(yangtze(**_yangtze_kwargs).fetch(name), **xr_kwargs)
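Taken together, `get_file` and `open_dataset` define the migration target for the notebooks updated above. A minimal sketch of the intended usage follows; the branch and cache values are illustrative, mirroring the `_yangtze_kwargs` keys exercised in `test_open_dataset`:

.. code-block:: python

    from ravenpy.testing.utils import get_file, open_dataset

    # Fetch a file from the raven-testdata repository; results are cached locally.
    fn = get_file("raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc")

    # Open the same dataset, overriding the testing-data branch and cache directory.
    ds = open_dataset(
        "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc",
        _yangtze_kwargs={
            "branch": "v2025.5.16",  # the default testing-data version
            "cache_dir": "/tmp/raven-testdata",  # illustrative scratch location
        },
    )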