NOAA-GFDL · FlorianDeconinck · Jul 2, 2025 · Jul 2, 2025 · Jul 2, 2025 · oelbert
diff --git a/ndsl/__init__.py b/ndsl/__init__.py
@@ -1,4 +1,5 @@
 from . import dsl  # isort:skip
+from .logging import ndsl_log  # isort:skip
 from .comm.communicator import CubedSphereCommunicator, TileCommunicator
 from .comm.local_comm import LocalComm
 from .comm.mpi import MPIComm
@@ -22,7 +23,6 @@
 from .halo.updater import HaloUpdater, HaloUpdateRequest, VectorInterfaceHaloUpdater
 from .initialization.allocator import QuantityFactory
 from .initialization.sizer import GridSizer, SubtileGridSizer
-from .logging import ndsl_log
 from .monitor.netcdf_monitor import NetCDFMonitor
 from .namelist import Namelist
 from .performance.collector import NullPerformanceCollector, PerformanceCollector

diff --git a/ndsl/debug/debugger.py b/ndsl/debug/debugger.py
@@ -8,6 +8,7 @@
 
 from ndsl.logging import ndsl_log
 from ndsl.quantity import Quantity
+from ndsl.quantity.field_bundle import FieldBundle
 
 
 @dataclasses.dataclass
@@ -86,6 +87,8 @@ def save_as_dataset(self, data_as_dict, savename, is_in) -> None:
                     data_arrays[f"{name}.{field.name}"] = self._to_xarray(
                         getattr(data, field.name), field.name
                     )
+            elif isinstance(data, FieldBundle):
+                data_arrays[name] = data.quantity.field_as_xarray
             else:
                 data_arrays[name] = self._to_xarray(data, name)
 

diff --git a/ndsl/quantity/quantity.py b/ndsl/quantity/quantity.py
@@ -162,6 +162,8 @@ def from_data_array(
 
     def to_netcdf(self, path: str, name="var", rank: int = -1, all_data=False) -> None:
         if rank < 0 or MPI.COMM_WORLD.Get_rank() == rank:
+            if rank < 0:
+                rank = MPI.COMM_WORLD.Get_rank()
             if all_data:
                 self.data_as_xarray.to_dataset(name=name).to_netcdf(
                     f"{path}__r{rank}.nc4"

diff --git a/ndsl/stencils/testing/best_guess_diff.py b/ndsl/stencils/testing/best_guess_diff.py
@@ -0,0 +1,194 @@
+import argparse
+import pathlib
+
+import numpy as np
+import xarray as xr
+import yaml
+
+
+def get_parser():
+    parser = argparse.ArgumentParser(
+        "Attempt to diff two NetCDFs with similar data."
+        "Differences that can be reconcialed are strict domain vs halo, variable name mapping."
-        "Differences that can be reconcialed are strict domain vs halo, variable name mapping."
+        "Differences that can be reconciled are strict domain vs halo, variable name mapping."
-        "Differences that can be reconcialed are strict domain vs halo, variable name mapping."
+        "Differences that can be reconciled are strict domain vs halo, variable name mapping."
+        "They program will report on assumptions taken."
-        "They program will report on assumptions taken."
+        "The program will report on assumptions taken."
-        "They program will report on assumptions taken."
+        "The program will report on assumptions taken."
+    )
+    parser.add_argument(
+        "netcdf_A",
+        type=str,
+        help="path of NetCDFs, named A in the logs.",
-        help="path of NetCDFs, named A in the logs.",
+        help="path of first NetCDF file, named A in the logs.",
-        help="path of NetCDFs, named A in the logs.",
+        help="path of first NetCDF file, named A in the logs.",
+    )
+    parser.add_argument(
+        "netcdf_B",
+        type=str,
+        help="path of NetCDFs, named B in the logs.",
-        help="path of NetCDFs, named B in the logs.",
+        help="path of second NetCDF file, named B in the logs.",
-        help="path of NetCDFs, named B in the logs.",
+        help="path of second NetCDF file, named B in the logs.",
+    )
+    parser.add_argument(
+        "-nm",
+        "--name_mapping",
+        type=str,
+        help="[Optional] Yaml file describing the mapping of the names.",
+    )
+    parser.add_argument(
+        "-ha",
+        "--halo",
+        type=int,
+        default=3,
+        help="[Optional] Halo size if any, default to 3.",
+    )
+    return parser
+
+
+def main(
+    netcdf_A: str,
+    netcdf_B: str,
+    name_mapping: str | None = None,
+    halo: int = 3,
+):
+    A = xr.open_dataset(netcdf_A)
+    B = xr.open_dataset(netcdf_B)
+    name_map = {}
+    if name_mapping is not None:
+        with open(name_mapping) as f:
+            name_map = yaml.safe_load(f)
+
+    dataset = {}
+    for name_A, data_A in A.items():
+        print(f"Best guess for {name_A} from A:")
+        # Resolve name
+        resolved_name_B = None
+        if name_A not in B.keys():
+            if name_A not in name_map.keys():
+                print("  [Failed] name can't be found in B nor in name mapping")
+            else:
+                resolved_name_B = name_map[name_A]
+        else:
+            resolved_name_B = name_A
+
+        if resolved_name_B is None:
+            continue
+        print(f"  [Hyp] use {resolved_name_B} for B")
+
+        # Resolve domain size
+        data_B = B[resolved_name_B]
+        if len(data_A.shape) >= 5:
+            print(
+                "  [Hyp] A data dims are >= 5, assuming savepoints/rank are the firt two and going A[0, 0, ::]"
+            )
+            data_A = data_A[0, 0, ::]
+        if len(data_B.shape) >= 5:
+            print(
+                "  [Hyp] B data dims are >= 5, assuming savepoints/rank are the firt two and going B[0, 0, ::]"
+            )
+            data_B = data_B[0, 0, ::]
+
+        if len(data_A.shape) != len(data_B.shape):
+            print(
+                f"  [Failed] A is shape {len(data_A.shape)}, B is shape {len(data_B.shape)}: can't reconcile."
+            )
+            continue
+
+        # - Assume we have 0 == I, 1 == J now
+        resolved_I = None
+        A_uses_halo = False
+        B_uses_halo = False
+        if data_A.shape[0] != data_B.shape[0]:
+            if data_A.shape[0] < data_B.shape[0]:
+                if data_B.shape[0] - 2 * halo != data_A.shape[0]:
+                    print(
+                        f"  [Failed] B in dim I is too big, even with halo substracted {data_B.shape[0]} (halo: {halo})"
+                    )
+                else:
+                    B_uses_halo = True
+                    resolved_I = data_A.shape[0]
+            else:
+                if data_A.shape[0] - 2 * halo != data_B.shape[0]:
+                    print(
+                        f"  [Failed] A in dim I is too big, even with halo substracted {data_A.shape[0]} (halo: {halo})"
+                    )
+                else:
+                    A_uses_halo = True
+                    resolved_I = data_B.shape[0]
+        else:
+            resolved_I = data_A.shape[0]
+
+        if resolved_I is None:
+            continue
+
+        print(f"  [Hyp] Using {resolved_I} as I dim size")
+
+        resolved_J = None
+        if data_A.shape[1] != data_B.shape[1]:
+            if data_A.shape[1] < data_B.shape[1]:
+                if data_B.shape[1] - 2 * halo != data_A.shape[1]:
+                    print(
+                        f"  [Failed] B in dim J is too big, even with halo substracted {data_B.shape[1]} (halo: {halo})"
+                    )
+                else:
+                    resolved_J = data_A.shape[1]
+            else:
+                if data_A.shape[1] - 2 * halo != data_B.shape[1]:
+                    print(
+                        f"  [Failed] A in dim J is too big, even with halo substracted {data_A.shape[1]} (halo: {halo})"
+                    )
+                else:
+                    resolved_J = data_B.shape[1]
+        else:
+            resolved_J = data_A.shape[1]
+
+        if resolved_J is None:
+            continue
+
+        print(f"  [Hyp] Using {resolved_J} as J dim size")
+
+        # - Assume 2 == K
+        if data_A.shape[2] != data_B.shape[2]:
+            print(
+                f"  [Failed] Can't reconcile K dim: A ({data_A.shape[2]}) != B ({data_B.shape[2]})"
+            )
+            continue
+        resolved_K = data_A.shape[2]
+
+        print(f"  [Hyp] Using {resolved_K} as K dim size")
+
+        # We should now be ready to diff
+        if A_uses_halo:
+            data_A = data_A[halo:-halo, halo:-halo, ::]
+        if B_uses_halo:
+            data_B = data_B[halo:-halo, halo:-halo, ::]
+
+        dims = [f"D{i}_{s}" for i, s in enumerate(data_A.shape)]
+        absolute_diff = data_A.data - data_B.data
+        dataset[name_A] = xr.DataArray(
+            absolute_diff,
+            dims=dims,
+        )
+
+        # ULP diffs
+        max_values = np.maximum(
+            np.absolute(data_A.data.flatten()), np.absolute(data_B.data.flatten())
+        )
+        ulp_diff = np.divide(np.abs(absolute_diff.flatten()), np.spacing(max_values))
+        ulp_diff = np.sort(ulp_diff)
+        dataset[f"ulp_{name_A}"] = xr.DataArray(
+            ulp_diff,
+            dims=[f"GP_{ulp_diff.shape[0]}"],
+        )
+
+        print("  [Success]")
+
+    xr.Dataset(dataset).to_netcdf(f"best_guest_diff_{pathlib.Path(netcdf_A).stem}.nc4")
+
+
+def entry_point():
+    parser = get_parser()
+    args = parser.parse_args()
+    main(
+        netcdf_A=args.netcdf_A,
+        netcdf_B=args.netcdf_B,
+        name_mapping=args.name_mapping,
+        halo=args.halo,
+    )
+
+
+if __name__ == "__main__":
+    entry_point()
diff --git a/setup.py b/setup.py
@@ -65,6 +65,7 @@ def local_pkg(name: str, relative_path: str) -> str:
     entry_points={
         "console_scripts": [
             "ndsl-serialbox_to_netcdf = ndsl.stencils.testing.serialbox_to_netcdf:entry_point",
+            "best_guess_diff = ndsl.stencils.testing.best_guess_diff:entry_point",
         ]
     },
 )