Generating physically-consistent high-resolution climate data with hard-constrained neural networks (#137)

aniket2405 · pre-commit-ci[bot] · web-flow · commit ff43cc1f22a1 · 2025-02-24T07:23:15.000Z
* Generating physically-consistent high-resolution climate data with hard-constrained neural networks * Generating physically-consistent high-resolution climate data with hard-constrained neural networks * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Modify constraint layer and update UTs to comply with constraints Add functional logic for additive, multiplicative, and softmax physical constraints * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Use einops for tensor manipulation, update constraint set-up configuration and add UTs for each constraint type * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Minor fix due to rebase * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add default constraint as none and fix NormalizedLoss function when there are no constraints applied for forecasting * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Rearrange graph to grid only when constraints are to be applied * Fix test_forecaster_and_loss_irregular after rebase * Resolve ruff errors regarding docstrings * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/graph_weather/models/forecast.py b/graph_weather/models/forecast.py
@@ -3,13 +3,15 @@
 from typing import Optional
 
 import torch
+from einops import rearrange, repeat
 from huggingface_hub import PyTorchModelHubMixin
 
 from graph_weather.models import Decoder, Encoder, Processor
+from graph_weather.models.layers.constraint_layer import PhysicalConstraintLayer
 
 
 class GraphWeatherForecaster(torch.nn.Module, PyTorchModelHubMixin):
-    """Main weather prediction model from the paper"""
+    """Main weather prediction model from the paper with physical constraints"""
 
     def __init__(
         self,
@@ -29,6 +31,7 @@ def __init__(
         hidden_layers_decoder: int = 2,
         norm_type: str = "LayerNorm",
         use_checkpointing: bool = False,
+        constraint_type: str = "none",
     ):
         """
         Graph Weather Model based off https://arxiv.org/pdf/2202.07575.pdf
@@ -53,11 +56,24 @@ def __init__(
             norm_type: Type of norm for the MLPs
                 one of 'LayerNorm', 'GraphNorm', 'InstanceNorm', 'BatchNorm', 'MessageNorm', or None
             use_checkpointing: Use gradient checkpointing to reduce model memory
+            constraint_type: Type of constraint to apply for physical constraints
+                one of 'additive', 'multiplicative', 'softmax', or 'none'
         """
         super().__init__()
         self.feature_dim = feature_dim
+        self.constraint_type = constraint_type
         if output_dim is None:
             output_dim = self.feature_dim
+        self.output_dim = output_dim
+
+        # Compute the geographical grid shape from lat_lons.
+        unique_lats = sorted(set(lat for lat, _ in lat_lons))
+        unique_lons = sorted(set(lon for _, lon in lat_lons))
+        self.grid_shape = (len(unique_lats), len(unique_lons))  # (H, W)
+
+        # Store original node order and create grid mapping
+        self.original_lat_lons = lat_lons.copy()
+        self._create_grid_mapping(unique_lats, unique_lons)
 
         self.encoder = Encoder(
             lat_lons=lat_lons,
@@ -98,6 +114,51 @@ def __init__(
             use_checkpointing=use_checkpointing,
         )
 
+        # Add physical constraint layer if constraint_type is not "none"
+        if self.constraint_type != "none":
+            self.constraint = PhysicalConstraintLayer(
+                model=self,
+                grid_shape=self.grid_shape,
+                constraint_type=constraint_type,
+                upsampling_factor=1,
+            )
+
+    def _create_grid_mapping(self, unique_lats, unique_lons):
+        """Build ``self.node_to_grid``: one (row, col) grid index per node, in node order.
+
+        Args:
+            unique_lats: Distinct latitudes in row order; a value's position is its row.
+            unique_lons: Distinct longitudes in column order; a value's position is its column.
+
+        Raises:
+            KeyError: If a node coordinate is missing from the given axis values.
+        """
+        # Exact rank lookup instead of (value - min) / span scaling: the scaling divided
+        # by zero on a one-row/one-column grid and mis-binned unevenly spaced axes.
+        row_of = {lat: row for row, lat in enumerate(unique_lats)}
+        col_of = {lon: col for col, lon in enumerate(unique_lons)}
+        self.node_to_grid = [(row_of[lat], col_of[lon]) for lat, lon in self.original_lat_lons]
+
+    def graph_to_grid(self, graph_tensor):
+        """Scatter node features onto the lat/lon grid: [B, N, C] -> [B, C, H, W].
+
+        Cells with no node in ``self.node_to_grid`` stay zero. The grid is allocated from
+        the input tensor, so the result keeps the input's dtype and device.
+        """
+        batch_size, _, features = graph_tensor.shape
+        grid = graph_tensor.new_zeros(batch_size, features, *self.grid_shape)
+        for node_idx, (row, col) in enumerate(self.node_to_grid):
+            grid[..., row, col] = graph_tensor[..., node_idx, :]
+        return grid
+
+    def grid_to_graph(self, grid_tensor):
+        """Gather grid cells in node order: [B, C, H, W] -> [B, H*W, C], same dtype/device."""
+        batch_size, features, height, width = grid_tensor.shape
+        graph = grid_tensor.new_zeros(batch_size, height * width, features)
+        for node_idx, (row, col) in enumerate(self.node_to_grid):
+            graph[..., node_idx, :] = grid_tensor[..., row, col]
+        return graph
+
     def forward(self, features: torch.Tensor) -> torch.Tensor:
         """
         Compute the new state of the forecast
@@ -111,4 +172,22 @@ def forward(self, features: torch.Tensor) -> torch.Tensor:
         x, edge_idx, edge_attr = self.encoder(features)
         x = self.processor(x, edge_idx, edge_attr)
         x = self.decoder(x, features[..., : self.feature_dim])
+
+        # Here, assume decoder output x is a 4D tensor,
+        # e.g. [B, output_dim, H, W] where H and W are grid dimensions.
+        # Convert graph output to grid format
+
+        # Apply physical constraints to decoder output
+        if self.constraint_type != "none":
+            x = rearrange(x, "b (h w) c -> b c h w", h=self.grid_shape[0], w=self.grid_shape[1])
+            # Extract the low-res reference from the input.
+            # (Original features has shape [B, num_nodes, feature_dim])
+            lr = features[..., : self.feature_dim]  # shape: [B, num_nodes, feature_dim]
+            # Convert from node format to grid format using the grid_shape computed in __init__
+            # From [B, num_nodes, feature_dim] to [B, feature_dim, H, W]
+            lr = rearrange(lr, "b (h w) c -> b c h w", h=self.grid_shape[0], w=self.grid_shape[1])
+            if lr.size(1) != x.size(1):
+                repeat_factor = x.size(1) // lr.size(1)
+                lr = repeat(lr, "b c h w -> b (r c) h w", r=repeat_factor)
+            x = self.constraint(x, lr)
         return x
diff --git a/graph_weather/models/layers/constraint_layer.py b/graph_weather/models/layers/constraint_layer.py
@@ -0,0 +1,188 @@
+"""Module for physical constraint layers used in graph weather models.
+
+This module implements several constraints on a network’s intermediate outputs,
+ensuring physical consistency with an input at a lower resolution.
+
+"""
+
+import torch
+import torch.nn as nn
+
+
+class PhysicalConstraintLayer(nn.Module):
+    """Hard-constraint layer tying a high-resolution output ỹ to a low-resolution input x.
+
+    This module implements several constraint types on the network’s intermediate outputs ỹ,
+    given the corresponding low-resolution input x. The following equations are implemented
+    (with all operations acting per patch – here, a patch is the full grid of H×W pixels):
+
+    Additive constraint:
+      y = ỹ + x - avg(ỹ)
+
+    Multiplicative constraint:
+      y = ỹ * ( x / avg(ỹ) )
+
+    Softmax constraint:
+      y = exp(ỹ) * ( x / sum(exp(ỹ)) )
+
+    We assume that both the intermediate outputs and the low-resolution reference are 4D
+    tensors in grid format, with shape [B, C, H, W], where n = H*W is the number of pixels
+    (or nodes) in a patch.
+    """
+
+    def __init__(
+        self, model, grid_shape, upsampling_factor, constraint_type="none", exp_factor=1.0
+    ):
+        """Initialize the PhysicalConstraintLayer.
+
+        Args:
+            model (nn.Module): The model containing the helper methods
+                'graph_to_grid' and 'grid_to_graph'. Stored without submodule registration.
+            grid_shape (tuple): Expected spatial dimensions (H, W) of the high-resolution grid.
+            upsampling_factor (int): Factor by which the low-resolution grid is upsampled.
+            constraint_type (str, optional): The constraint to apply. Options are
+                'additive', 'multiplicative', or 'softmax'. Defaults to "none".
+            exp_factor (float, optional): Exponent factor for softmax. Defaults to 1.0.
+        """
+        super().__init__()
+        # Bypass nn.Module.__setattr__: when the owning model holds this layer as a child,
+        # also registering the owner here would make train()/eval()/to() recurse forever.
+        object.__setattr__(self, "model", model)
+        self.constraint_type = constraint_type
+        self.grid_shape = grid_shape
+        self.exp_factor = exp_factor
+        self.upsampling_factor = upsampling_factor
+        self.pool = nn.AvgPool2d(kernel_size=upsampling_factor)
+
+    def forward(self, hr_graph, lr_graph):
+        """Apply the selected physical constraint.
+
+        Processes the high-resolution output and low-resolution input by converting
+        between graph and grid formats as needed, and then applying the specified constraint.
+
+        Args:
+            hr_graph (torch.Tensor): High-resolution model output in either graph (3D)
+                or grid (4D) format.
+            lr_graph (torch.Tensor): Low-resolution input in the corresponding
+                graph or grid format.
+
+        Returns:
+            torch.Tensor: The adjusted output in graph format.
+        """
+        # Check if inputs are in graph (3D) or grid (4D) formats.
+        if hr_graph.dim() == 3:
+            # Convert graph format to grid format
+            hr_grid = self.model.graph_to_grid(hr_graph)
+            lr_grid = self.model.graph_to_grid(lr_graph)
+        elif hr_graph.dim() == 4:
+            # Already in grid format: [B, C, H, W]
+            _, _, H, W = hr_graph.shape
+            if (H, W) != self.grid_shape:
+                raise ValueError(f"Expected spatial dimensions {self.grid_shape}, got {(H, W)}")
+            hr_grid = hr_graph
+            lr_grid = lr_graph
+        else:
+            raise ValueError("Input tensor must be either 3D (graph) or 4D (grid).")
+
+        # Apply constraint based on type in grid format
+        if self.constraint_type == "additive":
+            result = self.additive_constraint(hr_grid, lr_grid)
+        elif self.constraint_type == "multiplicative":
+            result = self.multiplicative_constraint(hr_grid, lr_grid)
+        elif self.constraint_type == "softmax":
+            result = self.softmax_constraint(hr_grid, lr_grid)
+        else:
+            raise ValueError(f"Unknown constraint type: {self.constraint_type}")
+
+        # Convert grid back to graph format
+        return self.model.grid_to_graph(result)
+
+    def additive_constraint(self, hr, lr):
+        """Shift the output so its node-mean matches the low-resolution reference.
+
+        Implements y = ỹ + ( x - avg(ỹ) ), where avg(ỹ) is the mean over all nodes
+        (dim 1 of the graph-format tensor), i.e. the whole grid is treated as one patch.
+
+        Args:
+            hr (torch.Tensor): High-resolution tensor in grid format [B, C, H_hr, W_hr].
+            lr (torch.Tensor): Low-resolution tensor in grid format [B, C, h_lr, w_lr].
+
+        Returns:
+            torch.Tensor: Adjusted high-resolution tensor in grid format.
+        """
+        # Convert grids to graph format using model's mapping
+        hr_graph = self.model.grid_to_graph(hr)
+        lr_graph = self.model.grid_to_graph(lr)
+
+        # Apply constraint logic
+        # Compute average over NODES
+        avg_hr = hr_graph.mean(dim=1, keepdim=True)
+        diff = lr_graph - avg_hr
+
+        # Tile the correction upsampling_factor**2 times along the node axis
+        diff_expanded = diff.repeat(1, self.upsampling_factor**2, 1)
+
+        # Apply correction and convert back to GRID format
+        adjusted_graph = hr_graph + diff_expanded
+        return self.model.graph_to_grid(adjusted_graph)
+
+    def multiplicative_constraint(self, hr, lr):
+        """Enforce conservation using a multiplicative correction in graph space.
+
+        The correction is applied by scaling the high-resolution output by a ratio computed
+        from the low-resolution input and the average of the high-resolution output.
+
+        Args:
+            hr (torch.Tensor): High-resolution tensor in grid format [B, C, H_hr, W_hr].
+            lr (torch.Tensor): Low-resolution tensor in grid format [B, C, h_lr, w_lr].
+
+        Returns:
+            torch.Tensor: Adjusted high-resolution tensor in grid format.
+        """
+        # Convert grids to graph format using model's mapping
+        hr_graph = self.model.grid_to_graph(hr)
+        lr_graph = self.model.grid_to_graph(lr)
+
+        # Apply constraint logic
+        # Compute average over NODES
+        avg_hr = hr_graph.mean(dim=1, keepdim=True)
+        lr_patch_avg = lr_graph.mean(dim=1, keepdim=True)
+
+        # Compute ratio and expand to match HR graph structure
+        ratio = lr_patch_avg / (avg_hr + 1e-8)
+
+        # Apply multiplicative correction and convert back to GRID format
+        adjusted_graph = hr_graph * ratio
+        return self.model.graph_to_grid(adjusted_graph)
+
+    def softmax_constraint(self, y, lr):
+        """Apply a softmax-based constraint correction.
+
+        The softmax correction scales the exponentiated high-resolution output so that the
+        sum over spatial blocks matches the low-resolution reference.
+
+        Args:
+            y (torch.Tensor): High-resolution tensor in grid format [B, C, H, W].
+            lr (torch.Tensor): Low-resolution tensor in grid format [B, C, h, w].
+
+        Returns:
+            torch.Tensor: Adjusted high-resolution tensor in grid format after applying
+            the softmax constraint.
+        """
+        # Apply the exponential function
+        y = torch.exp(self.exp_factor * y)
+
+        # Pool over spatial blocks
+        kernel_area = self.upsampling_factor**2
+        sum_y = self.pool(y) * kernel_area
+
+        # Ensure that lr * (1/sum_y) is contiguous
+        ratio = (lr * (1 / sum_y)).contiguous()
+
+        # Block of ones for the kron expansion, created from ratio so it shares ratio's
+        # dtype and device (torch.ones would default to float32).
+        expansion = ratio.new_ones((self.upsampling_factor, self.upsampling_factor))
+
+        # Expand the low-resolution ratio and correct the y values so that the block sum matches lr.
+        out = y * torch.kron(ratio, expansion)
+        return out
diff --git a/graph_weather/models/losses.py b/graph_weather/models/losses.py
@@ -32,10 +32,12 @@ def __init__(
         super().__init__()
         self.feature_variance = torch.tensor(feature_variance)
         assert not torch.isnan(self.feature_variance).any()
-        weights = []
-        for lat, lon in lat_lons:
-            weights.append(np.cos(lat * np.pi / 180.0))
-        self.weights = torch.tensor(weights, dtype=torch.float)
+        # Compute unique latitudes from the provided lat/lon pairs.
+        unique_lats = sorted(set(lat for lat, _ in lat_lons))
+        # Use the cosine of each unique latitude (converted to radians) as its weight.
+        self.weights = torch.tensor(
+            [np.cos(lat * np.pi / 180.0) for lat in unique_lats], dtype=torch.float
+        )
         self.normalize = normalize
         assert not torch.isnan(self.weights).any()
 
@@ -67,8 +69,24 @@ def forward(self, pred: torch.Tensor, target: torch.Tensor):
         assert not torch.isnan(out).any()
         # Mean of the physical variables
         out = out.mean(-1)
-        print(out.shape)
-        # Weight by the latitude, as that changes, so does the size of the pixel
-        out = out * self.weights.expand_as(out)
+
+        # Flatten all dimensions except the batch dimension.
+        B, *dims = out.shape
+        num_nodes = np.prod(
+            dims
+        )  # Total number of grid nodes (e.g., if grid is HxW, then num_nodes = H*W)
+        out = out.view(B, num_nodes)
+
+        # Determine the number of unique latitude weights and infer the number of grid columns.
+        num_unique = self.weights.shape[0]  # e.g., number of unique latitudes (rows)
+        num_lon = num_nodes // num_unique  # e.g. if 2592 nodes and 36 unique lat, then num_lon=72
+
+        # Tile the unique latitude weights into a full weight grid
+        weight_grid = self.weights.unsqueeze(1).expand(num_unique, num_lon).reshape(1, num_nodes)
+        weight_grid = weight_grid.expand(B, num_nodes)  # Now weight_grid is [B, num_nodes]
+
+        # Multiply the per-node error by the corresponding weight.
+        out = out * weight_grid
+
         assert not torch.isnan(out).any()
         return out.mean()
diff --git a/tests/test_model.py b/tests/test_model.py