diff --git a/operator_learning/DarcyFlowGPU.py b/operator_learning/DarcyFlowGPU.py
index d224d4d..bf01d8c 100644
--- a/operator_learning/DarcyFlowGPU.py
+++ b/operator_learning/DarcyFlowGPU.py
@@ -1,4 +1,14 @@
+#!/usr/bin/env python
+# coding: utf-8
+
 # # Darcy Flow with GPU
+#
+# $-\nabla\cdot(a\nabla u)=1$ with zero Dirichlet BC on the domain $(0,1)^2$
+#
+# G: log(a) -> u
+# Assume log(a) ~ GP with Matern covariance
+
 import torch
 import torch.nn as nn
 import math
@@ -10,421 +20,426 @@ import pickle
 
 class MaternGP:
     """
     Matern Gaussian Process
     If dim=1, then returns samples of size 'size'.
     If dim=2, then returns samples of size 'size x size'
     """
     def __init__(self, size, dim=1, alpha=2, tau=3, sigma=None, device=None):
         self.device = device
         self.dim = dim
 
         if sigma is None:
             sigma = tau**(0.5*(2*alpha - self.dim))
 
         if self.dim == 1:
             i_max = size
             i = torch.arange(start=1, end=i_max+1, step=1, device=device)
             self.basis_idx = i
 
             self.sqrt_eig = sigma*(((math.pi**2)*(i**2) + tau**2)**(-alpha/2.0))
 
             self.size = (size,)
 
             x = torch.linspace(0, 1, size, device=device)
             self.grid = x
             # Basis Functions
             self.phi = torch.sin(math.pi * i[:, None] * x[None, :])
 
         if self.dim == 2:
             i_max = size
             j_max = size
             i = torch.arange(start=1, end=i_max+1, step=1, device=device)
             j = torch.arange(start=1, end=j_max+1, step=1, device=device)
             I, J = torch.meshgrid(i,j,indexing='ij')
 
             self.sqrt_eig = sigma*(((math.pi**2)*(I**2) + (math.pi**2)*(J**2) + tau**2)**(-alpha/2.0))
 
             self.size = (size, size)
 
             x = torch.linspace(0, 1, size, device=device)
             y = torch.linspace(0, 1, size, device=device)
             self.grid = torch.meshgrid(x,y,indexing='xy')
 
             # Basis Functions
             self.phi_x = torch.sin(math.pi * i[:, None] * x[None, :]) # (i_max, M)
             self.phi_y = torch.sin(math.pi * j[:, None] * y[None, :]) # (j_max, M)
 
     def sample(self, N, z=None):
         """
         Returns N samples from the Matern GP
         """
         if z is None:
             z = torch.randn(N,*self.size, device=self.device)
         coeff = self.sqrt_eig*z # if dim==1, N x size; if dim==2, N x size x size
 
         if self.dim == 1:
             u = math.sqrt(2) * (coeff @ self.phi) # N x size
 
         if self.dim == 2:
             u = torch.einsum('nij,ik,jl->nkl', coeff, self.phi_x, self.phi_y)
             u = 2 * u # (N, size, size)
 
         return u
 
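`MaternGP` draws Karhunen-Loeve coefficients against a sine eigenbasis, with eigenvalues σ²(π²i² + τ²)^(-α) in 1-D. A minimal standalone sketch of the same construction, under the file's default σ (the names below are illustrative, not part of the repo):

```python
import math
import torch

size, alpha, tau = 64, 2, 3
sigma = tau ** (0.5 * (2 * alpha - 1))           # default used by MaternGP(dim=1)
i = torch.arange(1, size + 1)
sqrt_eig = sigma * ((math.pi ** 2) * i ** 2 + tau ** 2) ** (-alpha / 2.0)
x = torch.linspace(0, 1, size)
phi = torch.sin(math.pi * i[:, None] * x[None, :])   # sine eigenfunctions, zero at x=0,1
u = math.sqrt(2) * ((sqrt_eig * torch.randn(size)) @ phi)  # one sample path
print(u.shape)   # torch.Size([64]); coefficients decay like i**(-alpha)
```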
 class SolveDarcyFlow(nn.Module):
     def __init__(self, M, device=None):
         """
         Solves PDE -div(a*grad(u))=f with DBC on grid MxM including boundaries
         """
         super().__init__()
         self.M = M
         self.h = 1/(M-1)
         self.device = device
 
     def getAx(self, a):
         """
         Build the diagonal matrix of horizontal harmonic means.
         a: (M,M)
         returns Ax: (M*(M-1), M*(M-1)) dense torch.Tensor
         """
         # sums on each horizontal edge
         Ax_den = a[:, :-1] + a[:, 1:] # (M, M-1)
         Ax_matrix = torch.where(
             Ax_den == 0, # if
             torch.zeros_like(Ax_den), # then
             2 * a[:, :-1] * a[:, 1:] / Ax_den # else
         )
         Ax_flat = Ax_matrix.reshape(-1) # (M*(M-1),)
         return torch.diag(Ax_flat) # diagonal matrix
 
     def getAy(self, a):
         """
         Build the diagonal matrix of vertical harmonic means.
         a: (M,M)
         returns Ay: (M*(M-1), M*(M-1))
         """
         Ay_den = a[:-1, :] + a[1:, :] # (M-1, M)
         Ay_matrix = torch.where(
             Ay_den == 0,
             torch.zeros_like(Ay_den),
             2 * a[:-1, :] * a[1:, :] / Ay_den
         )
         Ay_flat = Ay_matrix.reshape(-1)
         return torch.diag(Ay_flat)
 
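`getAx`/`getAy` place the harmonic mean 2·a_L·a_R/(a_L + a_R) of the two neighbouring cell values on each edge — the usual finite-volume transmissibility, which vanishes when a cell is impermeable. A quick check with made-up values:

```python
import torch

a_left = torch.tensor([1.0, 4.0, 0.0])
a_right = torch.tensor([1.0, 1.0, 2.0])
den = a_left + a_right
# torch.where guards the fully blocked case a_left = a_right = 0 (0/0);
# a single zero conductivity already drives the harmonic mean to zero.
t = torch.where(den == 0, torch.zeros_like(den), 2 * a_left * a_right / den)
print(t)  # tensor([1.0000, 1.6000, 0.0000])
```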
     def assemble_operator(self, a):
         """
         Given a (M,M) tensor of conductivity,
         assemble the (M^2 × M^2) finite-difference operator L
         with Dirichlet BC built in.
         """
         M = self.M
         h = self.h
 
         # 1) edge-flux diagonals
         Ax = self.getAx(a) # (M*(M-1)) x (M*(M-1))
         Ay = self.getAy(a)
 
         # 2) 1D derivative matrix D: (M-1, M)
         idx = torch.arange(M - 1, device=self.device)
         D = torch.zeros((M - 1, M), device=self.device)
         D[idx, idx] = -1.0
         D[idx, idx + 1] = +1.0
         D = D / h
 
         I = torch.eye(M, device=self.device)
 
         # 3) Kron products to lift to 2D
         Dx = torch.kron(I, D) # ((M*M-M) x M^2)
         Dy = torch.kron(D, I) # ((M*M-M) x M^2)
 
         # 4) discrete operator L = Dx.T Ax Dx + Dy.T Ay Dy
         L = Dx.T @ Ax @ Dx + Dy.T @ Ay @ Dy # (M^2, M^2)
 
         # 5) enforce Dirichlet BC: row i for any boundary node i has L[i,i]=1, all else 0
         # boundary indices:
         # Row major flattening: idx=r*M+c
         top = torch.arange(0, M, device=self.device) # row 0, all columns from 0 to M-1
         bottom = torch.arange((M-1)*M, M*M, device=self.device) # row M-1, all columns from 0 to M-1
         left = torch.arange(0, M*M, M, device=self.device) # all rows from 0 to M-1, column 0
         right = torch.arange(M-1, M*M, M, device=self.device) # all rows from 0 to M-1, column M-1
         bidx = torch.unique(torch.cat((top, bottom, left, right)))
 
         # zero out those rows, then set coef to 1
         L[bidx, :] = 0.0
         L[bidx, bidx] = 1.0
 
         return L
 
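In `assemble_operator` above, the 1-D difference matrix is lifted to the 2-D grid with Kronecker products; a shape check of that step alone (illustrative, M hard-coded):

```python
import torch

M = 5
h = 1 / (M - 1)
idx = torch.arange(M - 1)
D = torch.zeros(M - 1, M)
D[idx, idx], D[idx, idx + 1] = -1.0, 1.0
D = D / h
I = torch.eye(M)
Dx = torch.kron(I, D)   # differences along each row of the flattened grid
Dy = torch.kron(D, I)   # differences along each column
print(Dx.shape, Dy.shape)  # torch.Size([20, 25]) torch.Size([20, 25])
```

Both lifted matrices map the M² grid values to the M(M-1) interior edges in one direction, which is why the original comment giving Dy the transposed shape was corrected above.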
     def forward(self, a, f=None):
         """
         Args:
             a: (N, M, M) tensor of N separate MxM conductivities
             f: (N, M, M) right-hand side; defaults to 1.0 in the interior, 0 on the boundary
 
         Returns:
             u: (N, M, M) solutions
         """
         N, M, _ = a.shape
 
         # default source term
         if f is None:
             f = torch.ones_like(a, device=self.device)
             # zero out boundary
             f[:, 0, :] = 0
             f[:, -1, :] = 0
             f[:, :, 0] = 0
             f[:, :, -1] = 0
 
         u = torch.zeros_like(a, device=self.device)
 
         # assemble once per sample
         for n in range(N):
             L = self.assemble_operator(a[n])
             f_flat = f[n].reshape(-1) # (M^2,)
             # dense solve
             u_flat = torch.linalg.solve(L, f_flat)
             u[n] = u_flat.reshape(M, M)
 
         return u
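The loop above assembles and densely solves one (M² × M²) system per sample. Since `torch.linalg.solve` accepts a batch of matrices, the loop could in principle be fused; a hedged sketch of that variant, assuming all N operators fit in memory (shapes and values below are illustrative):

```python
import torch

# Hypothetical batched variant of the per-sample solve in forward():
# L_batch: (N, M*M, M*M) stacked operators, f_batch: (N, M*M) right-hand sides.
N, M = 4, 8
L_batch = torch.eye(M * M).expand(N, -1, -1) + 0.01 * torch.randn(N, M * M, M * M)
f_batch = torch.ones(N, M * M)
u_flat = torch.linalg.solve(L_batch, f_batch.unsqueeze(-1)).squeeze(-1)  # (N, M*M)
print(u_flat.shape)
```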
 
 def train_model(M,N_samples,m,alpha,tau,num_epochs=5000,z=None,device=None):
     print('STARTING DATA GENERATION FOR TRAINING')
     # Instantiate GP sampler & PDE solver
     gp = MaternGP(size=M, dim=2, alpha=alpha, tau=tau, device=device)
     solver = SolveDarcyFlow(M=M, device=device)
 
     # Generate (a,u) pairs
-    a = torch.exp(m+gp.sample(N_samples,z=z)) # shape (N_samples, M, M)
+    log_a = m+gp.sample(N_samples,z=z)
+    a = torch.exp(log_a) # shape (N_samples, M, M)
     u = solver.forward(a) # shape (N_samples, M, M)
 
-    # Flatten a and u to input into DeepONet
-    a_flat = a.reshape(N_samples, -1) # (N, M*M)
+    # Flatten log_a and u to input into DeepONet
+    log_a_flat = log_a.reshape(N_samples, -1) # (N, M*M)
     u_flat = u.reshape(N_samples, -1) # (N, M*M)
 
     # Train/test split
     ntrain = int(0.8 * N_samples)
-    a_train, a_test = a_flat[:ntrain], a_flat[ntrain:]
+    log_a_train, log_a_test = log_a_flat[:ntrain], log_a_flat[ntrain:]
     u_train, u_test = u_flat[:ntrain], u_flat[ntrain:]
 
     # Trunk grid (same for train & test)
     X, Y = gp.grid
     x_trunk = torch.stack((X.ravel(), Y.ravel())).T # (M*M, 2)
 
     # DeepXDE data object
     data = dde.data.TripleCartesianProd(
-        X_train=(a_train, x_trunk),
+        X_train=(log_a_train, x_trunk),
         y_train=u_train,
-        X_test=(a_test, x_trunk),
+        X_test=(log_a_test, x_trunk),
         y_test=u_test,
     )
 
     # DeepONet Architecture
     p = 200 # # of DeepONet basis functions
     net = dde.nn.pytorch.DeepONetCartesianProd(
         layer_sizes_branch=[M*M, 256, 256, 128, p],
         layer_sizes_trunk=[2, 128, 128, 128, p],
         activation="relu",
         kernel_initializer="Glorot normal",
     )
     net.to(device) # Ensure the model params live on GPU
 
     # Compile model
     model = dde.Model(data, net)
     model.compile(
         "adam",
         lr=1e-3,
         loss="mean l2 relative error",
         metrics=[],
     )
 
     print('STARTING MODEL TRAINING')
     # Train model
     losshistory, train_state = model.train(
         iterations=num_epochs,
         display_every=1000,
     )
     return model, losshistory, train_state
 
-# Objective Function
 class Objective:
-    def __init__(self, M, model, test_dist, L_0_norm, L_op, Lip, device):
+    def __init__(self, M, model, test_dist, Lip, Lip_model, L0_norm, L0_norm_model, device):
         self.M = M
         self.model = model
         self.test_alpha = test_dist.alpha
         self.test_tau = test_dist.tau
         self.test_sigma = test_dist.sigma
         self.test_m = test_dist.m
         self.test_m2 = test_dist.m2
         self.test_sqrt_eig = test_dist.sqrt_eig
-        self.L_0_norm = L_0_norm
-        self.L_op = L_op
         self.Lip = Lip
+        self.Lip_model = Lip_model
+        self.L0_norm = L0_norm
+        self.L0_norm_model = L0_norm_model
         self.device = device
 
-    def part1(self,N_samples,alpha,tau,sigma=None,z=None):
+    def part1(self,N_samples,m,alpha,tau,sigma=None,z=None):
         # Instantiate GP sampler & PDE solver
         gp = MaternGP(size=self.M, dim=2, alpha=alpha, tau=tau,sigma=sigma,device=self.device)
         solver = SolveDarcyFlow(M=self.M, device=self.device)
+        if z is None:
+            z = torch.randn(N_samples,*gp.size, device=self.device)
 
         # Generate (a,u) pairs
-        a = torch.exp(gp.sample(N_samples,z=z)) # shape (N_samples, M, M)
+        log_a = m+gp.sample(N_samples,z=z)
+        a = torch.exp(log_a) # shape (N_samples, M, M)
         u = solver.forward(a) # shape (N_samples, M, M)
 
         # Flatten a and u to input into DeepONet
-        a_flat = a.reshape(N_samples, -1) # (N, M*M)
+        log_a_flat = log_a.reshape(N_samples, -1) # (N, M*M)
         u_flat = u.reshape(N_samples, -1) # (N, M*M)
 
         # Trunk grid (same for train & test)
         X, Y = gp.grid
         x_trunk = torch.stack((X.ravel(), Y.ravel())).T # (M*M, 2)
 
         # Use DeepONet to predict u
         with torch.no_grad():
-            u_flat_pred = self.model.net((a_flat,x_trunk))
+            u_flat_pred = self.model.net((log_a_flat,x_trunk))
 
         # Return average squared 2-norm
         return torch.mean(torch.norm(u_flat - u_flat_pred, p=2, dim=1)**2)
 
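Schematically, `value` below assembles a bound-style objective: an empirical risk under the candidate training law plus a penalty for the shift to the test distributions,

```latex
\mathcal{J}(m,\alpha,\tau)
  = \underbrace{\tfrac{1}{N}\textstyle\sum_{n}\|u_n-\hat{u}_n\|_2^2}_{\texttt{part1}}
  + \underbrace{C(\mathrm{Lip},\mathrm{Lip}_{\mathrm{model}},\|\cdot\|_{0})}_{\texttt{part2}}
    \cdot \underbrace{d\bigl(\mu_{m,\alpha,\tau},\,\mu_{\mathrm{test}}\bigr)}_{\texttt{part3}}
```

where, per the code, `part3` compares the means and the square roots of the Matern eigenvalues of the two laws.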
     def part2(self,m,alpha,tau,sigma=None):
         if sigma is None:
             sigma = tau**(alpha - 1)
         i = torch.arange(1, self.M+1, device=self.device)
         I, J = torch.meshgrid(i, i, indexing='ij')
         m2 = torch.norm(m,p=2)**2+torch.sum((sigma**2/(((I*math.pi)**2+(J*math.pi)**2+tau**2)**alpha)))
-        return torch.sqrt(torch.mean((self.Lip+self.L_op)**2*(3*(self.Lip+self.L_op)**2*(m2+self.test_m2)+12*self.L_0_norm**2)))
+        return torch.sqrt(torch.mean((self.Lip+self.Lip_model)**2*(4*(self.Lip+self.Lip_model)**2*(m2+self.test_m2)+16*(self.L0_norm**2+self.L0_norm_model**2))))
 
     def part3(self,m,alpha,tau,sigma=None):
         if sigma is None:
             sigma = tau**(alpha - 1)
         i = torch.arange(1, self.M+1, device=self.device)
         I, J = torch.meshgrid(i, i, indexing='ij')
         sqrt_eig = sigma/(((I*math.pi)**2+(J*math.pi)**2+tau**2)**(alpha/2)) # Matrix with \sqrt{\lambda_{i,j}}
         return torch.norm(m-self.test_m,p=2)**2+torch.sqrt(torch.mean(torch.sum(torch.sum((sqrt_eig - self.test_sqrt_eig)**2,dim=1),dim=1)))
 
     def value(self,N_samples,m,alpha,tau,sigma=None,z=None):
         """
         Takes the number of samples used to approximate Part 1
         """
-        return self.part1(N_samples,alpha,tau,sigma,z=z)+self.part2(m,alpha,tau,sigma=sigma)*self.part3(m,alpha,tau,sigma=sigma)
+        return self.part1(N_samples,m,alpha,tau,sigma,z=z)+self.part2(m,alpha,tau,sigma=sigma)*self.part3(m,alpha,tau,sigma=sigma)
 
 def sample_from_custom_pdf(num_samples, min_x, max_x, device=None, num_grid=10001):
     """
     Sample from the PDF f(x) = C * (sin(x-pi/2)+1) on x in [min_x, max_x],
     where C is a normalization constant,
     using inverse-CDF sampling with careful unique-point handling.
     Args:
         num_samples (int): Number of samples to draw.
         num_grid (int): Number of grid points to approximate CDF (default 10001).
     Returns:
         samples (ndarray): Array of shape (num_samples,) of draws from f.
     """
     # Define a fine grid on [min_x, max_x]
     x = torch.linspace(min_x, max_x, num_grid, device=device)
     # Compute unnormalized PDF
     pdf = torch.sin(x-math.pi/2)+1
     # Approximate CDF via a cumulative Riemann sum
     dx = x[1] - x[0]
     cdf = torch.cumsum(pdf,dim=0) * dx
     cdf = cdf / cdf[-1] # normalize so cdf[-1] == 1
     cdf[0] = 0.0 # enforce CDF(min_x) = 0 exactly
 
     # Remove any repeated values (from zero-density regions)
     mask = torch.ones_like(cdf, dtype=torch.bool)
     mask[1:] = cdf[1:] != cdf[:-1]
     cdf_u = cdf[mask]
     x_u = x[mask]
 
     # Draw uniform samples
     u = torch.rand(num_samples, device=device)
 
     # Invert CDF by linear interpolation
     idx = torch.searchsorted(cdf_u, u, right=True)
     idx = torch.clamp(idx, 1, cdf_u.shape[0] - 1)
     idx0 = idx - 1
     idx1 = idx
     c0 = cdf_u[idx0]
     c1 = cdf_u[idx1]
     x0 = x_u[idx0]
     x1 = x_u[idx1]
 
     # Interpolate
     t = (u - c0) / (c1 - c0)
     samples = x0 + t * (x1 - x0)
     return samples
 
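A quick self-check of the inverse-CDF sampler: draw many samples with a coarse nearest-grid inversion and compare the histogram against the density 1 - cos(x) (bin count and sizes here are illustrative):

```python
import math
import torch

x = torch.linspace(0, 2 * math.pi, 10001)
pdf = torch.sin(x - math.pi / 2) + 1          # = 1 - cos(x)
cdf = torch.cumsum(pdf, dim=0) * (x[1] - x[0])
cdf = cdf / cdf[-1]
u = torch.rand(100_000)
idx = torch.clamp(torch.searchsorted(cdf, u, right=True), 1, len(cdf) - 1)
samples = x[idx]  # nearest-grid inversion; the script refines this by interpolating
hist = torch.histc(samples, bins=8, min=0, max=2 * math.pi)
print((hist / hist.sum()).round(decimals=3))  # mass peaks near x = pi, vanishes at 0 and 2*pi
```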
 class TestDistribution:
     def __init__(self,darcy_solver,m,alpha,tau,sigma=None,device=None):
         self.darcy_solver = darcy_solver
         self.alpha = alpha
         self.tau = tau
         self.m = m
         if sigma is None:
             self.sigma = self.tau**(self.alpha - 1)
         else:
             self.sigma = sigma
         self.device = device
         self.m2, self.sqrt_eig = self.getStats()
 
     def getStats(self):
         i = torch.arange(1, self.darcy_solver.M+1, device=self.device)
         I, J = torch.meshgrid(i, i, indexing='ij')
         m2 = torch.norm(self.m,p=2)**2+torch.sum(torch.sum(self.sigma[:,None,None]**2/(((I[None,:,:]*math.pi)**2+(J[None,:,:]*math.pi)**2+self.tau[:,None,None]**2)**self.alpha[:,None,None]),dim=1),dim=1)
         sqrt_eig = self.sigma[:,None,None]/(((I[None,:,:]*math.pi)**2+(J[None,:,:]*math.pi)**2+self.tau[:,None,None]**2)**(self.alpha[:,None,None]/2))
         return m2, sqrt_eig
 
     def computeRelativeOODError(self,model,N_samples,z=None):
         K = len(self.alpha)
         error = 0
         den = 0
         for k in range(K):
             # Instantiate GP sampler
             gp = MaternGP(size=self.darcy_solver.M, dim=2, alpha=self.alpha[k], tau=self.tau[k],sigma=self.sigma[k],device=self.device)
             if z is None:
                 z = torch.randn(N_samples,*gp.size, device=self.device)
 
             # Generate (a,u) pairs
-            a = torch.exp(self.m[k]+gp.sample(N_samples,z=z)) # shape (N_samples, M, M)
+            log_a = self.m[k]+gp.sample(N_samples,z=z)
+            a = torch.exp(log_a) # shape (N_samples, M, M)
             u = self.darcy_solver.forward(a) # shape (N_samples, M, M)
 
             # Flatten a and u to input into DeepONet
-            a_flat = a.reshape(N_samples, -1) # (N, M*M)
+            log_a_flat = log_a.reshape(N_samples, -1) # (N, M*M)
             u_flat = u.reshape(N_samples, -1) # (N, M*M)
 
             # Trunk grid (same for train & test)
             X, Y = gp.grid
             x_trunk = torch.stack((X.ravel(), Y.ravel())).T # (M*M, 2)
 
             # Use DeepONet to predict u
             with torch.no_grad():
-                u_flat_pred = model.net((a_flat,x_trunk))
+                u_flat_pred = model.net((log_a_flat,x_trunk))
 
-            # Return average squared 2-norm
+            # Add average relative error for particular k
             error += torch.mean(torch.norm(u_flat - u_flat_pred, p=2, dim=1)**2)
             den += torch.mean(torch.norm(u_flat, p=2, dim=1)**2)
         return (error/K)/(den/K)
 
 def optimize(init_m, alpha, tau, obj_fun, num_steps,N_samples=1,z=None):
     print('OPTIMIZING GIVEN MODEL')
     if z is None:
         z = torch.randn(N_samples,obj_fun.M,obj_fun.M,device=obj_fun.device)
     m = torch.tensor(init_m, device=obj_fun.device, requires_grad=True)
     opt = torch.optim.Adam([m], lr=1e-3)
     for step in range(num_steps):
         opt.zero_grad()
         loss = obj_fun.value(N_samples=N_samples,m=m,alpha=alpha,tau=tau,z=z)
         loss.backward() # backprop into m
         opt.step() # update m
 
         if step % 100 == 0:
             print(f"step {step:03d} | m = {m.item():.4f} | loss = {loss.item():.4e}")
 
     return m.item(), loss.item()
 
-# # Several independent runs of AMA
+# # Several independent runs
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f'Using Device: {device}')
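`optimize` holds `z` fixed so that successive objective evaluations are compared under common random numbers. A toy version of the same pattern — minimizing E[(m + 0.3z)²] over a scalar m with frozen z (all names illustrative):

```python
import torch

torch.manual_seed(0)
z = torch.randn(512)                 # frozen noise: the same z at every step
m = torch.tensor(2.0, requires_grad=True)
opt = torch.optim.Adam([m], lr=1e-1)
for step in range(200):
    opt.zero_grad()
    loss = ((m + 0.3 * z) ** 2).mean()
    loss.backward()
    opt.step()
print(round(m.item(), 3))  # converges to -0.3 * z.mean(), approximately 0
```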
@@ -440,62 +455,63 @@ def optimize(init_m, alpha, tau, obj_fun, num_steps,N_samples=1,z=None):
 N_samples_OOD = 3
 alpha, tau = 2, 3
 K=20 # Number of test dist.
-Lip=.5
-L_op=.5
-L_0_norm=2
+Lip=.1
+Lip_model=.1
+L0_norm=2.6
+L0_norm_model=3
 
 runs = 13
 trial_results = []
 for run in range(runs):
     # Test Distributions
     test_alpha = sample_from_custom_pdf(K,2,3,device=device)
     test_tau = sample_from_custom_pdf(K,1,4,device=device)
     test_m = sample_from_custom_pdf(K,0,1,device=device)
     test_dist = TestDistribution(darcy_solver,test_m,test_alpha,test_tau,device=device)
 
     # Fix randomness within each run
     z_min = torch.randn(N_samples_min,M,M, device=device)
     z_OOD = torch.randn(N_samples_OOD,M,M, device=device)
 
     m=0.0 # initial m
     AMAloss = 1e+99 # initialize loss
     results = []
     for i in range(num_AMA_iter):
         # Training
         model,_,_ = train_model(M,N_samples,m,alpha,tau,num_epochs=num_train_epochs,device=device)
 
         # Evaluate Objective at half step
-        obj_fun = Objective(M, model, test_dist, L_0_norm, L_op, Lip, device)
+        obj_fun = Objective(M, model, test_dist, Lip, Lip_model, L0_norm, L0_norm_model, device)
         m = torch.tensor(m,device=device)
         temp_AMAloss = obj_fun.value(N_samples=N_samples_min,m=m,alpha=alpha,tau=tau,z=z_min)
         attempt = 0
         while temp_AMAloss > AMAloss and attempt < 2:
             print(f"Hmm, loss {temp_AMAloss} is not smaller than previous loss {AMAloss}. We'll keep training.")
             # TRAIN AGAIN
             losshistory, train_state = model.train(
                 iterations=num_train_epochs,
                 display_every=1000,
                 disregard_previous_best=True,
             )
             temp_AMAloss = obj_fun.value(N_samples=N_samples_min,m=m,alpha=alpha,tau=tau,z=z_min)
             attempt += 1
We'll keep training.") - # TRAIN AGAIN - losshistory, train_state = model.train( - iterations=num_train_epochs, - display_every=1000, - disregard_previous_best=True, - ) - temp_AMAloss = obj_fun.value(N_samples=N_samples_min,m=m,alpha=alpha,tau=tau,z=z_min) - attempt += 1 - AMAloss = temp_AMAloss - # Evaluate OOD performace - OODerror = test_dist.computeRelativeOODError(model,N_samples_OOD,z=z_OOD) - print(f'Iteration: {i+0.5} | AMA Loss: {AMAloss} | Relative OOD Error: {OODerror} | m: {m}') - results.append([i+0.5,AMAloss,OODerror,m]) - df = pd.DataFrame(results, columns = ['Iteration','AMA Loss','Relative OOD Error','m']) - - # Minimizing - temp_m, temp_AMAloss = optimize(m, alpha, tau, obj_fun, num_min_steps,N_samples=N_samples_min,z=z_min) - attempt = 0 - while temp_AMAloss > AMAloss and attempt < 2: - temp_m, temp_AMAloss = optimize(temp_m, alpha, tau, obj_fun, num_min_steps,N_samples=N_samples_min,z=z_min) - attempt += 1 - m, AMAloss = temp_m, temp_AMAloss - print(f'Iteration: {i+1} | AMA Loss: {AMAloss} | Relative OOD Error: NA | m: {m}') - results.append([i+1,AMAloss,torch.nan,m]) - df = pd.DataFrame(results, columns = ['Iteration','AMA Loss','Relative OOD Error','m']) - trial_results.append(df) - with open('DarcyFlow_results.pkl', 'wb') as f: - pickle.dump(trial_results, f) \ No newline at end of file + attempt += 1 + AMAloss = temp_AMAloss + # Evaluate OOD performace + OODerror = test_dist.computeRelativeOODError(model,N_samples_OOD,z=z_OOD) + print(f'Iteration: {i+0.5} | AMA Loss: {AMAloss} | Relative OOD Error: {OODerror} | m: {m}') + results.append([i+0.5,AMAloss,OODerror,m]) + df = pd.DataFrame(results, columns = ['Iteration','AMA Loss','Relative OOD Error','m']) + + # Minimizing + temp_m, temp_AMAloss = optimize(m, alpha, tau, obj_fun, num_min_steps,N_samples=N_samples_min,z=z_min) + attempt = 0 + while temp_AMAloss > AMAloss and attempt < 2: + temp_m, temp_AMAloss = optimize(temp_m, alpha, tau, obj_fun, num_min_steps,N_samples=N_samples_min,z=z_min) + attempt += 1 + m, AMAloss = temp_m, temp_AMAloss + print(f'Iteration: {i+1} | AMA Loss: {AMAloss} | Relative OOD Error: NA | m: {m}') + results.append([i+1,AMAloss,torch.nan,m]) + df = pd.DataFrame(results, columns = ['Iteration','AMA Loss','Relative OOD Error','m']) +trial_results.append(df) +with open('DarcyFlow_results.pkl', 'wb') as f: + pickle.dump(trial_results, f) diff --git a/operator_learning/NtDGPU.py b/operator_learning/NtDGPU.py new file mode 100644 index 0000000..b2a36df --- /dev/null +++ b/operator_learning/NtDGPU.py @@ -0,0 +1,605 @@ +#!/usr/bin/env python +# coding: utf-8 + +# # AMA NtD with GPU + +import torch +import torch.nn as nn +import math +import matplotlib.pyplot as plt +import os +os.environ["DDE_BACKEND"] = "pytorch" +import deepxde as dde +import pandas as pd +import numpy as np +from scipy.optimize import differential_evolution +import pickle + +class MaternGP: + """ + Matern Gaussian Process + If dim=1, then returns samples of size 'size'. 
diff --git a/operator_learning/NtDGPU.py b/operator_learning/NtDGPU.py
new file mode 100644
index 0000000..b2a36df
--- /dev/null
+++ b/operator_learning/NtDGPU.py
@@ -0,0 +1,605 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# # AMA NtD with GPU
+
+import torch
+import torch.nn as nn
+import math
+import matplotlib.pyplot as plt
+import os
+os.environ["DDE_BACKEND"] = "pytorch"
+import deepxde as dde
+import pandas as pd
+import numpy as np
+from scipy.optimize import differential_evolution
+import pickle
+
+class MaternGP:
+    """
+    Matern Gaussian Process
+    If dim=1, then returns samples of size 'size'.
+    If dim=2, then returns samples of size 'size x size'
+    """
+    def __init__(self, size, dim=1, alpha=2, tau=3, sigma=None, device=None):
+        self.device = device
+        self.dim = dim
+
+        if sigma is None:
+            sigma = tau**(0.5*(2*alpha - self.dim))
+
+        if self.dim == 1:
+            i_max = size
+            i = torch.arange(start=1, end=i_max+1, step=1, device=device)
+            self.basis_idx = i
+
+            self.sqrt_eig = sigma*(((math.pi**2)*(i**2) + tau**2)**(-alpha/2.0))
+
+            self.size = (size,)
+
+            x = torch.linspace(0, 1, size, device=device)
+            self.grid = x
+            # Basis Functions
+            self.phi = torch.sin(math.pi * i[:, None] * x[None, :])
+
+        if self.dim == 2:
+            i_max = size
+            j_max = size
+            i = torch.arange(start=1, end=i_max+1, step=1, device=device)
+            j = torch.arange(start=1, end=j_max+1, step=1, device=device)
+            I, J = torch.meshgrid(i,j,indexing='ij')
+
+            self.sqrt_eig = sigma*(((math.pi**2)*(I**2) + (math.pi**2)*(J**2) + tau**2)**(-alpha/2.0))
+
+            self.size = (size, size)
+
+            x = torch.linspace(0, 1, size, device=device)
+            y = torch.linspace(0, 1, size, device=device)
+            self.grid = torch.meshgrid(x,y,indexing='xy')
+
+            # Basis Functions
+            self.phi_x = torch.sin(math.pi * i[:, None] * x[None, :]) # (i_max, M)
+            self.phi_y = torch.sin(math.pi * j[:, None] * y[None, :]) # (j_max, M)
+
+    def sample(self, N, z=None):
+        """
+        Returns N samples from the Matern GP
+        """
+        if z is None:
+            z = torch.randn(N,*self.size, device=self.device)
+        coeff = self.sqrt_eig*z # if dim==1, N x size; if dim==2, N x size x size
+
+        if self.dim == 1:
+            u = math.sqrt(2) * (coeff @ self.phi) # N x size
+
+        if self.dim == 2:
+            u = torch.einsum('nij,ik,jl->nkl', coeff, self.phi_x, self.phi_y)
+            u = 2 * u # (N, size, size)
+
+        return u
+
+class DtNOperator(nn.Module):
+    def __init__(self, a, device=None):
+        """
+        Solves the PDE -div(a*grad(u))=f with DBC on an MxM grid (including
+        boundaries) and returns the corresponding NBC
+        """
+        super().__init__()
+        self.a = a
+        self.M = self.a.shape[0]
+        self.h = 1/(self.M-1)
+        self.device = device
+        self.Ax = self.getAx()
+        self.Ay = self.getAy()
+        self.L = self.assemble_operator()
+        self.D = self.build_D()
+        self.D_pinv = torch.linalg.pinv(self.D)
+        self.trunk = self.getTrunk()
+
+    def getTrunk(self):
+        top_x = torch.linspace(0,1,self.M, device=self.device)
+        top_x = top_x[0:-1]
+        right_x = torch.ones((self.M-1,), device=self.device)
+        bottom_x = torch.linspace(1,0,self.M, device=self.device)
+        bottom_x = bottom_x[0:-1]
+        left_x = torch.zeros((self.M-1,), device=self.device)
+        x = torch.concatenate((top_x,right_x,bottom_x,left_x))
+        x = x.reshape(4*self.M-4,1)
+        top_y = torch.ones((self.M-1,), device=self.device)
+        right_y = torch.linspace(1,0,self.M, device=self.device)
+        right_y = right_y[0:-1]
+        bottom_y = torch.zeros((self.M-1,), device=self.device)
+        left_y = torch.linspace(0,1,self.M, device=self.device)
+        left_y = left_y[0:-1]
+        y = torch.concatenate((top_y,right_y,bottom_y,left_y))
+        y = y.reshape(4*self.M-4,1)
+        trunk = torch.concatenate((x,y),1)
+        return trunk
+
+    def getAx(self):
+        """
+        Build the diagonal matrix of horizontal harmonic means from self.a
+        returns Ax: (M*(M-1), M*(M-1)) dense torch.Tensor
+        """
+        # sums on each horizontal edge
+        Ax_den = self.a[:, :-1] + self.a[:, 1:] # (M, M-1)
+        Ax_matrix = torch.where(
+            Ax_den == 0, # if
+            torch.zeros_like(Ax_den), # then
+            2 * self.a[:, :-1] * self.a[:, 1:] / Ax_den # else
+        )
+        Ax_flat = Ax_matrix.reshape(-1) # (M*(M-1),)
+        return torch.diag(Ax_flat) # diagonal matrix
+
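`getTrunk` above walks the boundary clockwise from the top-left corner, dropping the last point of each edge so corners are not duplicated; for M = 3 that gives 4M - 4 = 8 points. A standalone check of the same construction:

```python
import torch

M = 3
top = torch.stack((torch.linspace(0, 1, M)[:-1], torch.ones(M - 1)), dim=1)
right = torch.stack((torch.ones(M - 1), torch.linspace(1, 0, M)[:-1]), dim=1)
bottom = torch.stack((torch.linspace(1, 0, M)[:-1], torch.zeros(M - 1)), dim=1)
left = torch.stack((torch.zeros(M - 1), torch.linspace(0, 1, M)[:-1]), dim=1)
trunk = torch.cat((top, right, bottom, left))
print(trunk.shape)  # torch.Size([8, 2]), clockwise from the top-left corner
```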
+    def getAy(self):
+        """
+        Build the diagonal matrix of vertical harmonic means from self.a
+        returns Ay: (M*(M-1), M*(M-1))
+        """
+        Ay_den = self.a[:-1, :] + self.a[1:, :] # (M-1, M)
+        Ay_matrix = torch.where(
+            Ay_den == 0,
+            torch.zeros_like(Ay_den),
+            2 * self.a[:-1, :] * self.a[1:, :] / Ay_den
+        )
+        Ay_flat = Ay_matrix.reshape(-1)
+        return torch.diag(Ay_flat)
+
+    def assemble_operator(self):
+        """
+        Using the (M,M) tensor of conductivity self.a,
+        assemble the (M^2 × M^2) finite-difference operator L
+        with Dirichlet BC built in.
+        """
+
+        # 1D derivative matrix D: (M-1, M)
+        idx = torch.arange(self.M - 1, device=self.device)
+        D = torch.zeros((self.M - 1, self.M), device=self.device)
+        D[idx, idx] = -1.0
+        D[idx, idx + 1] = +1.0
+        D = D / self.h
+
+        I = torch.eye(self.M, device=self.device)
+
+        # Kron products to lift to 2D
+        Dx = torch.kron(I, D) # ((M*M-M) x M^2)
+        Dy = torch.kron(D, I) # ((M*M-M) x M^2)
+
+        # Discrete operator L = Dx.T Ax Dx + Dy.T Ay Dy
+        L = Dx.T @ self.Ax @ Dx + Dy.T @ self.Ay @ Dy # (M^2, M^2)
+
+        # Enforce Dirichlet BC: row i for any boundary node i has L[i,i]=1, all else 0
+        # boundary indices:
+        # Row major flattening: idx=r*M+c
+        top = torch.arange(0, self.M, device=self.device) # row 0, all columns from 0 to M-1
+        bottom = torch.arange((self.M-1)*self.M, self.M*self.M, device=self.device) # row M-1, all columns from 0 to M-1
+        left = torch.arange(0, self.M*self.M, self.M, device=self.device) # all rows from 0 to M-1, column 0
+        right = torch.arange(self.M-1, self.M*self.M, self.M, device=self.device) # all rows from 0 to M-1, column M-1
+        bidx = torch.unique(torch.cat((top, bottom, left, right)))
+
+        # zero out those rows, then set coef to 1
+        L[bidx, :] = 0.0
+        L[bidx, bidx] = 1.0
+
+        return L
+
+    def neumann_flux(self, u):
+        """
+        Compute the Neumann flux q = a * du/dn on the boundary of an M×M grid,
+        including corners (using the average of the fluxes in the vertical and
+        horizontal directions), and return a 1D array of length 4*(M-1) going
+        clockwise starting at the top-left corner.
+        """
+        assert u.shape == (self.M, self.M) and self.a.shape == (self.M, self.M)
+        flux_vals = []
+
+        # Top-left corner (i=0,j=0)
+        dudn_vert = (u[0, 0] - u[1, 0]) / self.h # normal points upward
+        dudn_horz = (u[0, 0] - u[0, 1]) / self.h # normal points leftward
+        flux_vals.append(self.a[0, 0] * 0.5 * (dudn_vert + dudn_horz))
+
+        # Top edge interior (i=0, j=1..M-2)
+        for j in range(1, self.M - 1):
+            dudn = (u[0, j] - u[1, j]) / self.h
+            flux_vals.append(self.a[0, j] * dudn)
+
+        # Top-right corner (i=0,j=M-1)
+        dudn_vert = (u[0, self.M - 1] - u[1, self.M - 1]) / self.h
+        dudn_horz = (u[0, self.M - 1] - u[0, self.M - 2]) / self.h
+        flux_vals.append(self.a[0, self.M - 1] * 0.5 * (dudn_vert + dudn_horz))
+
+        # Right edge interior (j=M-1, i=1..M-2)
+        for i in range(1, self.M - 1):
+            dudn = (u[i, self.M - 1] - u[i, self.M - 2]) / self.h
+            flux_vals.append(self.a[i, self.M - 1] * dudn)
+
+        # Bottom-right corner (i=M-1,j=M-1)
+        dudn_vert = (u[self.M - 1, self.M - 1] - u[self.M - 2, self.M - 1]) / self.h # normal points downward
+        dudn_horz = (u[self.M - 1, self.M - 1] - u[self.M - 1, self.M - 2]) / self.h # normal points rightward
+        flux_vals.append(self.a[self.M - 1, self.M - 1] * 0.5 * (dudn_vert + dudn_horz))
+
+        # Bottom edge interior (i=M-1, j=M-2..1)
+        for j in range(self.M - 2, 0, -1):
+            dudn = (u[self.M - 1, j] - u[self.M - 2, j]) / self.h
+            flux_vals.append(self.a[self.M - 1, j] * dudn)
+
+        # Bottom-left corner (i=M-1,j=0)
+        dudn_vert = (u[self.M - 1, 0] - u[self.M - 2, 0]) / self.h
+        dudn_horz = (u[self.M - 1, 0] - u[self.M - 1, 1]) / self.h
+        flux_vals.append(self.a[self.M - 1, 0] * 0.5 * (dudn_vert + dudn_horz))
+
+        # Left edge interior (j=0, i=M-2..1)
+        for i in range(self.M - 2, 0, -1):
+            dudn = (u[i, 0] - u[i, 1]) / self.h
+            flux_vals.append(self.a[i, 0] * dudn)
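`neumann_flux` uses first-order one-sided differences for a·∂u/∂n, with corners averaging the two outward directions. On a tiny grid with u = y and a ≡ 1 the top-edge flux should be ∂u/∂n = +1 (row 0 is the top, as in the method above):

```python
import torch

M, h = 4, 1 / 3
y = torch.linspace(0, 1, M)
u = y.flip(0)[:, None].expand(M, M)   # u(i, j) = y, decreasing with row index i
dudn_top = (u[0, 1] - u[1, 1]) / h    # outward normal at the top edge points up
print(dudn_top)  # tensor(1.), up to float rounding
```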
+
+        return torch.tensor(flux_vals,device=self.device)
+
+    def forward(self, dbc, f=None):
+        """
+        Args:
+            dbc: (N, 4*M-4) tensor of N separate Dirichlet boundary conditions
+                 of size 4M-4
+            f: (N, M, M) right-hand side, defaults to 0 in the interior
+        Returns:
+            nbc: (N, 4*M-4) tensor of N separate Neumann boundary conditions
+                 of size 4M-4
+        """
+        assert dbc.shape[1] == 4*self.M-4
+        N = dbc.shape[0]
+
+        # default source term
+        if f is None:
+            f = torch.zeros((N,self.M,self.M), device=self.device)
+
+        u = torch.zeros((N,self.M,self.M), device=self.device)
+        nbc = torch.zeros_like(dbc, device=self.device)
+        # solve once per sample
+        for n in range(N):
+
+            # Apply boundary conditions
+            f[n,0,0:self.M-1] = dbc[n,:self.M-1]
+            f[n,:self.M-1,self.M-1] = dbc[n,self.M-1:2*self.M - 2]
+            bottom_values = dbc[n,2*self.M-2:3*self.M-3]
+            f[n,self.M-1,1:] = bottom_values.flip(0)
+            left_values = dbc[n,3*self.M-3:]
+            f[n,1:, 0] = left_values.flip(0)
+            f_flat = f[n].reshape(-1) # (M^2,)
+
+            # Dense solve
+            u_flat = torch.linalg.solve(self.L, f_flat)
+            u[n] = u_flat.reshape(self.M, self.M)
+
+            # Compute NBC
+            nbc[n] = self.neumann_flux(u[n])
+            nbc[n] -= torch.mean(nbc[n]) # Enforce NBC integrates to 0
+        return nbc
+
+    def build_D(self):
+        """
+        Build the DtN matrix D of shape (4*M-4, 4*M-4), so that
+        nbc = D @ dbc
+
+        M : int, grid size (including boundary)
+        a : (M,M) conductivity array
+        """
+        n_rows = 4*self.M - 4 # number of Neumann outputs
+        n_cols = 4*self.M - 4 # length of full DBC
+        D = torch.zeros((n_rows, n_cols), device=self.device)
+
+        # basis e_j in the full-DBC space
+        print('CONSTRUCTING OPERATOR')
+        for j in range(n_cols):
+            e = torch.zeros(1, n_cols, device=self.device)
+            e[0,j] = 1.0
+            # Apply DtN to Basis
+            D[:, j] = self.forward(e)[0]
+            if j % 10 == 0:
+                print(f"Completed column {j+1}/{n_cols}")
+        return D
+
+def train_model(DtN,N_samples,omega,alpha,tau,num_epochs=10000,z=None,device=None):
+    print('STARTING DATA GENERATION FOR TRAINING')
+    M = DtN.M
+    D_pinv = DtN.D_pinv
+    trunk = DtN.trunk
+    # Instantiate NBC GP sampler
+    nbc_gp = MaternGP(size=4*M-4, dim=1, alpha=alpha, tau=tau, device=device)
+
+    # Generate (nbc,dbc) pairs
+    nbc = amp*torch.sin(omega*(nbc_gp.grid-1/2))+nbc_gp.sample(N_samples,z=z) # Shape (N_samples, 4M-4)
+    nbc -= torch.mean(nbc,dim=1).reshape(-1,1) # Enforce mean 0
+    dbc = nbc @ D_pinv.T
+
+    # Train/test split
+    ntrain = int(0.8 * N_samples)
+    nbc_train, nbc_test = nbc[:ntrain], nbc[ntrain:]
+    dbc_train, dbc_test = dbc[:ntrain], dbc[ntrain:]
+
+    # DeepXDE data object
+    data = dde.data.TripleCartesianProd(
+        X_train=(nbc_train, trunk),
+        y_train=dbc_train,
+        X_test=(nbc_test, trunk),
+        y_test=dbc_test,
+    )
+
+    # DeepONet Architecture
+    p = 200 # # of DeepONet basis functions
+    net = dde.nn.pytorch.DeepONetCartesianProd(
+        layer_sizes_branch=[4*M-4, 256, 256, 128, p],
+        layer_sizes_trunk=[2, 128, 128, 128, p],
+        activation="relu",
+        kernel_initializer="Glorot normal",
+    )
+    net.to(device) # Ensure the model params live on GPU
+
+    # Compile model
+    model = dde.Model(data, net)
+    model.compile(
+        "adam",
+        lr=1e-3,
+        loss="mean l2 relative error",
+        metrics=[],
+    )
+
+    print('STARTING MODEL TRAINING')
+    # Train model
+    losshistory, train_state = model.train(
+        iterations=num_epochs,
+        display_every=1000,
+    )
+    return model, losshistory, train_state
+
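`build_D` probes the DtN map with the 4M-4 canonical basis vectors, i.e. one dense (M² × M²) solve per column. Since the operator `self.L` is fixed, a single factorization could in principle be reused across all columns; a hedged sketch of that variant on recent PyTorch (shapes and values illustrative):

```python
import torch

n = 25                                   # stands in for M*M
L = torch.eye(n) + 0.05 * torch.randn(n, n)
LU, pivots = torch.linalg.lu_factor(L)   # factor once
F = torch.eye(n)                         # columns = basis right-hand sides
U = torch.linalg.lu_solve(LU, pivots, F) # all solves share the factorization
print(torch.allclose(L @ U, F, atol=1e-4))
```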
+class Objective:
+    def __init__(self, DtN, model, test_dist, Lip, Lip_model, L0_norm, L0_norm_model, device):
+        self.DtN = DtN
+        self.model = model
+        self.test_omega = test_dist.omega
+        self.test_alpha = test_dist.alpha
+        self.test_tau = test_dist.tau
+        self.test_sigma = test_dist.sigma
+        self.test_m2 = test_dist.m2
+        self.test_sqrt_eig = test_dist.sqrt_eig
+        self.L0_norm = L0_norm
+        self.L0_norm_model = L0_norm_model
+        self.Lip = Lip
+        self.Lip_model = Lip_model
+        self.device = device
+
+    def part1(self,N_samples,omega,alpha,tau,sigma=None,z=None):
+        M = self.DtN.M
+        D_pinv = self.DtN.D_pinv
+        trunk = self.DtN.trunk
+        # Instantiate NBC GP sampler
+        nbc_gp = MaternGP(size=4*M-4, dim=1, alpha=alpha, tau=tau, sigma=sigma, device=self.device)
+
+        # Generate (nbc,dbc) pairs
+        nbc = amp*torch.sin(omega*(nbc_gp.grid-1/2))+nbc_gp.sample(N_samples,z=z) # Shape (N_samples, 4M-4)
+        nbc -= torch.mean(nbc,dim=1).reshape(-1,1) # Enforce mean 0
+        dbc = nbc @ D_pinv.T
+
+        # Use DeepONet to predict u
+        with torch.no_grad():
+            dbc_pred = self.model.net((nbc,trunk))
+
+        # Return average squared 2-norm
+        return torch.mean(torch.norm(dbc - dbc_pred, p=2, dim=1)**2)
+
+    def part2(self,omega,alpha,tau,sigma=None):
+        if sigma is None:
+            sigma = tau**(alpha - 1/2)
+        i = torch.arange(1, self.DtN.M+1, device=self.device)
+        x = torch.linspace(0,1,4*self.DtN.M-4,device=self.device)
+        mean_norm_squared = torch.norm(amp*torch.sin(omega*(x-0.5)),p=2)**2
+        m2 = mean_norm_squared+torch.sum((sigma**2/(((i*math.pi)**2+tau**2)**alpha)))
+        return torch.sqrt(torch.mean((self.Lip+self.Lip_model)**2*(4*(self.Lip+self.Lip_model)**2*(m2+self.test_m2)+16*(self.L0_norm**2+self.L0_norm_model**2))))
+
+    def part3(self,omega,alpha,tau,sigma=None):
+        if sigma is None:
+            sigma = tau**(alpha - 1/2)
+        i = torch.arange(1, self.DtN.M+1, device=self.device)
+        sqrt_eig = sigma/(((i*math.pi)**2+tau**2)**(alpha/2)) # vector of \sqrt{\lambda_{i}}
+        x = torch.linspace(0,1,4*self.DtN.M-4,device=self.device)
+        mean_diff = torch.norm(amp*torch.sin(omega*(x[None,:]-0.5)) - amp*torch.sin(self.test_omega[:,None]*(x[None,:]-0.5)),p=2,dim=1)**2
+        return torch.sqrt(torch.mean(mean_diff + torch.sum((sqrt_eig - self.test_sqrt_eig)**2,dim=1)))
+
+    def value(self,N_samples,omega,alpha,tau,sigma=None,z=None):
+        """
+        Takes the number of samples used to approximate Part 1
+        """
+        return self.part1(N_samples,omega,alpha,tau,sigma=sigma,z=z)+self.part2(omega,alpha,tau,sigma=sigma)*self.part3(omega,alpha,tau,sigma=sigma)
+
+def sample_from_custom_pdf(num_samples, min_x, max_x, device=None, num_grid=10001):
+    """
+    Sample from the PDF f(x) = C * (sin(x-pi/2)+1) on x in [min_x, max_x],
+    where C is a normalization constant,
+    using inverse-CDF sampling with careful unique-point handling.
+    Args:
+        num_samples (int): Number of samples to draw.
+        num_grid (int): Number of grid points to approximate CDF (default 10001).
+    Returns:
+        samples (ndarray): Array of shape (num_samples,) of draws from f.
+    """
+    # Define a fine grid on [min_x, max_x]
+    x = torch.linspace(min_x, max_x, num_grid, device=device)
+    # Compute unnormalized PDF
+    pdf = torch.sin(x-math.pi/2)+1
+    # Approximate CDF via a cumulative Riemann sum
+    dx = x[1] - x[0]
+    cdf = torch.cumsum(pdf,dim=0) * dx
+    cdf = cdf / cdf[-1] # normalize so cdf[-1] == 1
+    cdf[0] = 0.0 # enforce CDF(min_x) = 0 exactly
+
+    # Remove any repeated values (from zero-density regions)
+    mask = torch.ones_like(cdf, dtype=torch.bool)
+    mask[1:] = cdf[1:] != cdf[:-1]
+    cdf_u = cdf[mask]
+    x_u = x[mask]
+
+    # Draw uniform samples
+    u = torch.rand(num_samples, device=device)
+
+    # Invert CDF by linear interpolation
+    idx = torch.searchsorted(cdf_u, u, right=True)
+    idx = torch.clamp(idx, 1, cdf_u.shape[0] - 1)
+    idx0 = idx - 1
+    idx1 = idx
+    c0 = cdf_u[idx0]
+    c1 = cdf_u[idx1]
+    x0 = x_u[idx0]
+    x1 = x_u[idx1]
+
+    # Interpolate
+    t = (u - c0) / (c1 - c0)
+    samples = x0 + t * (x1 - x0)
+    return samples
+
+class TestDistribution:
+    def __init__(self,DtN,omega,alpha,tau,sigma=None,device=None):
+        self.DtN = DtN
+        self.omega = omega
+        self.alpha = alpha
+        self.tau = tau
+        if sigma is None:
+            self.sigma = self.tau**(self.alpha - 1/2)
+        else:
+            self.sigma = sigma
+        self.device = device
+        self.m2, self.sqrt_eig = self.getStats()
+
+    def getStats(self):
+        i = torch.arange(1, self.DtN.M+1, device=self.device)
+        x = torch.linspace(0,1,4*self.DtN.M-4,device=self.device)
+        mean_norm_squared = torch.norm(amp*torch.sin(self.omega[:,None]*(x[None,:]-0.5)),p=2,dim=1)**2
+        m2 = mean_norm_squared + torch.sum(self.sigma[:,None]**2/(((i[None,:]*math.pi)**2+self.tau[:,None]**2)**self.alpha[:,None]),dim=1)
+        sqrt_eig = self.sigma[:,None]/(((i[None,:]*math.pi)**2+self.tau[:,None]**2)**(self.alpha[:,None]/2)) # (K, M)
+        return m2, sqrt_eig
+
+    def computeRelativeOODError(self,model,N_samples,z=None):
+        K = len(self.alpha)
+        error = 0
+        den = 0
+        for k in range(K):
+            M = self.DtN.M
+            D_pinv = self.DtN.D_pinv
+            trunk = self.DtN.trunk
+            # Instantiate NBC GP sampler
+            nbc_gp = MaternGP(size=4*M-4, dim=1, alpha=self.alpha[k], tau=self.tau[k], device=self.device)
+
+            # Generate (nbc,dbc) pairs
+            nbc = amp*torch.sin(self.omega[k]*(nbc_gp.grid-1/2))+nbc_gp.sample(N_samples,z=z) # Shape (N_samples, 4M-4)
+            nbc -= torch.mean(nbc,dim=1).reshape(-1,1) # Enforce mean 0
+            dbc = nbc @ D_pinv.T
+
+            # Use DeepONet to predict u
+            with torch.no_grad():
+                dbc_pred = model.net((nbc,trunk))
+
+            # Add average relative error for particular k
+            error += torch.mean(torch.norm(dbc - dbc_pred, p=2, dim=1)**2)
+            den += torch.mean(torch.norm(dbc, p=2, dim=1)**2)
+        return (error/K)/(den/K)
+
+# # Several independent runs
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f'Using Device: {device}')
+
+# Conductivity
+torch.manual_seed(0)
+a_np = np.load('conductivity.npy').astype(np.float32)
+a = torch.from_numpy(a_np).to(device)
+DtN = DtNOperator(a,device=device)
+
+# Hyperparameters
+runs = 5
+num_AMA_iter = 10
+num_train_epochs = 5000
+N_samples_train = 1000
+N_samples_min = 1000
+N_samples_OOD = 1000
+
+alpha, tau = 2.0,3.0
+Lip = 1.0
+Lip_model = 1.0
+L0_norm = 0.0
+L0_norm_model = 0.1
+amp = torch.tensor(1,device=device)
+
+trial_results = []
+for run in range(runs):
+    # Test Distributions
+    K = 20
+    # test_alpha = sample_from_custom_pdf(K,2,8,device=device)
+    test_alpha = 1.5*torch.ones(K,device=device)
+    test_tau = sample_from_custom_pdf(K,0,50,device=device)
+    test_omega = sample_from_custom_pdf(K,-4*math.pi,4*math.pi,device=device)
+    # test_alpha = 2*torch.ones(K,device=device)
+    # test_tau = 3*torch.ones(K,device=device)
+    # test_omega = 5*torch.ones(K,device=device)
+    test_dist = TestDistribution(DtN,test_omega,test_alpha,test_tau,device=device)
+
+    # Fix randomness within each run
+    z_min = torch.randn(N_samples_min,4*DtN.M-4, device=device)
+    z_OOD = torch.randn(N_samples_OOD,4*DtN.M-4, device=device)
+
+    omega = 4*math.pi # initial omega
+    AMAloss = 1e+99 # initialize AMA Loss
+    results = []
+    for i in range(num_AMA_iter):
+        model,_,_ = train_model(DtN,N_samples_train,omega,alpha,tau,num_epochs=num_train_epochs,device=device)
+
+        # Evaluate objective function at half step
+        obj_fun = Objective(DtN, model, test_dist, Lip, Lip_model, L0_norm, L0_norm_model, device=device)
+        omega = torch.tensor(omega,device=device)
+        temp_AMAloss = obj_fun.value(N_samples=N_samples_min,omega=omega,alpha=alpha,tau=tau,z=z_min)
+        attempt = 0
+        while temp_AMAloss > AMAloss and attempt < 10:
+            print(f"Hmm, loss {temp_AMAloss} is not smaller than previous loss {AMAloss}. We'll keep training.")
+            # TRAIN AGAIN
+            losshistory, train_state = model.train(
+                iterations=1000,
+                display_every=100,
+                disregard_previous_best=True,
+            )
+            temp_AMAloss = obj_fun.value(N_samples=N_samples_min,omega=omega,alpha=alpha,tau=tau,z=z_min)
+            attempt += 1
+        AMAloss = temp_AMAloss
+
+        # Evaluate OOD performance
+        OODerror = test_dist.computeRelativeOODError(model,N_samples_OOD,z=z_OOD)
+        print(f'Iteration {i+0.5} | AMA Loss {AMAloss} | OOD Error {OODerror} | omega {omega}')
+        results.append([i+0.5,AMAloss,OODerror,omega])
+        df = pd.DataFrame(results, columns = ['Iteration','AMA Loss','Relative OOD Error','omega'])
+
+        # Minimizing
+        print("MINIMIZING")
+        tol = 1e-12
+        # Wrap obj_fun for differential_evolution (returns a plain CPU float)
+        def obj_w(omega):
+            return obj_fun.value(N_samples=N_samples_min,
+                                 omega=torch.tensor(omega,dtype=torch.float32,device=device),
+                                 alpha=alpha,
+                                 tau=tau,
+                                 z=z_min).item()
+
+        res = differential_evolution(obj_w, bounds=[(-4*np.pi, 4*np.pi)], tol=tol,maxiter=5000, popsize=30)
+        temp_omega = res.x[0]
+        temp_AMAloss = res.fun
+        attempt = 0
+        while temp_AMAloss >= AMAloss and attempt < 10:
+            print(f"Hmm, loss {temp_AMAloss} is not less than previous loss {AMAloss}. Let's use a smaller tolerance for minimizing.")
+            tol = tol / 2
+            res = differential_evolution(obj_w, bounds=[(-4*np.pi, 4*np.pi)], tol=tol,maxiter=5000, popsize=30)
+            temp_omega = res.x[0]
+            temp_AMAloss = res.fun
+            attempt += 1
+        omega = torch.tensor(temp_omega, dtype=torch.float32, device=device)
+        AMAloss = torch.tensor(temp_AMAloss, dtype=torch.float32, device=device)
+
+        print(f'Iteration {i+1} | AMA Loss {AMAloss} | OOD Error NA | omega {omega}')
+        results.append([i+1,AMAloss,torch.nan,omega])
+        df = pd.DataFrame(results, columns = ['Iteration','AMA Loss','Relative OOD Error','omega'])
+    trial_results.append(df)
+    with open('NtD_results.pkl', 'wb') as f:
+        pickle.dump(trial_results, f)
diff --git a/operator_learning/README.md b/operator_learning/README.md
index 3f81afd..ebed79b 100644
--- a/operator_learning/README.md
+++ b/operator_learning/README.md
@@ -5,6 +5,7 @@ This repository contains scripts and notebook to reproduce the main experimental
 ## Files
 
 * **NtDExample.py**: Python script to run multiple independent runs of the Alternating Minimization Algorithm on the NtD example and save results to `NtD_results.pkl`.
+* **NtDGPU.py**: Python script to run multiple independent runs of the Alternating Minimization Algorithm on the NtD example (using GPU if available), and save results to `NtD_results.pkl`.
* **DarcyFlowGPU.py**: Python script to run multiple independent runs of the Alternating Minimization Algorithm on the Darcy flow forward problem (using GPU if available), and save results to `DarcyFlow_results.pkl`. * **PlotResults.ipynb**: Jupyter notebook that loads `NtD_results.pkl` and `DarcyFlow_results.pkl`, generates the figures presented in the paper, and saves or displays them. @@ -39,8 +40,16 @@ Additional dependencies: ``` * Outputs: `NtD_results.pkl` containing model performance metrics and intermediate data. + +2. **NtD GPU** -2. **Darcy flow example** + ```bash + python NtDGPU.py + ``` + + * Outputs: `NtD_results.pkl` containing model performance metrics and intermediate data. + +3. **Darcy flow example** ```bash python DarcyFlowGPU.py @@ -48,7 +57,7 @@ Additional dependencies: * Outputs: `DarcyFlow_results.pkl` containing model performance metrics and intermediate data. -3. **Plotting results** +4. **Plotting results** ```bash jupyter notebook PlotResults.ipynb @@ -62,9 +71,10 @@ Additional dependencies: ``` ./ ├── NtDExample.py # Script for NtD example +├── NtDGPU.py # Script for NtD example with GPU ├── DarcyFlowGPU.py # Script for Darcy flow example ├── PlotResults.ipynb # Notebook for plotting saved results -├── NtD_results.pkl # Output from NtDExample.py +├── NtD_results.pkl # Output from NtDExample.py/NtDGPU.py └── DarcyFlow_results.pkl # Output from DarcyFlowGPU.py ```
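Finally, a minimal sketch for inspecting the saved pickles from either script (each file holds a list of per-run `pandas` DataFrames with the columns shown above; the last column is `m` or `omega` depending on the script):

```python
import pickle

with open("DarcyFlow_results.pkl", "rb") as f:
    trial_results = pickle.load(f)      # list of DataFrames, one per run
df = trial_results[0]
print(df[["Iteration", "AMA Loss", "Relative OOD Error"]].head())
```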