From 3c111408aa1d2ac37e1e688c74817b9465ca028b Mon Sep 17 00:00:00 2001 From: kchenlun Date: Fri, 8 Aug 2025 11:39:25 -0400 Subject: [PATCH 1/6] Add files via upload Add MGWR summary for Poisson and Binomial models --- mgwrlib/mgwr/summary.py | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/mgwrlib/mgwr/summary.py b/mgwrlib/mgwr/summary.py index 2ce34e0e..9852650c 100755 --- a/mgwrlib/mgwr/summary.py +++ b/mgwrlib/mgwr/summary.py @@ -183,23 +183,33 @@ def summaryMGWR(self,diag): for j in range(self.k): summary += "%-67s %12.3f\n" % (diag.XNames[j], diag.testMCResults[j]) - summary += "\n%s\n" % ('Diagnostic Information') summary += '-' * 80 + '\n' - - summary += "%-67s %12.3f\n" % ('Residual sum of squares:', self.resid_ss) - summary += "%-67s %12.3f\n" % ('Effective number of parameters (trace(S)):', self.tr_S) - summary += "%-67s %12.3f\n" % ('Degree of freedom (n - trace(S)):', self.df_model) - summary += "%-67s %12.3f\n" % ('Sigma estimate:', np.sqrt(self.sigma2)) - summary += "%-67s %12.3f\n" % ('Log-likelihood:', self.llf) - summary += "%-67s %12.3f\n" % ('Degree of Dependency (DoD):', self.DoD) - summary += "%-67s %12.3f\n" % ('AIC:', self.aic) - summary += "%-67s %12.3f\n" % ('AICc:', self.aicc) - summary += "%-67s %12.3f\n" % ('BIC:', self.bic) - summary += "%-67s %12.3f\n" % ('R2:', self.D2) - summary += "%-67s %12.3f\n" % ('Adj. R2:', self.adj_D2) - - summary += "\n%s\n" % ('Summary Statistics For MGWR Parameter Estimates') + + if isinstance(self.family, Gaussian): + summary += "%-67s %12.3f\n" % ('Residual sum of squares:', self.resid_ss) + summary += "%-67s %12.3f\n" % ('Effective number of parameters (trace(S)):', self.tr_S) + summary += "%-67s %12.3f\n" % ('Degree of freedom (n - trace(S)):', self.df_model) + summary += "%-67s %12.3f\n" % ('Sigma estimate:', np.sqrt(self.sigma2)) + summary += "%-67s %12.3f\n" % ('Log-likelihood:', self.llf) + summary += "%-67s %12.3f\n" % ('Degree of Dependency (DoD):', self.DoD) + summary += "%-67s %12.3f\n" % ('AIC:', self.aic) + summary += "%-67s %12.3f\n" % ('AICc:', self.aicc) + summary += "%-67s %12.3f\n" % ('BIC:', self.bic) + summary += "%-67s %12.3f\n" % ('R2:', self.D2) + summary += "%-67s %12.3f\n" % ('Adj. R2:', self.adj_D2) + else: + summary += "%-67s %12.3f\n" % ('Effective number of parameters (trace(S)):', self.tr_S) + summary += "%-67s %12.3f\n" % ('Degree of freedom (n - trace(S)):', self.df_model) + summary += "%-67s %12.3f\n" % ('Deviance:', self.global_deviance) + summary += "%-67s %12.3f\n" % ('AIC:', self.aic) + summary += "%-67s %12.3f\n" % ('AICc:', self.aicc) + summary += "%-67s %12.3f\n" % ('BIC:', self.bic) + summary += "%-67s %12.3f\n" % ('Percent deviance explained:', self.D2) + summary += "%-67s %12.3f\n" % ('Adj. percent deviance explained:', self.adj_D2) + + summary += "\n%s\n" % ('Diagnostic Information') + summary += "\n%s\n" % ('Summary Statistics For MGWR Parameter Estimates') summary += '-' * 80 + '\n' summary += "%-25s %10s %10s %10s %10s %10s\n" % ('Variable', 'Mean' ,'STD', 'Min' ,'Median', 'Max') summary += "%-25s %10s %10s %10s %10s %10s\n" % ('-'*20, '-'*10 ,'-'*10, '-'*10 ,'-'*10, '-'*10) From a16f0b2c1ddd3e3f5281dee05379d668f2a0ea71 Mon Sep 17 00:00:00 2001 From: kchenlun Date: Fri, 8 Aug 2025 11:40:41 -0400 Subject: [PATCH 2/6] Add files via upload Add Poisson and Binomial model to MGWR --- mgwrlib/mgwr/gwr.py | 542 ++++++++++++++++++++++++++++++++------------ 1 file changed, 392 insertions(+), 150 deletions(-) diff --git a/mgwrlib/mgwr/gwr.py b/mgwrlib/mgwr/gwr.py index 9ba93b42..fcd9a06f 100755 --- a/mgwrlib/mgwr/gwr.py +++ b/mgwrlib/mgwr/gwr.py @@ -1,8 +1,11 @@ -#Main GWR classes +# Main GWR classes __author__ = "Taylor Oshan Tayoshan@gmail.com" import copy +import os +from typing import Optional +import warnings import numpy as np import numpy.linalg as la from scipy.stats import t @@ -12,10 +15,11 @@ from spglm.glm import GLM, GLMResults from spglm.iwls import iwls, _compute_betas_gwr from spglm.utils import cache_readonly +from spreg.utils import spdot, spmultiply +from joblib import Parallel, delayed from .diagnostics import get_AIC, get_AICc, get_BIC, corr from .kernels import * from .summary import * -import multiprocessing as mp class GWR(GLM): @@ -82,6 +86,13 @@ class GWR(GLM): True to store full n by n hat matrix, False to not store full hat matrix to minimize memory footprint (defalut). + name_x : list of strings + Names of independent variables for use in output + + n_jobs : integer + The number of jobs (default -1) to run in parallel. -1 means using all processors. + + Attributes ---------- coords : array-like @@ -134,7 +145,7 @@ class GWR(GLM): spherical : boolean True for shperical coordinates (long-lat), False for projected coordinates (defalut). - + hat_matrix : boolean True to store full n by n hat matrix, False to not store full hat matrix to minimize memory footprint (defalut). @@ -206,7 +217,7 @@ class GWR(GLM): def __init__(self, coords, y, X, bw, family=Gaussian(), offset=None, sigma2_v1=True, kernel='bisquare', fixed=False, constant=True, - spherical=False, hat_matrix=False): + spherical=False, hat_matrix=False, name_x=None,n_jobs=-1,ext_w=None): """ Initialize class """ @@ -229,10 +240,16 @@ def __init__(self, coords, y, X, bw, family=Gaussian(), offset=None, self.P = None self.spherical = spherical self.hat_matrix = hat_matrix - self.m = np.unique(self.coords, axis=0).shape[0] + self.name_x = name_x + self.n_jobs = n_jobs + self.ext_w = ext_w def _build_wi(self, i, bw): + if bw == np.inf: + wi = np.ones((self.n)) + return wi + try: wi = Kernel(i, self.coords, bw, fixed=self.fixed, function=self.kernel, points=self.points, @@ -258,7 +275,7 @@ def _local_fit(self, i): elif isinstance(self.family, (Poisson, Binomial)): rslt = iwls(self.y, self.X, self.family, self.offset, None, self.fit_params['ini_params'], self.fit_params['tol'], - self.fit_params['max_iter'], wi=wi) + self.fit_params['max_iter'], wi=wi,ext_w=self.ext_w) inv_xtx_xt = rslt[5] w = rslt[3][i][0] influ = np.dot(self.X[i], inv_xtx_xt[:, i]) * w @@ -277,7 +294,7 @@ def _local_fit(self, i): return influ, resid, predy, betas.reshape(-1), w, Si, tr_STS_i, CCT def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls', - lite=False, pool=None): + lite=False,pool=None): """ Method that fits a model with a particular estimation routine. @@ -304,7 +321,7 @@ def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls', bandwidth selection (could speed up bandwidth selection for GWR) or to estimate a full GWR. Default is False. - pool : A multiprocessing Pool object to enable parallel fitting; default is None. + pool : None, deprecated and not used. Returns ------- @@ -320,6 +337,9 @@ def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls', self.fit_params['solve'] = solve self.fit_params['lite'] = lite + if pool: + warnings.warn("The pool parameter is no longer used and will have no effect; parallelization is default and implemented using joblib instead.", RuntimeWarning, stacklevel=2) + if solve.lower() == 'iwls': if self.points is None: @@ -327,11 +347,7 @@ def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls', else: m = self.points.shape[0] - if pool: - rslt = pool.map(self._local_fit, - range(m)) #parallel using mp.Pool - else: - rslt = map(self._local_fit, range(m)) #sequential + rslt = Parallel(n_jobs=self.n_jobs)(delayed(self._local_fit)(i) for i in range(m)) rslt_list = list(zip(*rslt)) influ = np.array(rslt_list[0]).reshape(-1, 1) @@ -350,7 +366,8 @@ def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls', tr_STS = np.sum(np.array(rslt_list[-2])) CCT = np.array(rslt_list[-1]) return GWRResults(self, params, predy, S, CCT, influ, tr_STS, - w) + w, self.name_x) + def predict(self, points, P, exog_scale=None, exog_resid=None, fit_params={}): @@ -432,6 +449,9 @@ class GWRResults(GLMResults): n*1, final weight used for iteratively re-weighted least sqaures; default is None + name_x : list of strings + Names of independent variables for use in output + Attributes ---------- model : GWR Object @@ -517,10 +537,10 @@ class GWRResults(GLMResults): R2 : float R-squared for the entire model (1- RSS/TSS) - + adj_R2 : float adjusted R-squared for the entire model - + aic : float Akaike information criterion @@ -576,11 +596,11 @@ class GWRResults(GLMResults): pDev : float local percent of deviation accounted for; analogous to r-squared for GLM's - + D2 : float percent deviance explained for GLM, equivaleng to R2 for Gaussian. - + adj_D2 : float adjusted percent deviance explained, equivaleng to adjusted R2 for Gaussian. @@ -597,10 +617,13 @@ class GWRResults(GLMResults): p*1, predicted values generated by calling the GWR predict method to predict dependent variable at unsampled points () + + name_x : list of strings + Names of independent variables for use in output """ def __init__(self, model, params, predy, S, CCT, influ, tr_STS=None, - w=None): + w=None, name_x=None): GLMResults.__init__(self, model, params, predy, w) self.offset = model.offset if w is not None: @@ -611,18 +634,13 @@ def __init__(self, model, params, predy, S, CCT, influ, tr_STS=None, self.influ = influ self.CCT = self.cov_params(CCT, model.exog_scale) self._cache = {} + self.name_x=name_x @cache_readonly def W(self): W = np.array( [self.model._build_wi(i, self.model.bw) for i in range(self.n)]) return W - - @cache_readonly - def sumW(self): - W = np.array( - [np.sum(self.model._build_wi(i, self.model.bw)) for i in range(self.n)]) - return np.array(W).reshape(-1,1) @cache_readonly def resid_ss(self): @@ -671,8 +689,7 @@ def ENP(self): """ effective number of parameters - Defualts to tr(s) as defined in yu et. al (2018) Inference in - Multiscale GWR + Defaults to tr(s) as defined in :cite:`yu:2019` but can alternatively be based on 2tr(s) - tr(STS) @@ -768,16 +785,16 @@ def sigma2(self): if sigma2_v1 is True: only use n-tr(S) in denominator - Methods: p214, (9.6), + Methods: p214, (9.6) :cite:`fotheringham_geographically_2002` Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002). Geographically weighted regression: the analysis of spatially varying relationships. - and as defined in Yu et. al. (2018) Inference in Multiscale GWR + and as defined in :cite:`yu:2019` if sigma2_v1 is False (v1v2): use n-2(tr(S)+tr(S'S)) in denominator - Methods: p55 (2.16)-(2.18) + Methods: p55 (2.16)-(2.18) :cite:`fotheringham_geographically_2002` Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002). Geographically weighted regression: the analysis of spatially varying relationships. @@ -794,7 +811,7 @@ def std_res(self): """ standardized residuals - Methods: p215, (9.7) + Methods: p215, (9.7) :cite:`fotheringham_geographically_2002` Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002). Geographically weighted regression: the analysis of spatially varying relationships. @@ -807,7 +824,7 @@ def bse(self): """ standard errors of Betas - Methods: p215, (2.15) and (2.21) + Methods: p215, (2.15) and (2.21) :cite:`fotheringham_geographically_2002` Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002). Geographically weighted regression: the analysis of spatially varying relationships. @@ -819,7 +836,7 @@ def cooksD(self): """ Influence: leading diagonal of S Matrix - Methods: p216, (9.11), + Methods: p216, (9.11) :cite:`fotheringham_geographically_2002` Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002). Geographically weighted regression: the analysis of spatially varying relationships. @@ -876,7 +893,7 @@ def adj_alpha(self): testing. Includes corrected value for 90% (.1), 95% (.05), and 99% (.01) confidence levels. Correction comes from: - da Silva, A. R., & Fotheringham, A. S. (2015). The Multiple Testing Issue in + :cite:`Silva:2016` : da Silva, A. R., & Fotheringham, A. S. (2015). The Multiple Testing Issue in Geographically Weighted Regression. Geographical Analysis. """ @@ -887,7 +904,7 @@ def adj_alpha(self): def critical_tval(self, alpha=None): """ - Utility function to derive the critial t-value based on given alpha + Utility function to derive the critical t-value based on given alpha that are needed for hypothesis testing Parameters @@ -926,7 +943,7 @@ def filter_tvals(self, critical_t=None, alpha=None): Parameters ---------- - critical : scalar + critical_t : scalar critical t-value to determine whether parameters are statistically significant @@ -1041,12 +1058,7 @@ def aicc(self): @cache_readonly def bic(self): return get_BIC(self) - - @cache_readonly - def DoD(self): - #Degree of Dependency - return (np.log(self.model.m*self.k) - np.log(self.ENP))/(np.log(self.model.m*self.k) - np.log(self.k)) - + @cache_readonly def pseudoR2(self): return None @@ -1057,8 +1069,7 @@ def adj_pseudoR2(self): @cache_readonly def pvalues(self): - n = self.n - return t.sf(np.abs(self.tvalues), n - 1) * 2 + return None @cache_readonly def conf_int(self): @@ -1067,9 +1078,17 @@ def conf_int(self): @cache_readonly def use_t(self): return None - - #Li et al. (2020) Annals of AAG - def get_bws_intervals(self, selector, pval=0.95): + + def get_bws_intervals(self, selector, level=0.95): + """ + Computes bandwidths confidence interval (CI) for GWR. + The CI is based on Akaike weights and the bandwidth search algorithm used. + Details are in Li et al. (2020) Annals of AAG + + Returns a tuple with lower and upper bound of the bw CI. + e.g. (100, 300) + """ + try: import pandas as pd except ImportError: @@ -1088,11 +1107,11 @@ def get_bws_intervals(self, selector, pval=0.95): #Calculate cum. AICc weights aiccs['cum_w_ak'] = aiccs.w_aic_ak.cumsum() #Find index where the cum weights above p-val - index = len(aiccs[aiccs.cum_w_ak < pval]) + 1 + index = len(aiccs[aiccs.cum_w_ak < level]) + 1 #Get bw boundaries interval = (aiccs.iloc[:index,:].bw.min(),aiccs.iloc[:index,:].bw.max()) return interval - + def local_collinearity(self): """ @@ -1106,7 +1125,7 @@ def local_collinearity(self): Returns four arrays with the order and dimensions listed above where n is the number of locations used as calibrations points and p is the - nubmer of explanatory variables. Local correlation coefficient and local + number of explanatory variables. Local correlation coefficient and local VIF are not calculated for constant term. """ @@ -1129,25 +1148,25 @@ def local_collinearity(self): vdp_pi = np.ndarray((nrow, nvar, nvar)) for i in range(nrow): - wi = self.model._build_wi(i,self.model.bw) + wi = self.model._build_wi(i, self.model.bw) sw = np.sum(wi) wi = wi / sw tag = 0 for j, k in jk: - corr_mat[i, tag] = corr( - np.cov(x[:, j], x[:, k], aweights=wi))[0][1] + corr_mat[i, tag] = corr(np.cov(x[:, j], x[:, k], + aweights=wi))[0][1] tag = tag + 1 if self.model.constant: corr_mati = corr(np.cov(x[:, 1:].T, aweights=wi)) - vifs_mat[i, ] = np.diag(np.linalg.solve( - corr_mati, np.identity((nvar - 1)))) + vifs_mat[i, ] = np.diag( + np.linalg.solve(corr_mati, np.identity((nvar - 1)))) else: corr_mati = corr(np.cov(x.T, aweights=wi)) - vifs_mat[i, ] = np.diag(np.linalg.solve( - corr_mati, np.identity((nvar)))) + vifs_mat[i, ] = np.diag( + np.linalg.solve(corr_mati, np.identity((nvar)))) xw = x * wi.reshape((nrow, 1)) sxw = np.sqrt(np.sum(xw**2, axis=0)) @@ -1164,7 +1183,7 @@ def local_collinearity(self): return corr_mat, vifs_mat, local_CN, VDP - def spatial_variability(self, selector, n_iters=1000, seed=None, pool=None): + def spatial_variability(self, selector, n_iters=1000, seed=None): """ Method to compute a Monte Carlo test of spatial variability for each estimated coefficient surface. @@ -1219,14 +1238,19 @@ def spatial_variability(self, selector, n_iters=1000, seed=None, pool=None): init_sd = np.std(self.params, axis=0) SDs = [] - for x in range(n_iters): - print("MC iteration:",x,"/1000") + try: + from tqdm.auto import tqdm # if they have it, let users have a progress bar + except ImportError: + def tqdm(x, desc=''): # otherwise, just passthrough the range + return x + + for x in tqdm(range(n_iters), desc='Testing'): temp_coords = np.random.permutation(self.model.coords) temp_sel.coords = temp_coords - temp_bw = temp_sel.search(**search_params, pool=pool) + temp_bw = temp_sel.search(**search_params) temp_gwr.bw = temp_bw temp_gwr.coords = temp_coords - temp_params = temp_gwr.fit(**fit_params, pool=pool).params + temp_params = temp_gwr.fit(**fit_params).params temp_sd = np.std(temp_params, axis=0) SDs.append(temp_sd) @@ -1243,19 +1267,37 @@ def predictions(self): predictions = np.sum(P * self.params, axis=1).reshape((-1, 1)) return predictions - def summary(self): + def summary(self, as_str: bool = False) -> Optional[str]: """ Print out GWR summary + + Parameters + ---------- + as_str : bool + optional parameters to specify that summary results + should be returned as str and not printed to stdout + + Returns + ------- + + summary : Optional[str] + optional GWR summary string if `as_str` is True """ summary = summaryModel(self) + summaryGLM(self) + summaryGWR(self) + + if as_str: + return summary + print(summary) - return + return None class GWRResultsLite(object): """ Lightweight GWR that computes the minimum diagnostics needed for bandwidth - selection + selection. + + See FastGWR,Li et al., 2019, IJGIS. Parameters ---------- @@ -1288,11 +1330,11 @@ class GWRResultsLite(object): """ def __init__(self, model, resid, influ, params): - self.y = model.y.reshape(-1) + self.y = model.y self.family = model.family self.n = model.n self.influ = influ - self.resid_response = resid.reshape(-1) + self.resid_response = resid self.model = model self.params = params @@ -1318,11 +1360,10 @@ def resid_ss(self): return np.dot(u, u.T) - - class MGWR(GWR): """ Multiscale GWR estimation and inference. + See :cite:`Fotheringham:2017` :cite:`yu:2019`. Parameters ---------- @@ -1370,13 +1411,16 @@ class MGWR(GWR): intercept. spherical : boolean - True for shperical coordinates (long-lat), + True for spherical coordinates (long-lat), False for projected coordinates (defalut). hat_matrix : boolean True for computing and storing covariate-specific hat matrices R (n,n,k) and model hat matrix S (n,n). False (default) for computing MGWR inference on the fly. + name_x : list of strings + Names of independent variables for use in output + Attributes ---------- coords : array-like @@ -1453,6 +1497,12 @@ class MGWR(GWR): observations from each calibration point: one for each covariate (k) + name_x : list of strings + Names of independent variables for use in output + + n_jobs : integer + The number of jobs (default 1) to run in parallel. -1 means using all processors. + Examples -------- @@ -1480,29 +1530,59 @@ class MGWR(GWR): """ - def __init__(self, coords, y, X, selector, sigma2_v1=True, - kernel='bisquare', fixed=False, constant=True, - spherical=False, hat_matrix=False): + def __init__(self, coords, y, X, selector, family=Gaussian(), offset=None, + sigma2_v1=True, kernel='bisquare', fixed=False, constant=True, + spherical=False, hat_matrix=False, name_x=None, n_jobs=1): + self.family = family + if isinstance(self.family, Gaussian): """ Initialize class """ + self.y = y self.selector = selector - self.bws = self.selector.bw[0] #final set of bandwidth - self.bws_history = selector.bw[1] #bws history in backfitting - self.bw_init = self.selector.bw_init #initialization bandwidth - self.family = Gaussian() # manually set since we only support Gassian MGWR for now + self.bws = selector.bw[0] #final set of bandwidth + self.bws_history = selector.bw[1] #bws history in backfitting + self.bw_init = selector.bw_init #initialization bandwidth + if offset is None: + self.offset = np.ones((len(y), 1)) + else: + self.offset = offset * 1.0 GWR.__init__(self, coords, y, X, self.bw_init, family=self.family, sigma2_v1=sigma2_v1, kernel=kernel, fixed=fixed, constant=constant, spherical=spherical, hat_matrix=hat_matrix) - self.selector = selector self.sigma2_v1 = sigma2_v1 self.points = None self.P = None - self.offset = None self.exog_resid = None self.exog_scale = None - self_fit_params = None + self.fit_params = None + self.n_jobs = n_jobs + self.name_x = name_x + + elif isinstance(self.family, (Poisson, Binomial)): + self.coords = np.array(coords) + self.y = y + self.selector = None + self.bws = None + self.bws_history = None + self.bw_init = None + if offset is None: + self.offset = np.ones((len(y), 1)) + else: + self.offset = offset * 1.0 + GWR.__init__(self, coords, y, X, self.bw_init, family=self.family, + sigma2_v1=sigma2_v1, kernel=kernel, fixed=fixed, + constant=constant, spherical=spherical, + hat_matrix=hat_matrix) + self.sigma2_v1 = sigma2_v1 + self.points = None + self.P = None + self.exog_resid = None + self.exog_scale = None + self.fit_params = None + self.n_jobs = n_jobs + self.name_x = name_x def _chunk_compute_R(self, chunk_id=0): """ @@ -1531,7 +1611,6 @@ def _chunk_compute_R(self, chunk_id=0): err = init_pR - np.sum(pR, axis=2) #n by chunk_size for iter_i in range(self.bws_history.shape[0]): - for j in range(k): pRj_old = pR[:, :, j] + err Xj = self.X[:, j] @@ -1559,7 +1638,7 @@ def _chunk_compute_R(self, chunk_id=0): return ENP_j, CCT, pR return ENP_j, CCT - def fit(self, n_chunks=1, pool=None): + def fit(self, n_chunks=1, pool=None, ini_betas=None, tol=1e-10, max_iter=200, bw_stable=3): """ Compute MGWR inference by chunk to reduce memory footprint. @@ -1570,43 +1649,195 @@ def fit(self, n_chunks=1, pool=None): A number of chunks parameter to reduce memory usage. e.g. n_chunks=2 should reduce overall memory usage by 2. pool : A multiprocessing Pool object to enable parallel fitting; default is None. - + + ini_betas : np.ndarray, optional + Initial values for LSA (Poisson/Binomial). + tol : float + Convergence in LSA. + max_iter : integer + Max iterations for LSA. + bw_stable : integer + Iterations stopped after same bandwithds produeced Returns ------- : MGWRResults """ - params = self.selector.params - predy = np.sum(self.X * params, axis=1).reshape(-1, 1) + y = self.y + x = self.X + offset = self.offset + coords = self.coords + family = self.family - try: - from tqdm.autonotebook import tqdm #progress bar - except ImportError: + if isinstance(self.family, Gaussian): + params = self.selector.params + predy = np.sum(self.X * params, axis=1).reshape(-1, 1) + try: + from tqdm.autonotebook import tqdm #progress bar + except ImportError: + + def tqdm(x, total=0, + desc=''): #otherwise, just passthrough the range + return x + + if pool: + self.n_chunks = pool._processes * n_chunks + rslt = tqdm( + pool.imap(self._chunk_compute_R, range(self.n_chunks)), + total=self.n_chunks, desc='Inference') + else: + self.n_chunks = n_chunks + rslt = map(self._chunk_compute_R, + tqdm(range(self.n_chunks), desc='Inference')) + + rslt_list = list(zip(*rslt)) + ENP_j = np.sum(np.array(rslt_list[0]), axis=0) + CCT = np.sum(np.array(rslt_list[1]), axis=0) + + w = np.ones(self.n) + if self.hat_matrix: + R = np.hstack(rslt_list[2]) + else: + R = None + return MGWRResults(self, params, predy, CCT, ENP_j, w, R) + + elif isinstance(self.family, (Poisson, Binomial)): + from .sel_bw import Sel_BW - def tqdm(x, total=0, - desc=''): #otherwise, just passthrough the range - return x + # LSA for Poisson and Binomial - if pool: - self.n_chunks = pool._processes * n_chunks - rslt = tqdm( - pool.imap(self._chunk_compute_R, range(self.n_chunks)), - total=self.n_chunks, desc='Inference') + n_iter = 0 + diff = 1.0e6 + n = y.shape[0] + w = None + y_raw = y + bw_no_change = 0 + bw_history = [] + + if ini_betas is None: + betas = np.zeros(x.shape, np.float64) + else: + betas = ini_betas + + y_off = y / offset + y_off = self.family.starting_mu(y_off) + v = self.family.predict(y_off) + mu = self.family.starting_mu(y) + + while diff > tol and n_iter < max_iter: + n_iter += 1 + w = self.family.weights(mu) + z = v + (self.family.link.deriv(mu) * (y - mu)) + w = np.sqrt(w) + + wx = spmultiply(x, w, array_out=False) + wz = spmultiply(z, w, array_out=False) + + sel = Sel_BW(coords, wz, wx, constant=False, multi=True, + raw_y=y_raw, raw_x=x, offset=offset, ext_w=w, family=family) + bws_guass_wxz = sel.search() + print(bws_guass_wxz) + + temp_model = MGWR(coords, wz, wx, selector=sel, + constant=False, hat_matrix=True, + family=Gaussian(), offset=offset, name_x=self.name_x) + + n_betas = sel.params + + if len(bw_history) > 0 and np.allclose(bws_guass_wxz, bw_history[-1]): + bw_no_change += 1 + else: + bw_no_change = 0 + bw_history.append(np.array(bws_guass_wxz)) + + if bw_no_change >= (bw_stable - 1): + print(f"Calibration stopped as the bandwidths remain the same for {bw_stable} iterations.") + break + + v = np.sum(sel.params * x,axis=1).reshape(-1, 1) + + mu = family.fitted(v) + + if isinstance(self.family, Poisson): + mu = mu * offset + + if np.sum(n_betas**2) == 0: + diff = 0 + else: + num = np.sum((n_betas - betas)**2) / n + den = np.sum(np.sum(n_betas, axis=1)**2) + diff = np.sqrt(num / den) + + betas = n_betas + y_raw = mu.reshape(-1, 1) + + print(f"LSA iteration {n_iter}: Difference = {diff:.5f}") + + if n_iter >= max_iter: + print(f"Warning: Maximum iterations reached without convergence") + + print(f"Final bandwidths: {bws_guass_wxz}") + + mgwr_rslt = temp_model.fit() + + final_model = MGWR(coords, y, x, selector=sel, constant=False, + hat_matrix=True, family=family, offset=offset, name_x=self.name_x) + + final_model.selector = sel + final_model.bws = np.array(bws_guass_wxz) + final_model.name_x = self.name_x + + return MGWRResults(final_model, betas, mu, mgwr_rslt.raw_CCT, mgwr_rslt.ENP_j, w, mgwr_rslt.R, final_model.name_x) + else: - self.n_chunks = n_chunks - rslt = map(self._chunk_compute_R, - tqdm(range(self.n_chunks), desc='Inference')) + raise NotImplementedError('N/A') + + def exact_fit(self): + """ + A closed-form solution to MGWR estimates and inference, + the backfitting in self.fit() will converge to this solution. + + Note: this would require large memory when n > 5,000. + See Li and Fotheringham, 2020, IJGIS, pg.4. - rslt_list = list(zip(*rslt)) - ENP_j = np.sum(np.array(rslt_list[0]), axis=0) - CCT = np.sum(np.array(rslt_list[1]), axis=0) + Returns + ------- + : MGWRResults + """ + + P = [] + Q = [] + I = np.eye(self.n) + for j1 in range(self.k): + Aj = GWR(self.coords,self.y,self.X[:,j1].reshape(-1,1),bw=self.bws[j1],hat_matrix=True,constant=False,n_jobs=self.n_jobs).fit().S + Pj = [] + for j2 in range(self.k): + if j1 == j2: + Pj.append(I) + else: + Pj.append(Aj) + P.append(Pj) + Q.append([Aj]) + P = np.block(P) + Q = np.block(Q) + R = np.linalg.solve(P, Q) + f = R.dot(self.y) + + params = f/self.X.T.reshape(-1,1) + params = params.reshape(-1,self.n).T + + R = np.stack(np.split(R,self.k),axis=2) + ENP_j = np.trace(R, axis1=0, axis2=1) + predy = np.sum(self.X * params, axis=1).reshape(-1, 1) w = np.ones(self.n) - if self.hat_matrix: - R = np.hstack(rslt_list[2]) - else: - R = None + + CCT = np.zeros((self.n,self.k)) + for j in range(self.k): + CCT[:, j] = ((R[:, :, j] / self.X[:, j].reshape(-1, 1))**2).sum(axis=1) + return MGWRResults(self, params, predy, CCT, ENP_j, w, R) + def predict(self): ''' Not implemented. @@ -1642,6 +1873,9 @@ class MGWRResults(GWRResults): n*1, final weight used for iteratively re-weighted least sqaures; default is None + name_x : list of strings + Names of independent variables for use in output + Attributes ---------- model : GWR Object @@ -1734,7 +1968,7 @@ class MGWRResults(GWRResults): R2 : float R-squared for the entire model (1- RSS/TSS) - + adj_R2 : float adjusted R-squared for the entire model @@ -1779,37 +2013,32 @@ class MGWRResults(GWRResults): """ - def __init__(self, model, params, predy, CCT, ENP_j, w, R): + def __init__(self, model, params, predy, CCT, ENP_j, w, R, name_x=None): """ Initialize class """ self.ENP_j = ENP_j self.R = R + self.raw_CCT = CCT GWRResults.__init__(self, model, params, predy, None, CCT, None, w) if model.hat_matrix: self.S = np.sum(self.R, axis=2) self.predy = predy + self.name_x = name_x @cache_readonly def tr_S(self): return np.sum(self.ENP_j) - + @cache_readonly def W(self): Ws = [] for bw_j in self.model.bws: - W = np.array([self.model._build_wi(i, bw_j) for i in range(self.n)]) + W = np.array( + [self.model._build_wi(i, bw_j) for i in range(self.n)]) Ws.append(W) return Ws - - @cache_readonly - def sumW(self): - sumW = [] - for bw_j in self.model.bws: - sum_Wj = np.array([np.sum(self.model._build_wi(i, bw_j)) for i in range(self.n)]) - sumW.append(sum_Wj) - return np.array(sumW).T - + @cache_readonly def adj_alpha_j(self): """ @@ -1817,7 +2046,7 @@ def adj_alpha_j(self): testing. Includes corrected value for 90% (.1), 95% (.05), and 99% (.01) confidence levels. Correction comes from: - da Silva, A. R., & Fotheringham, A. S. (2015). The Multiple Testing Issue in + :cite:`Silva:2016` : da Silva, A. R., & Fotheringham, A. S. (2015). The Multiple Testing Issue in Geographically Weighted Regression. Geographical Analysis. """ @@ -1905,46 +2134,37 @@ def RSS(self): def TSS(self): raise NotImplementedError( 'Not yet implemented for multiple bandwidths') - + @cache_readonly def localR2(self): - if isinstance(self.family, Gaussian): - localR2 = np.zeros(shape=(self.n, 1)) - for i in range(self.n): - wi = self.model._build_wi(i, self.model.bw_init).reshape(-1, 1) - y_bar = np.sum(self.y.reshape(-1, 1) * wi)/ np.sum(wi) - TSS = np.sum(wi * (self.y.reshape(-1, 1) - y_bar)**2) - RSS = np.sum(wi * self.resid_response.reshape(-1, 1)**2) - localR2[i] = 1 - RSS/TSS - - return localR2 - - else: - raise NotImplementedError('Only applicable to Gaussian') + raise NotImplementedError( + 'Not yet implemented for multiple bandwidths') - @cache_readonly def y_bar(self): raise NotImplementedError( 'Not yet implemented for multiple bandwidths') - - @cache_readonly - def DoD_j(self): - return [(np.log(self.model.m) - np.log(enp_j))/(np.log(self.model.m) - np.log(1)) for enp_j in self.ENP_j] @cache_readonly def predictions(self): raise NotImplementedError('Not yet implemented for MGWR') - #Function for getting BW intervals - #Li et al. (2020) Annals of AAG - def get_bws_intervals(self, selector, pval=0.95): + #Function for getting BWs intervals + def get_bws_intervals(self, selector, level=0.95): + """ + Computes bandwidths confidence intervals (CIs) for MGWR. + The CIs are based on Akaike weights and the bandwidth search algorithm used. + Details are in Li et al. (2020) Annals of AAG + + Returns a list of confidence intervals. e.g. [(40, 60), (100, 180), (150, 300)] + + """ intervals = [] try: import pandas as pd except ImportError: return - + for j in range(self.k): #Get AICcs and associated bw from the last iteration of back-fitting and make a DataFrame aiccs = pd.DataFrame(list(zip(*selector.sel_hist[-self.k+j]))[1],columns=["aicc"]) @@ -1959,7 +2179,7 @@ def get_bws_intervals(self, selector, pval=0.95): #Calculate cum. AICc weights aiccs['cum_w_ak'] = aiccs.w_aic_ak.cumsum() #Find index where the cum weights above p-val - index = len(aiccs[aiccs.cum_w_ak < pval]) + 1 + index = len(aiccs[aiccs.cum_w_ak < level]) + 1 #Get bw boundaries interval = (aiccs.iloc[:index,:].bw.min(),aiccs.iloc[:index,:].bw.max()) intervals += [interval] @@ -1974,7 +2194,7 @@ def local_collinearity(self): local condition number (n, 1) local variance-decomposition proportions (n, p) - Returns four arrays with the order and dimensions listed above where n + Returns two arrays with the order and dimensions listed above where n is the number of locations used as calibrations points and p is the nubmer of explanatory variables @@ -2009,7 +2229,7 @@ def local_collinearity(self): return local_CN, VDP - def spatial_variability(self, selector, n_iters=1000, seed=None, pool=None): + def spatial_variability(self, selector, n_iters=1000, seed=None): """ Method to compute a Monte Carlo test of spatial variability for each estimated coefficient surface. @@ -2060,11 +2280,17 @@ def spatial_variability(self, selector, n_iters=1000, seed=None, pool=None): init_sd = np.std(self.params, axis=0) SDs = [] - for x in range(n_iters): - print("MC iteration:",x,"/1000") + try: + from tqdm.auto import tqdm # if they have it, let users have a progress bar + except ImportError: + + def tqdm(x, desc=''): # otherwise, just passthrough the range + return x + + for x in tqdm(range(n_iters), desc='Testing'): temp_coords = np.random.permutation(self.model.coords) temp_sel.coords = temp_coords - temp_sel.search(**search_params, pool=pool) + temp_sel.search(**search_params) temp_params = temp_sel.params temp_sd = np.std(temp_params, axis=0) SDs.append(temp_sd) @@ -2072,10 +2298,26 @@ def spatial_variability(self, selector, n_iters=1000, seed=None, pool=None): p_vals = (np.sum(np.array(SDs) > init_sd, axis=0) / float(n_iters)) return p_vals - def summary(self): + def summary(self, as_str: bool=False) -> Optional[str]: """ Print out MGWR summary + + Parameters + ---------- + as_str : bool + optional parameters to specify that summary results + should be returned as str and not printed to stdout + + Returns + ------- + + summary : Optional[str] + optional MGWR summary string if `as_str` is True """ summary = summaryModel(self) + summaryGLM(self) + summaryMGWR(self) + + if as_str: + return summary + print(summary) return From 09828cb4c477b361156aa5b77794b6c05a48ffa1 Mon Sep 17 00:00:00 2001 From: kchenlun Date: Fri, 8 Aug 2025 11:43:08 -0400 Subject: [PATCH 3/6] Add files via upload --- mgwrlib/mgwr/__init__.py | 2 +- mgwrlib/mgwr/kernels.py | 4 +- mgwrlib/mgwr/search.py | 73 ++++++++++++------- mgwrlib/mgwr/sel_bw.py | 147 +++++++++++++++++++++------------------ 4 files changed, 131 insertions(+), 95 deletions(-) diff --git a/mgwrlib/mgwr/__init__.py b/mgwrlib/mgwr/__init__.py index 9132d8d8..777a9fce 100755 --- a/mgwrlib/mgwr/__init__.py +++ b/mgwrlib/mgwr/__init__.py @@ -1,4 +1,4 @@ -__version__ = "2.1.1" +__version__ = "2.2.1" from . import gwr from . import sel_bw diff --git a/mgwrlib/mgwr/kernels.py b/mgwrlib/mgwr/kernels.py index f66104bd..27d4715e 100755 --- a/mgwrlib/mgwr/kernels.py +++ b/mgwrlib/mgwr/kernels.py @@ -67,15 +67,13 @@ def _kernel_funcs(self, zs): if self.function == 'triangular': return 1 - zs elif self.function == 'uniform': - return np.ones(zi.shape) * 0.5 + return np.ones(zs.shape) * 0.5 elif self.function == 'quadratic': return (3. / 4) * (1 - zs**2) elif self.function == 'quartic': return (15. / 16) * (1 - zs**2)**2 elif self.function == 'gaussian': return np.exp(-0.5 * (zs)**2) - elif self.function == 'gui-gaussian': - return np.exp(-0.5 * (2.45*zs)**2) elif self.function == 'bisquare': return (1 - (zs)**2)**2 elif self.function == 'exponential': diff --git a/mgwrlib/mgwr/search.py b/mgwrlib/mgwr/search.py index f1040bb5..9588b991 100755 --- a/mgwrlib/mgwr/search.py +++ b/mgwrlib/mgwr/search.py @@ -5,13 +5,14 @@ import numpy as np from copy import deepcopy - -def golden_section(a, c, delta, function, tol, max_iter, int_score=False, +def golden_section(a, c, delta, function, tol, max_iter, bw_max, int_score=False, verbose=False): """ Golden section search routine + Method: p212, 9.6.4 - Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002). + + :cite:`fotheringham_geographically_2002`: Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002). Geographically weighted regression: the analysis of spatially varying relationships. Parameters @@ -41,14 +42,19 @@ def golden_section(a, c, delta, function, tol, max_iter, int_score=False, output : list of tuples searching history """ - b = a + delta * np.abs(c - a) - d = c - delta * np.abs(c - a) - score = 0.0 + if c == np.inf: + b = a + delta * np.abs(n - a) + d = n - delta * np.abs(n - a) + else: + b = a + delta * np.abs(c - a) + d = c - delta * np.abs(c - a) + + opt_score = np.inf diff = 1.0e9 iters = 0 output = [] dict = {} - while np.abs(diff) > tol and iters < max_iter: + while np.abs(diff) > tol and iters < max_iter and a != np.inf: iters += 1 if int_score: b = np.round(b) @@ -61,7 +67,7 @@ def golden_section(a, c, delta, function, tol, max_iter, int_score=False, dict[b] = score_b if verbose: print("Bandwidth: ", np.round(b, 2), ", score: ", - "{0:.2f}".format(score_b)) + "{0:.2f}".format(score_b[0])) if d in dict: score_d = dict[d] @@ -70,7 +76,7 @@ def golden_section(a, c, delta, function, tol, max_iter, int_score=False, dict[d] = score_d if verbose: print("Bandwidth: ", np.round(d, 2), ", score: ", - "{0:.2f}".format(score_d)) + "{0:.2f}".format(score_d[0])) if score_b <= score_d: opt_val = b @@ -86,10 +92,29 @@ def golden_section(a, c, delta, function, tol, max_iter, int_score=False, b = d d = c - delta * np.abs(c - a) + output.append((opt_val, opt_score)) + + opt_val = np.round(opt_val, 2) + if (opt_val, opt_score) not in output: + output.append((opt_val, opt_score)) + diff = score_b - score_d score = opt_score - output = list(dict.items()) - return np.round(opt_val, 2), opt_score, output + + + if a == np.inf or bw_max == np.inf: + score_ols = function(np.inf) + output.append((np.inf, score_ols)) + + if score_ols <= opt_score: + opt_score = score_ols + opt_val = np.inf + + if verbose: + print("Bandwidth: ", np.inf, ", score: ", + "{0:.2f}".format(score_ols[0])) + + return opt_val, opt_score, output def equal_interval(l_bound, u_bound, interval, function, int_score=False, @@ -132,17 +157,18 @@ def equal_interval(l_bound, u_bound, interval, function, int_score=False, score_a = function(a) if verbose: - print("Bandwidth:", a, ", score:", "{0:.2f}".format(score_a)) + print(score_a) + print("Bandwidth:", a, ", score:", "{0:.2f}".format(score_a[0])) output.append((a, score_a)) - + opt_val = a opt_score = score_a - + while b < c: score_b = function(b) if verbose: - print("Bandwidth:", b, ", score:", "{0:.2f}".format(score_b)) + print("Bandwidth:", b, ", score:", "{0:.2f}".format(score_b[0])) output.append((b, score_b)) if score_b < opt_score: @@ -152,8 +178,8 @@ def equal_interval(l_bound, u_bound, interval, function, int_score=False, score_c = function(c) if verbose: - print("Bandwidth:", c, ", score:", "{0:.2f}".format(score_c)) - + print("Bandwidth:", c, ", score:", "{0:.2f}".format(score_c[0])) + output.append((c, score_c)) if score_c < opt_score: @@ -165,7 +191,7 @@ def equal_interval(l_bound, u_bound, interval, function, int_score=False, def multi_bw(init, y, X, n, k, family, tol, max_iter, rss_score, gwr_func, bw_func, sel_func, multi_bw_min, multi_bw_max, bws_same_times, - verbose=False): + verbose=False, ext_w=None, raw_y=None, raw_x=None, offset=None): """ Multiscale GWR bandwidth search procedure using iterative GAM backfitting """ @@ -188,7 +214,6 @@ def multi_bw(init, y, X, n, k, family, tol, max_iter, rss_score, gwr_func, BWs = [] bw_stable_counter = 0 bws = np.empty(k) - gwr_sel_hist = [] try: @@ -201,13 +226,13 @@ def tqdm(x, desc=''): #otherwise, just passthrough the range for iters in tqdm(range(1, max_iter + 1), desc='Backfitting'): new_XB = np.zeros_like(X) params = np.zeros_like(X) - + for j in range(k): temp_y = XB[:, j].reshape((-1, 1)) temp_y = temp_y + err temp_X = X[:, j].reshape((-1, 1)) bw_class = bw_func(temp_y, temp_X) - + if bw_stable_counter >= bws_same_times: #If in backfitting, all bws not changing in bws_same_times (default 5) iterations bw = bws[j] @@ -221,13 +246,13 @@ def tqdm(x, desc=''): #otherwise, just passthrough the range new_XB[:, j] = optim_model.predy.reshape(-1) params[:, j] = param bws[j] = bw - + + #If bws remain the same as from previous iteration if (iters > 1) and np.all(BWs[-1] == bws): bw_stable_counter += 1 else: bw_stable_counter = 0 - - + num = np.sum((new_XB - XB)**2) / n den = np.sum(np.sum(new_XB, axis=1)**2) score = (num / den)**0.5 diff --git a/mgwrlib/mgwr/sel_bw.py b/mgwrlib/mgwr/sel_bw.py index 4450ca02..e851cbd6 100755 --- a/mgwrlib/mgwr/sel_bw.py +++ b/mgwrlib/mgwr/sel_bw.py @@ -5,8 +5,8 @@ __author__ = "Taylor Oshan Tayoshan@gmail.com" import spreg.user_output as USER +import warnings import numpy as np -import multiprocessing as mp from scipy.spatial.distance import pdist from scipy.optimize import minimize_scalar from spglm.family import Gaussian, Poisson, Binomial @@ -23,7 +23,8 @@ class Sel_BW(object): Select bandwidth for kernel Methods: p211 - p213, bandwidth selection - Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002). + + :cite:`fotheringham_geographically_2002`: Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002). Geographically weighted regression: the analysis of spatially varying relationships. Parameters @@ -36,15 +37,17 @@ class Sel_BW(object): n*k2, local independent variable, including constant. coords : list of tuples (x,y) of points used in bandwidth selection - family : string - GWR model type: 'Gaussian', 'logistic, 'Poisson'' - offset : array - n*1, the offset variable at the ith location. For Poisson model - this term is often the size of the population at risk or - the expected size of the outcome in spatial epidemiology - Default is None where Ni becomes 1.0 for all locations - kernel : string - kernel function: 'gaussian', 'bisquare', 'exponetial' + family : family object/instance, optional + underlying probability model: Gaussian(), Poisson(), + Binomial(). Default is Gaussian(). + offset : array + n*1, the offset variable at the ith location. For Poisson model + this term is often the size of the population at risk or + the expected size of the outcome in spatial epidemiology + Default is None where Ni becomes 1.0 for all locations + kernel : string, optional + kernel function: 'gaussian', 'bisquare', 'exponential'. + Default is 'bisquare'. fixed : boolean True for fixed bandwidth and False for adaptive (NN) multi : True for multiple (covaraite-specific) bandwidths @@ -53,9 +56,11 @@ class Sel_BW(object): constant : boolean True to include intercept (default) in model and False to exclude intercept. - spherical : boolean - True for shperical coordinates (long-lat), - False for projected coordinates (defalut). + spherical : boolean + True for shperical coordinates (long-lat), + False for projected coordinates (defalut). + n_jobs : integer + The number of jobs (default -1) to run in parallel. -1 means using all processors. Attributes ---------- @@ -93,36 +98,36 @@ class Sel_BW(object): constant : boolean True to include intercept (default) in model and False to exclude intercept. - offset : array - n*1, the offset variable at the ith location. For Poisson model - this term is often the size of the population at risk or - the expected size of the outcome in spatial epidemiology - Default is None where Ni becomes 1.0 for all locations - spherical : boolean - True for shperical coordinates (long-lat), - False for projected coordinates (defalut). - search_params : dict - stores search arguments - int_score : boolan - True if adaptive bandwidth is being used and bandwdith - selection should be discrete. False - if fixed bandwidth is being used and bandwidth does not have - to be discrete. - bw : scalar or array-like - Derived optimal bandwidth(s). Will be a scalar for GWR - (multi=False) and a list of scalars for MGWR (multi=True) - with one bandwidth for each covariate. - S : array - n*n, hat matrix derived from the iterative backfitting - algorthim for MGWR during bandwidth selection - R : array - n*n*k, partial hat matrices derived from the iterative - backfitting algoruthm for MGWR during bandwidth selection. - There is one n*n matrix for each of the k covariates. - params : array - n*k, calibrated parameter estimates for MGWR based on the - iterative backfitting algorithm - computed and saved here to - avoid having to do it again in the MGWR object. + offset : array + n*1, the offset variable at the ith location. For Poisson model + this term is often the size of the population at risk or + the expected size of the outcome in spatial epidemiology + Default is None where Ni becomes 1.0 for all locations + spherical : boolean + True for shperical coordinates (long-lat), + False for projected coordinates (defalut). + search_params : dict + stores search arguments + int_score : boolan + True if adaptive bandwidth is being used and bandwdith + selection should be discrete. False + if fixed bandwidth is being used and bandwidth does not have + to be discrete. + bw : scalar or array-like + Derived optimal bandwidth(s). Will be a scalar for GWR + (multi=False) and a list of scalars for MGWR (multi=True) + with one bandwidth for each covariate. + S : array + n*n, hat matrix derived from the iterative backfitting + algorthim for MGWR during bandwidth selection + R : array + n*n*k, partial hat matrices derived from the iterative + backfitting algoruthm for MGWR during bandwidth selection. + There is one n*n matrix for each of the k covariates. + params : array + n*k, calibrated parameter estimates for MGWR based on the + iterative backfitting algorithm - computed and saved here to + avoid having to do it again in the MGWR object. Examples -------- @@ -172,7 +177,7 @@ class Sel_BW(object): def __init__(self, coords, y, X_loc, X_glob=None, family=Gaussian(), offset=None, kernel='bisquare', fixed=False, multi=False, - constant=True, spherical=False): + constant=True, spherical=False,n_jobs=-1,ext_w=None,raw_y=None,raw_x=None): self.coords = np.array(coords) self.y = y self.X_loc = X_loc @@ -191,14 +196,18 @@ def __init__(self, coords, y, X_loc, X_glob=None, family=Gaussian(), self._functions = [] self.constant = constant self.spherical = spherical + self.n_jobs = n_jobs self.search_params = {} + self.ext_w = ext_w + self.raw_y = raw_y + self.raw_x = raw_x def search(self, search_method='golden_section', criterion='AICc', bw_min=None, bw_max=None, interval=0.0, tol=1.0e-6, max_iter=200, init_multi=None, tol_multi=1.0e-5, rss_score=False, max_iter_multi=200, multi_bw_min=[None], multi_bw_max=[None - ], bws_same_times=5, pool=None, verbose=False): + ], bws_same_times=5, verbose=False,pool=None): """ Method to select one unique bandwidth for a gwr model or a bandwidth vector for a mgwr model. @@ -244,10 +253,9 @@ def search(self, search_method='golden_section', criterion='AICc', bws_same_times : If bandwidths keep the same between iterations for bws_same_times (default 5) in backfitting, then use the current set of bandwidths as final bandwidths. - pool : A multiprocessing Pool object to enbale parallel fitting; - default is None verbose : Boolean If true, bandwidth searching history is printed out; default is False. + pool : None, deprecated and not used. Returns ------- @@ -265,7 +273,6 @@ def search(self, search_method='golden_section', criterion='AICc', self.bw_min = bw_min self.bw_max = bw_max self.bws_same_times = bws_same_times - self.pool = pool self.verbose = verbose if len(multi_bw_min) == k: @@ -288,6 +295,10 @@ def search(self, search_method='golden_section', criterion='AICc', " a single entry or a list containing an entry for each of k" " covariates including the intercept") + if pool: + warnings.warn("The pool parameter is no longer used and will have no effect; parallelization is default and implemented using joblib instead.", RuntimeWarning, stacklevel=2) + + self.interval = interval self.tol = tol self.max_iter = max_iter @@ -309,29 +320,29 @@ def search(self, search_method='golden_section', criterion='AICc', if self.multi: self._mbw() self.params = self.bw[3] #params n by k - self.sel_hist = self.bw[-2] - self.bw_init = self.bw[-1] #scalar, optimal bw from initial gwr model + self.sel_hist = self.bw[-2] #bw searching history + self.bw_init = self.bw[ + -1] #scalar, optimal bw from initial gwr model else: self._bw() self.sel_hist = self.bw[-1] - self.pool = None return self.bw[0] def _bw(self): gwr_func = lambda bw: getDiag[self.criterion](GWR( self.coords, self.y, self.X_loc, bw, family=self.family, kernel= self.kernel, fixed=self.fixed, constant=self.constant, offset=self. - offset, spherical=self.spherical).fit(lite=True, pool=self.pool)) + offset, spherical=self.spherical,ext_w=self.ext_w,n_jobs=self.n_jobs).fit(lite=True)) self._optimized_function = gwr_func if self.search_method == 'golden_section': - self.bw_min, self.bw_max = self._init_section(self.X_glob, self.X_loc, self.coords, + a, c = self._init_section(self.X_glob, self.X_loc, self.coords, self.constant) delta = 0.38197 #1 - (np.sqrt(5.0)-1.0)/2.0 - self.bw = golden_section(self.bw_min, self.bw_max, delta, gwr_func, self.tol, - self.max_iter, self.int_score, + self.bw = golden_section(a, c, delta, gwr_func, self.tol, + self.max_iter, self.bw_max, self.int_score, self.verbose) elif self.search_method == 'interval': self.bw = equal_interval(self.bw_min, self.bw_max, self.interval, @@ -374,28 +385,29 @@ def _mbw(self): max_iter = self.max_iter bws_same_times = self.bws_same_times - def gwr_func(y, X, bw): + def gwr_func(y, X, bw,family=Gaussian(),offset=None,ext_w=None): return GWR(coords, y, X, bw, family=family, kernel=kernel, fixed=fixed, offset=offset, constant=False, - spherical=self.spherical, hat_matrix=False).fit( - lite=True, pool=self.pool) + spherical=self.spherical, hat_matrix=False,n_jobs=self.n_jobs,ext_w=ext_w).fit( + lite=True) - def bw_func(y, X): + def bw_func(y, X,family=Gaussian(),offset=None,ext_w=None): selector = Sel_BW(coords, y, X, X_glob=[], family=family, kernel=kernel, fixed=fixed, offset=offset, - constant=False, spherical=self.spherical) + constant=False, spherical=self.spherical,ext_w=ext_w,n_jobs=self.n_jobs) return selector def sel_func(bw_func, bw_min=None, bw_max=None): return bw_func.search( search_method=search_method, criterion=criterion, bw_min=bw_min, bw_max=bw_max, interval=interval, tol=tol, - max_iter=max_iter, pool=self.pool, verbose=False) + max_iter=max_iter, verbose=False) self.bw = multi_bw(self.init_multi, y, X, n, k, family, self.tol_multi, self.max_iter_multi, self.rss_score, gwr_func, bw_func, sel_func, multi_bw_min, multi_bw_max, - bws_same_times, verbose=self.verbose) + bws_same_times, verbose=self.verbose,raw_y=self.raw_y, + raw_x=self.raw_x,ext_w=self.ext_w,offset=self.offset) def _init_section(self, X_glob, X_loc, coords, constant): if len(X_glob) > 0: @@ -418,16 +430,17 @@ def _init_section(self, X_glob, X_loc, coords, constant): else: min_dist = np.min(np.array([np.min(np.delete( local_cdist(coords[i],coords,spherical=self.spherical),i)) - for i in range(n)])) + for i in range(n)])) max_dist = np.max(np.array([np.max( - local_cdist(coords[i],coords,spherical=self.spherical)) - for i in range(n)])) + local_cdist(coords[i],coords,spherical=self.spherical)) + for i in range(n)])) + a = min_dist / 2.0 c = max_dist * 2.0 if self.bw_min is not None: a = self.bw_min - if self.bw_max is not None: + if self.bw_max is not None and self.bw_max is not np.inf: c = self.bw_max return a, c From 70187e9a14f744464eebd3c91cfef1b43fcec2b4 Mon Sep 17 00:00:00 2001 From: kchenlun Date: Fri, 15 Aug 2025 14:46:27 -0400 Subject: [PATCH 4/6] Add files via upload Add interval search for LSA --- mgwrlib/mgwr/gwr.py | 165 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 130 insertions(+), 35 deletions(-) diff --git a/mgwrlib/mgwr/gwr.py b/mgwrlib/mgwr/gwr.py index fcd9a06f..38a4688e 100755 --- a/mgwrlib/mgwr/gwr.py +++ b/mgwrlib/mgwr/gwr.py @@ -1638,36 +1638,120 @@ def _chunk_compute_R(self, chunk_id=0): return ENP_j, CCT, pR return ENP_j, CCT - def fit(self, n_chunks=1, pool=None, ini_betas=None, tol=1e-10, max_iter=200, bw_stable=3): + def fit(self, search_method='golden_section', criterion='AICc', + bw_min=None, bw_max=None, interval=0.0, tol=1.0e-6, + max_iter=200, init_multi=None, tol_multi=1.0e-5, + rss_score=False, multi_bw_min=[None], multi_bw_max=[None], + bws_same_times=5, max_iter_multi=200, + ini_betas=None, tol_lsa=1e-10, max_iter_lsa=200, bw_lsa_stable=3, + verbose=False, pool=None, n_chunks=1): """ Compute MGWR inference by chunk to reduce memory footprint. Parameters ---------- - n_chunks : integer, optional - A number of chunks parameter to reduce memory usage. - e.g. n_chunks=2 should reduce overall memory usage by 2. - pool : A multiprocessing Pool object to enable parallel fitting; default is None. - - ini_betas : np.ndarray, optional - Initial values for LSA (Poisson/Binomial). - tol : float - Convergence in LSA. - max_iter : integer - Max iterations for LSA. - bw_stable : integer - Iterations stopped after same bandwithds produeced + search_method : string + bw search method: 'golden', 'interval' + criterion : string + bw selection criterion: 'AICc', 'AIC', 'BIC', 'CV' + bw_min : float + min value used in bandwidth search + bw_max : float + max value used in bandwidth search + interval : float + interval increment used in interval search + tol : float + tolerance used to determine convergence + max_iter : integer + max iterations if no convergence to tol + init_multi : float + None (default) to initialize MGWR with a bandwidth + derived from GWR. Otherwise this option will choose the + bandwidth to initialize MGWR with. + tol_multi : convergence tolerence for the multiple bandwidth + backfitting algorithm; a larger tolerance may stop the + algorith faster though it may result in a less optimal + model + rss_score : True to use the residual sum of sqaures to evaluate + each iteration of the multiple bandwidth backfitting + routine and False to use a smooth function; default is + False + multi_bw_min : list + min values used for each covariate in mgwr bandwidth search. + Must be either a single value or have one value for + each covariate including the intercept + multi_bw_max : list + max values used for each covariate in mgwr bandwidth + search. Must be either a single value or have one value + for each covariate including the intercept + bws_same_times : If bandwidths keep the same between iterations for + bws_same_times (default 5) in backfitting, then use the + current set of bandwidths as final bandwidths. + max_iter_multi : max iterations if no convergence to tol for multiple + bandwidth backfitting algorithm + ini_betas : np.ndarray, optional + Initial values for LSA (Poisson/Binomial). + tol_lsa : float + Convergence in LSA. + max_iter_lsa : integer + Max iterations for LSA. + bw_lsa_stable : integer + If bandwidths keep the same between iterations for + bw_lsa_stable (default 3) in backfitting, then use the + current set of bandwidths as final bandwidths for LSA. + verbose : Boolean + If true, bandwidth searching history is printed out; default is False. + pool : None, deprecated and not used. + n_chunks : integer, optional + A number of chunks parameter to reduce memory usage. + e.g. n_chunks=2 should reduce overall memory usage by 2. Returns ------- : MGWRResults """ y = self.y x = self.X + self.X_loc = x + X_glob = [] offset = self.offset coords = self.coords family = self.family + if search_method=='interval': + init_multi=42 + interval + else: + init_multi=init_multi + + if bw_min==None: + bw_min=multi_bw_min[0] + + if bw_max==None: + bw_max=multi_bw_max[0] + + k = self.X.shape[1] + if len(multi_bw_min) == k: + multi_bw_min = multi_bw_min + elif len(multi_bw_min) == 1: + multi_bw_min = multi_bw_min * k + else: + raise AttributeError( + "multi_bw_min must be either a list containing" + " a single entry or a list containing an entry for each of k") + + if len(multi_bw_max) == k: + multi_bw_max = multi_bw_max + elif len(multi_bw_max) == 1: + multi_bw_max = multi_bw_max * k + else: + raise AttributeError( + "multi_bw_max must be either a list containing" + " a single entry or a list containing an entry for each of k" + " covariates including the intercept") + + if pool: + warnings.warn("The pool parameter is no longer used and will have no effect; parallelization is default and implemented using joblib instead.", RuntimeWarning, stacklevel=2) + if isinstance(self.family, Gaussian): params = self.selector.params predy = np.sum(self.X * params, axis=1).reshape(-1, 1) @@ -1678,15 +1762,9 @@ def fit(self, n_chunks=1, pool=None, ini_betas=None, tol=1e-10, max_iter=200, bw def tqdm(x, total=0, desc=''): #otherwise, just passthrough the range return x - - if pool: - self.n_chunks = pool._processes * n_chunks - rslt = tqdm( - pool.imap(self._chunk_compute_R, range(self.n_chunks)), - total=self.n_chunks, desc='Inference') - else: - self.n_chunks = n_chunks - rslt = map(self._chunk_compute_R, + + self.n_chunks = n_chunks + rslt = map(self._chunk_compute_R, tqdm(range(self.n_chunks), desc='Inference')) rslt_list = list(zip(*rslt)) @@ -1710,8 +1788,8 @@ def tqdm(x, total=0, n = y.shape[0] w = None y_raw = y - bw_no_change = 0 - bw_history = [] + bw_lsa_no_change = 0 + bw_lsa_history = [] if ini_betas is None: betas = np.zeros(x.shape, np.float64) @@ -1723,7 +1801,7 @@ def tqdm(x, total=0, v = self.family.predict(y_off) mu = self.family.starting_mu(y) - while diff > tol and n_iter < max_iter: + while diff > tol_lsa and n_iter < max_iter_lsa: n_iter += 1 w = self.family.weights(mu) z = v + (self.family.link.deriv(mu) * (y - mu)) @@ -1734,7 +1812,23 @@ def tqdm(x, total=0, sel = Sel_BW(coords, wz, wx, constant=False, multi=True, raw_y=y_raw, raw_x=x, offset=offset, ext_w=w, family=family) - bws_guass_wxz = sel.search() + + bws_guass_wxz = sel.search(search_method=search_method, + criterion=criterion, + bw_min=bw_min, + bw_max=bw_max, + interval=interval, + tol=tol, + max_iter=max_iter_multi, + init_multi=init_multi, + tol_multi=tol_multi, + rss_score=rss_score, + max_iter_multi=max_iter_multi, + multi_bw_min=multi_bw_min, + multi_bw_max=multi_bw_max, + bws_same_times=bws_same_times, + verbose=verbose, + pool=None) print(bws_guass_wxz) temp_model = MGWR(coords, wz, wx, selector=sel, @@ -1743,14 +1837,14 @@ def tqdm(x, total=0, n_betas = sel.params - if len(bw_history) > 0 and np.allclose(bws_guass_wxz, bw_history[-1]): - bw_no_change += 1 + if len(bw_lsa_history) > 0 and np.allclose(bws_guass_wxz, bw_lsa_history[-1]): + bw_lsa_no_change += 1 else: - bw_no_change = 0 - bw_history.append(np.array(bws_guass_wxz)) + bw_lsa_no_change = 0 + bw_lsa_history.append(np.array(bws_guass_wxz)) - if bw_no_change >= (bw_stable - 1): - print(f"Calibration stopped as the bandwidths remain the same for {bw_stable} iterations.") + if bw_lsa_no_change >= (bw_lsa_stable - 1): + print(f"Calibration stopped as the bandwidths remain the same for {bw_lsa_stable} LSA iterations.") break v = np.sum(sel.params * x,axis=1).reshape(-1, 1) @@ -1772,8 +1866,8 @@ def tqdm(x, total=0, print(f"LSA iteration {n_iter}: Difference = {diff:.5f}") - if n_iter >= max_iter: - print(f"Warning: Maximum iterations reached without convergence") + if n_iter >= max_iter_lsa: + print(f"Warning: LSA: Maximum iterations reached without convergence") print(f"Final bandwidths: {bws_guass_wxz}") @@ -2263,6 +2357,7 @@ def spatial_variability(self, selector, n_iters=1000, seed=None): """ + temp_sel = copy.deepcopy(selector) if seed is None: From 997ecdc6e77f70c5e9f7956dd62e59e221c09498 Mon Sep 17 00:00:00 2001 From: kchenlun Date: Thu, 21 Aug 2025 15:50:46 -0400 Subject: [PATCH 5/6] Add files via upload --- mgwrlib/mgwr/gwr.py | 64 ++++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 39 deletions(-) diff --git a/mgwrlib/mgwr/gwr.py b/mgwrlib/mgwr/gwr.py index 38a4688e..239b04ea 100755 --- a/mgwrlib/mgwr/gwr.py +++ b/mgwrlib/mgwr/gwr.py @@ -1533,56 +1533,42 @@ class MGWR(GWR): def __init__(self, coords, y, X, selector, family=Gaussian(), offset=None, sigma2_v1=True, kernel='bisquare', fixed=False, constant=True, spherical=False, hat_matrix=False, name_x=None, n_jobs=1): - self.family = family + """ + Initialize class + """ + self.y = y + self.sigma2_v1 = sigma2_v1 + self.points = None + self.P = None + self.exog_resid = None + self.exog_scale = None + self.fit_params = None + self.n_jobs = n_jobs + self.name_x = name_x + self.family = family + + if offset is None: + self.offset = np.ones((len(y), 1)) + else: + self.offset = offset * 1.0 + + GWR.__init__(self, coords, y, X, self.bw_init, family=self.family, + sigma2_v1=sigma2_v1, kernel=kernel, fixed=fixed, + constant=constant, spherical=spherical, + hat_matrix=hat_matrix) + if isinstance(self.family, Gaussian): - """ - Initialize class - """ - self.y = y self.selector = selector self.bws = selector.bw[0] #final set of bandwidth self.bws_history = selector.bw[1] #bws history in backfitting self.bw_init = selector.bw_init #initialization bandwidth - if offset is None: - self.offset = np.ones((len(y), 1)) - else: - self.offset = offset * 1.0 - GWR.__init__(self, coords, y, X, self.bw_init, family=self.family, - sigma2_v1=sigma2_v1, kernel=kernel, fixed=fixed, - constant=constant, spherical=spherical, - hat_matrix=hat_matrix) - self.sigma2_v1 = sigma2_v1 - self.points = None - self.P = None - self.exog_resid = None - self.exog_scale = None - self.fit_params = None - self.n_jobs = n_jobs - self.name_x = name_x elif isinstance(self.family, (Poisson, Binomial)): self.coords = np.array(coords) - self.y = y self.selector = None self.bws = None self.bws_history = None self.bw_init = None - if offset is None: - self.offset = np.ones((len(y), 1)) - else: - self.offset = offset * 1.0 - GWR.__init__(self, coords, y, X, self.bw_init, family=self.family, - sigma2_v1=sigma2_v1, kernel=kernel, fixed=fixed, - constant=constant, spherical=spherical, - hat_matrix=hat_matrix) - self.sigma2_v1 = sigma2_v1 - self.points = None - self.P = None - self.exog_resid = None - self.exog_scale = None - self.fit_params = None - self.n_jobs = n_jobs - self.name_x = name_x def _chunk_compute_R(self, chunk_id=0): """ @@ -1776,7 +1762,7 @@ def tqdm(x, total=0, R = np.hstack(rslt_list[2]) else: R = None - return MGWRResults(self, params, predy, CCT, ENP_j, w, R) + return MGWRResults(self, params, predy, CCT, ENP_j, w, R, self.name_x) elif isinstance(self.family, (Poisson, Binomial)): from .sel_bw import Sel_BW From 6766235e3f4b6565508fc17363ddd3922f37e1a5 Mon Sep 17 00:00:00 2001 From: kchenlun Date: Thu, 21 Aug 2025 19:12:17 -0400 Subject: [PATCH 6/6] Add files via upload --- mgwrlib/mgwr/gwr.py | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/mgwrlib/mgwr/gwr.py b/mgwrlib/mgwr/gwr.py index 239b04ea..7a06493f 100755 --- a/mgwrlib/mgwr/gwr.py +++ b/mgwrlib/mgwr/gwr.py @@ -1536,27 +1536,8 @@ def __init__(self, coords, y, X, selector, family=Gaussian(), offset=None, """ Initialize class """ - self.y = y - self.sigma2_v1 = sigma2_v1 - self.points = None - self.P = None - self.exog_resid = None - self.exog_scale = None - self.fit_params = None - self.n_jobs = n_jobs - self.name_x = name_x self.family = family - if offset is None: - self.offset = np.ones((len(y), 1)) - else: - self.offset = offset * 1.0 - - GWR.__init__(self, coords, y, X, self.bw_init, family=self.family, - sigma2_v1=sigma2_v1, kernel=kernel, fixed=fixed, - constant=constant, spherical=spherical, - hat_matrix=hat_matrix) - if isinstance(self.family, Gaussian): self.selector = selector self.bws = selector.bw[0] #final set of bandwidth @@ -1570,6 +1551,25 @@ def __init__(self, coords, y, X, selector, family=Gaussian(), offset=None, self.bws_history = None self.bw_init = None + GWR.__init__(self, coords, y, X, self.bw_init, family=self.family, + sigma2_v1=sigma2_v1, kernel=kernel, fixed=fixed, + constant=constant, spherical=spherical, + hat_matrix=hat_matrix) + self.y = y + self.sigma2_v1 = sigma2_v1 + self.points = None + self.P = None + self.exog_resid = None + self.exog_scale = None + self.fit_params = None + self.n_jobs = n_jobs + self.name_x = name_x + + if offset is None: + self.offset = np.ones((len(y), 1)) + else: + self.offset = offset * 1.0 + def _chunk_compute_R(self, chunk_id=0): """ Compute MGWR inference by chunks to reduce memory footprint. @@ -1758,12 +1758,14 @@ def tqdm(x, total=0, CCT = np.sum(np.array(rslt_list[1]), axis=0) w = np.ones(self.n) + if self.hat_matrix: R = np.hstack(rslt_list[2]) else: R = None + return MGWRResults(self, params, predy, CCT, ENP_j, w, R, self.name_x) - + elif isinstance(self.family, (Poisson, Binomial)): from .sel_bw import Sel_BW @@ -1864,9 +1866,8 @@ def tqdm(x, total=0, final_model.selector = sel final_model.bws = np.array(bws_guass_wxz) - final_model.name_x = self.name_x - return MGWRResults(final_model, betas, mu, mgwr_rslt.raw_CCT, mgwr_rslt.ENP_j, w, mgwr_rslt.R, final_model.name_x) + return MGWRResults(final_model, betas, mu, mgwr_rslt.raw_CCT, mgwr_rslt.ENP_j, w, mgwr_rslt.R, self.name_x) else: raise NotImplementedError('N/A')