From 6914fbde7aef14f15adbdcae37694b7b824b8144 Mon Sep 17 00:00:00 2001 From: Colin Caprani Date: Mon, 18 Oct 2021 18:39:33 +1100 Subject: [PATCH 1/7] Added GEV distribution --- docs/source/api/distributions/continuous.rst | 1 + pymc/distributions/__init__.py | 2 + pymc/distributions/continuous.py | 167 +++++++++++++++++++ pymc/tests/test_distributions.py | 15 ++ 4 files changed, 185 insertions(+) diff --git a/docs/source/api/distributions/continuous.rst b/docs/source/api/distributions/continuous.rst index a6d4d99507..c1fa65ca1d 100644 --- a/docs/source/api/distributions/continuous.rst +++ b/docs/source/api/distributions/continuous.rst @@ -37,6 +37,7 @@ Continuous LogitNormal Interpolated PolyaGamma + GenExtreme .. automodule:: pymc.distributions.continuous :members: diff --git a/pymc/distributions/__init__.py b/pymc/distributions/__init__.py index d9c26738ba..5184374428 100644 --- a/pymc/distributions/__init__.py +++ b/pymc/distributions/__init__.py @@ -32,6 +32,7 @@ Exponential, Flat, Gamma, + GenExtreme, Gumbel, HalfCauchy, HalfFlat, @@ -198,4 +199,5 @@ "logcdf", "_logcdf", "logpt_sum", + "GenExtreme", ] diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 282fba1dc9..46c9ead45f 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -123,6 +123,7 @@ def polyagamma_cdf(*args, **kwargs): "Moyal", "AsymmetricLaplace", "PolyaGamma", + "GenExtreme", ] @@ -4246,3 +4247,169 @@ def logcdf(value, h, z): TensorVariable """ return bound(_PolyaGammaLogDistFunc(False)(value, h, z), h > 0, value > 0) + + +class GenExtremeRV(RandomVariable): + name: str = "Generalized Extreme Value" + ndim_supp: int = 0 + ndims_params: List[int] = [0, 0, 0] + dtype: str = "floatX" + _print_name: Tuple[str, str] = ("Generalized Extreme Value", "\\operatorname{GEV}") + + def __call__(self, mu=0.0, sigma=1.0, xi=0.0, size=None, **kwargs) -> TensorVariable: + return super().__call__(mu, sigma, xi, size=size, **kwargs) + + @classmethod + def rng_fn( + cls, + rng: np.random.RandomState, + mu: np.ndarray, + sigma: np.ndarray, + xi: np.ndarray, + size: Tuple[int, ...], + ) -> np.ndarray: + # Notice negative here, since remainder of GenExtreme is based on Coles parametrization + return stats.genextreme.rvs(c=-xi, loc=mu, scale=sigma, random_state=rng, size=size) + + +gev = GenExtremeRV() + + +class GenExtreme(Continuous): + r""" + Univariate Generalized Extreme Value log-likelihood + + The cdf of this distribution is + + .. math:: + + G(x \mid \mu, \sigma, \xi) = exp\{ -\[1 + \xi z\]^{-\frac{1}{\xi}} \} + + where + + .. math:: + + z = \frac{x - \mu}{\sigma}. + + and is defined on the set: + + .. math:: + + {x: 1 + \xi(\frac{x-\mu}{\sigma}) > 0} + + Note that this parametrization differs from that of Scipy in the sign of + the shape parameter, $\xi$. + + .. plot:: + + import matplotlib.pyplot as plt + import numpy as np + import scipy.stats as st + import arviz as az + plt.style.use('arviz-darkgrid') + x = np.linspace(-10, 20, 200) + mus = [0., 4., -1.] + sigmas = [2., 2., 4.] + xis = [-0.3, 0.0, 0.3] + for mu, sigma, xi in zip(mus, sigmas, xis): + pdf = st.genextreme.pdf(x, c=-xi, loc=mu, scale=sigma) + plt.plot(x, pdf, label=rf'$\mu$ = {mu}, $\sigma$ = {sigma}, $\xi$={xi}') + plt.xlabel('x', fontsize=12) + plt.ylabel('f(x)', fontsize=12) + plt.legend(loc=1) + plt.show() + + + ======== ========================================== + Support :math: `x \in [\mu - \sigma/\xi, +\inf]`, when :math: `\xi > 0` + :math: `x \in \mathbb{R}` when \xi = 0 + :math: `x \in [-\inf, \mu - \sigma/\xi]`, when :math: `\xi < 0` + Mean :math: `\mu + \sigma(g_1 - 1)/\xi`, when `\xi \neq 0, \xi < 1` + :math: `\mu + \sigma \gamma`, when `\xi = 0` + :math: `\inf`, when `xi \geq 1` + where :math:`\gamma` is the Euler-Mascheroni constant, and + :math:`g_k = \Gamma (1-k\xi)` + Variance :math:`\sigma^2 (g_2 - g_1^2)/\xi^2`, when `\xi \neq 0, \xi < 0.5` + :math:`\frac{\pi^2}{6} \sigma^2`, when `\xi = 0` + :math:`\inf`, when `\xi \geq 0.5` + ======== ========================================== + + Parameters + ---------- + mu: float + Location parameter. + sigma: float + Scale parameter (sigma > 0). + xi: float + Shape parameter + scipy: bool + Whether or not to use the Scipy interpretation of the shape parameter + default False. + """ + + rv_op = gev + + @classmethod + def dist(cls, mu=0, sigma=1, xi=0, scipy=False, **kwargs): + # If SciPy, use its parametrization, otherwise convert to standard + if scipy: + xi = -xi + mu = at.as_tensor_variable(floatX(mu)) + sigma = at.as_tensor_variable(floatX(sigma)) + xi = at.as_tensor_variable(floatX(xi)) + + return super().dist([mu, sigma, xi], **kwargs) + + def logp(value, mu, sigma, xi): + """ + Calculate log-probability of Generalized Extreme Value distribution + at specified value. + + Parameters + ---------- + value: numeric + Value(s) for which log-probability is calculated. If the log probabilities for multiple + values are desired the values must be provided in a numpy array or Aesara tensor + + Returns + ------- + TensorVariable + """ + scaled = (value - mu) / sigma + + logp_expression = at.switch( + at.isclose(xi, 0), + at.log(sigma) - scaled - at.exp(-scaled), + -at.log(sigma) + - ((xi + 1) / xi) * at.log1p(xi * scaled) + - at.pow(1 + xi * scaled, -1 / xi), + ) + # bnd = mu - sigma/xi + return bound( + logp_expression, + 1 + xi * (value - mu) / sigma > 0, + # at.switch(xi > 0, value > bnd, value < bnd), + sigma > 0, + ) + + def logcdf(value, mu, sigma, xi): + """ + Compute the log of the cumulative distribution function for Generalized Extreme Value + distribution at the specified value. + + Parameters + ---------- + value: numeric or np.ndarray or `TensorVariable` + Value(s) for which log CDF is calculated. If the log CDF for + multiple values are desired the values must be provided in a numpy + array or `TensorVariable`. + + Returns + ------- + TensorVariable + """ + scaled = (value - mu) / sigma + logc_expression = at.switch( + at.isclose(xi, 0), -at.exp(-scaled), -at.pow(1 + xi * scaled, -1 / xi) + ) + return bound(logc_expression, 1 + xi * scaled > 0, sigma > 0) diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py index d29a929e97..faa78fdef0 100644 --- a/pymc/tests/test_distributions.py +++ b/pymc/tests/test_distributions.py @@ -75,6 +75,7 @@ def polyagamma_cdf(*args, **kwargs): Flat, Gamma, Geometric, + GenExtreme, Gumbel, HalfCauchy, HalfFlat, @@ -2544,6 +2545,20 @@ def test_gumbel(self): lambda value, mu, beta: sp.gumbel_r.logcdf(value, loc=mu, scale=beta), ) + def test_genextreme(self): + self.check_logp( + GenExtreme, + R, + {"mu": R, "sigma": Rplus, "xi": Domain([-1, 1])}, + lambda value, mu, sigma, xi: sp.genextreme.logpdf(value, c=-xi, loc=mu, scale=sigma), + ) + self.check_logcdf( + GenExtreme, + R, + {"mu": R, "sigma": Rplus, "xi": Domain([-1, 1])}, + lambda value, mu, sigma, xi: sp.genextreme.logcdf(value, c=-xi, loc=mu, scale=sigma), + ) + def test_logistic(self): self.check_logp( Logistic, From f1fd0919f62ea5bb727aeb85d0e1c46b01e65a5e Mon Sep 17 00:00:00 2001 From: Colin Caprani Date: Mon, 18 Oct 2021 20:00:29 +1100 Subject: [PATCH 2/7] GEV docs tweaks --- pymc/distributions/continuous.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 46c9ead45f..6f71e2036c 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -4283,7 +4283,7 @@ class GenExtreme(Continuous): .. math:: - G(x \mid \mu, \sigma, \xi) = exp\{ -\[1 + \xi z\]^{-\frac{1}{\xi}} \} + G(x \mid \mu, \sigma, \xi) = \exp\left[ -\left(1 + \xi z\right)^{-\frac{1}{\xi}} \right] where @@ -4295,10 +4295,10 @@ class GenExtreme(Continuous): .. math:: - {x: 1 + \xi(\frac{x-\mu}{\sigma}) > 0} + \left\{x: 1 + \xi(\frac{x-\mu}{\sigma}) > 0 \right\} Note that this parametrization differs from that of Scipy in the sign of - the shape parameter, $\xi$. + the shape parameter, :math:`\xi`. .. plot:: @@ -4320,19 +4320,19 @@ class GenExtreme(Continuous): plt.show() - ======== ========================================== - Support :math: `x \in [\mu - \sigma/\xi, +\inf]`, when :math: `\xi > 0` - :math: `x \in \mathbb{R}` when \xi = 0 - :math: `x \in [-\inf, \mu - \sigma/\xi]`, when :math: `\xi < 0` - Mean :math: `\mu + \sigma(g_1 - 1)/\xi`, when `\xi \neq 0, \xi < 1` - :math: `\mu + \sigma \gamma`, when `\xi = 0` - :math: `\inf`, when `xi \geq 1` + ======== ========================================================================= + Support * :math:`x \in [\mu - \sigma/\xi, +\infty]`, when :math: `\xi > 0` + * :math:`x \in \mathbb{R}` when \xi = 0 + * :math:`x \in [-\infty, \mu - \sigma/\xi]`, when :math: `\xi < 0` + Mean * :math:`\mu + \sigma(g_1 - 1)/\xi`, when `\xi \neq 0, \xi < 1` + * :math:`\mu + \sigma \gamma`, when `\xi = 0` + * :math:`\infty`, when `xi \geq 1` where :math:`\gamma` is the Euler-Mascheroni constant, and :math:`g_k = \Gamma (1-k\xi)` - Variance :math:`\sigma^2 (g_2 - g_1^2)/\xi^2`, when `\xi \neq 0, \xi < 0.5` - :math:`\frac{\pi^2}{6} \sigma^2`, when `\xi = 0` - :math:`\inf`, when `\xi \geq 0.5` - ======== ========================================== + Variance * :math:`\sigma^2 (g_2 - g_1^2)/\xi^2`, when :math:`\xi \neq 0, \xi < 0.5` + * :math:`\frac{\pi^2}{6} \sigma^2`, when :math:`\xi = 0` + * :math:`\infty`, when :math:`\xi \geq 0.5` + ======== ========================================================================= Parameters ---------- @@ -4344,7 +4344,7 @@ class GenExtreme(Continuous): Shape parameter scipy: bool Whether or not to use the Scipy interpretation of the shape parameter - default False. + (defaults to `False`). """ rv_op = gev From dc64f1b4d8d554b4b04dbe0e38b4be4cd525509d Mon Sep 17 00:00:00 2001 From: Colin Caprani Date: Mon, 18 Oct 2021 20:38:56 +1100 Subject: [PATCH 3/7] Final doc tweak and reference --- pymc/distributions/continuous.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 6f71e2036c..1e36cbc7c7 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -4289,16 +4289,16 @@ class GenExtreme(Continuous): .. math:: - z = \frac{x - \mu}{\sigma}. + z = \frac{x - \mu}{\sigma} and is defined on the set: .. math:: - \left\{x: 1 + \xi(\frac{x-\mu}{\sigma}) > 0 \right\} + \left\{x: 1 + \xi\left(\frac{x-\mu}{\sigma}\right) > 0 \right\}. - Note that this parametrization differs from that of Scipy in the sign of - the shape parameter, :math:`\xi`. + Note that this parametrization is per Coles (2001), and differs from that of + Scipy in the sign of the shape parameter, :math:`\xi`. .. plot:: @@ -4325,8 +4325,8 @@ class GenExtreme(Continuous): * :math:`x \in \mathbb{R}` when \xi = 0 * :math:`x \in [-\infty, \mu - \sigma/\xi]`, when :math: `\xi < 0` Mean * :math:`\mu + \sigma(g_1 - 1)/\xi`, when `\xi \neq 0, \xi < 1` - * :math:`\mu + \sigma \gamma`, when `\xi = 0` - * :math:`\infty`, when `xi \geq 1` + * :math:`\mu + \sigma \gamma`, when :math:`\xi = 0` + * :math:`\infty`, when :math:`xi \geq 1` where :math:`\gamma` is the Euler-Mascheroni constant, and :math:`g_k = \Gamma (1-k\xi)` Variance * :math:`\sigma^2 (g_2 - g_1^2)/\xi^2`, when :math:`\xi \neq 0, \xi < 0.5` @@ -4345,6 +4345,13 @@ class GenExtreme(Continuous): scipy: bool Whether or not to use the Scipy interpretation of the shape parameter (defaults to `False`). + + Reference + --------- + .. [Coles2001] Coles, S.G. (2001). + An Introduction to the Statistical Modeling of Extreme Values + Springer-Verlag, London + """ rv_op = gev From b06711fb7d03a7705083035ddeb2cdf5ef3b2882 Mon Sep 17 00:00:00 2001 From: Colin Caprani Date: Mon, 18 Oct 2021 20:56:11 +1100 Subject: [PATCH 4/7] Docs formatting again --- pymc/distributions/continuous.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 1e36cbc7c7..beb843d9c1 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -4321,12 +4321,12 @@ class GenExtreme(Continuous): ======== ========================================================================= - Support * :math:`x \in [\mu - \sigma/\xi, +\infty]`, when :math: `\xi > 0` - * :math:`x \in \mathbb{R}` when \xi = 0 - * :math:`x \in [-\infty, \mu - \sigma/\xi]`, when :math: `\xi < 0` - Mean * :math:`\mu + \sigma(g_1 - 1)/\xi`, when `\xi \neq 0, \xi < 1` + Support * :math:`x \in [\mu - \sigma/\xi, +\infty]`, when :math:`\xi > 0` + * :math:`x \in \mathbb{R}` when :math:`\xi = 0` + * :math:`x \in [-\infty, \mu - \sigma/\xi]`, when :math:`\xi < 0` + Mean * :math:`\mu + \sigma(g_1 - 1)/\xi`, when :math:`\xi \neq 0, \xi < 1` * :math:`\mu + \sigma \gamma`, when :math:`\xi = 0` - * :math:`\infty`, when :math:`xi \geq 1` + * :math:`\infty`, when :math:`\xi \geq 1` where :math:`\gamma` is the Euler-Mascheroni constant, and :math:`g_k = \Gamma (1-k\xi)` Variance * :math:`\sigma^2 (g_2 - g_1^2)/\xi^2`, when :math:`\xi \neq 0, \xi < 0.5` @@ -4346,8 +4346,8 @@ class GenExtreme(Continuous): Whether or not to use the Scipy interpretation of the shape parameter (defaults to `False`). - Reference - --------- + References + ---------- .. [Coles2001] Coles, S.G. (2001). An Introduction to the Statistical Modeling of Extreme Values Springer-Verlag, London From 9ab131f595d3a5b8108c3583b554f61ae26c36de Mon Sep 17 00:00:00 2001 From: Colin Caprani Date: Mon, 18 Oct 2021 23:31:35 +1100 Subject: [PATCH 5/7] Edits after first review --- pymc/distributions/continuous.py | 7 +++++++ pymc/tests/test_distributions.py | 12 ++++++++---- pymc/tests/test_distributions_random.py | 14 ++++++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index beb843d9c1..d601e29ab3 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -4420,3 +4420,10 @@ def logcdf(value, mu, sigma, xi): at.isclose(xi, 0), -at.exp(-scaled), -at.pow(1 + xi * scaled, -1 / xi) ) return bound(logc_expression, 1 + xi * scaled > 0, sigma > 0) + + def get_moment(value_var, size, mu, sigma, xi): + r""" + Using the mode, as the mean can be infinite when :math:`\xi > 1` + """ + mode = at.switch(at.isclose(xi, 0), mu, mu + sigma * (at.pow(1 + xi, -xi) - 1) / xi) + return at.full(size, mode, dtype=aesara.config.floatX) diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py index faa78fdef0..c3b8a14d18 100644 --- a/pymc/tests/test_distributions.py +++ b/pymc/tests/test_distributions.py @@ -2549,14 +2549,18 @@ def test_genextreme(self): self.check_logp( GenExtreme, R, - {"mu": R, "sigma": Rplus, "xi": Domain([-1, 1])}, - lambda value, mu, sigma, xi: sp.genextreme.logpdf(value, c=-xi, loc=mu, scale=sigma), + {"mu": R, "sigma": Rplus, "xi": Domain([-1, -1, -0.5, 0, 0.5, 1, 1])}, + lambda value, mu, sigma, xi: sp.genextreme.logpdf( + value, c=-xi, loc=mu, scale=sigma, n_samples=-1 + ), ) self.check_logcdf( GenExtreme, R, - {"mu": R, "sigma": Rplus, "xi": Domain([-1, 1])}, - lambda value, mu, sigma, xi: sp.genextreme.logcdf(value, c=-xi, loc=mu, scale=sigma), + {"mu": R, "sigma": Rplus, "xi": Domain([-1, -1, -0.5, 0, 0.5, 1, 1])}, + lambda value, mu, sigma, xi: sp.genextreme.logcdf( + value, c=-xi, loc=mu, scale=sigma, n_samples=-1 + ), ) def test_logistic(self): diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py index 8b7c189e6c..ab33ea9254 100644 --- a/pymc/tests/test_distributions_random.py +++ b/pymc/tests/test_distributions_random.py @@ -548,6 +548,20 @@ def seeded_exgaussian_rng_fn(self): ] +class TestGenExtreme(BaseTestDistribution): + pymc_dist = pm.GenExtreme + pymc_dist_params = {"mu": 0, "sigma": 1, "xi": -0.1} + expected_rv_op_params = {"mu": 0, "sigma": 1, "xi": -0.1} + # Notice, using different parametrization of xi sign to scipy + reference_dist_params = {"loc": 0, "scale": 1, "c": 0.1} + reference_dist = seeded_scipy_distribution_builder("genextreme") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] + + class TestGumbel(BaseTestDistribution): pymc_dist = pm.Gumbel pymc_dist_params = {"mu": 1.5, "beta": 3.0} From 3c9df345148633e6d2db6ccf4afc85327d47b55e Mon Sep 17 00:00:00 2001 From: Colin Caprani Date: Tue, 19 Oct 2021 01:24:10 +1100 Subject: [PATCH 6/7] Revision following first review --- pymc/distributions/continuous.py | 2 +- pymc/tests/test_distributions.py | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index d601e29ab3..9911e6415e 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -4386,7 +4386,7 @@ def logp(value, mu, sigma, xi): logp_expression = at.switch( at.isclose(xi, 0), - at.log(sigma) - scaled - at.exp(-scaled), + -at.log(sigma) - scaled - at.exp(-scaled), -at.log(sigma) - ((xi + 1) / xi) * at.log1p(xi * scaled) - at.pow(1 + xi * scaled, -1 / xi), diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py index c3b8a14d18..ba866e96ed 100644 --- a/pymc/tests/test_distributions.py +++ b/pymc/tests/test_distributions.py @@ -2550,17 +2550,15 @@ def test_genextreme(self): GenExtreme, R, {"mu": R, "sigma": Rplus, "xi": Domain([-1, -1, -0.5, 0, 0.5, 1, 1])}, - lambda value, mu, sigma, xi: sp.genextreme.logpdf( - value, c=-xi, loc=mu, scale=sigma, n_samples=-1 - ), + lambda value, mu, sigma, xi: sp.genextreme.logpdf(value, c=-xi, loc=mu, scale=sigma), + n_samples=-1, ) self.check_logcdf( GenExtreme, R, {"mu": R, "sigma": Rplus, "xi": Domain([-1, -1, -0.5, 0, 0.5, 1, 1])}, - lambda value, mu, sigma, xi: sp.genextreme.logcdf( - value, c=-xi, loc=mu, scale=sigma, n_samples=-1 - ), + lambda value, mu, sigma, xi: sp.genextreme.logcdf(value, c=-xi, loc=mu, scale=sigma), + n_samples=-1, ) def test_logistic(self): From a9b2bb8932148d293b6f295a5a7ff9878c03f948 Mon Sep 17 00:00:00 2001 From: Colin Caprani Date: Tue, 19 Oct 2021 01:29:13 +1100 Subject: [PATCH 7/7] Fixed n_samples --- pymc/tests/test_distributions.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py index ba866e96ed..1688b201fb 100644 --- a/pymc/tests/test_distributions.py +++ b/pymc/tests/test_distributions.py @@ -2551,14 +2551,12 @@ def test_genextreme(self): R, {"mu": R, "sigma": Rplus, "xi": Domain([-1, -1, -0.5, 0, 0.5, 1, 1])}, lambda value, mu, sigma, xi: sp.genextreme.logpdf(value, c=-xi, loc=mu, scale=sigma), - n_samples=-1, ) self.check_logcdf( GenExtreme, R, {"mu": R, "sigma": Rplus, "xi": Domain([-1, -1, -0.5, 0, 0.5, 1, 1])}, lambda value, mu, sigma, xi: sp.genextreme.logcdf(value, c=-xi, loc=mu, scale=sigma), - n_samples=-1, ) def test_logistic(self):