From 9f9e9adc7b77f8f9cbf8ab86b392ee840f0a2f86 Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 17 Oct 2025 19:54:19 +0300 Subject: [PATCH 1/4] use posterior::gpdfit --- R/psis.R | 2 +- R/psislw.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/psis.R b/R/psis.R index ce5207d8..68233eb8 100644 --- a/R/psis.R +++ b/R/psis.R @@ -254,7 +254,7 @@ psis_smooth_tail <- function(x, cutoff) { exp_cutoff <- exp(cutoff) # save time not sorting since x already sorted - fit <- gpdfit(exp(x) - exp_cutoff, sort_x = FALSE) + fit <- posterior::gpdfit(exp(x) - exp_cutoff, sort_x = FALSE) k <- fit$k sigma <- fit$sigma if (is.finite(k)) { diff --git a/R/psislw.R b/R/psislw.R index 9ed5ab15..ff870389 100644 --- a/R/psislw.R +++ b/R/psislw.R @@ -72,7 +72,7 @@ psislw <- function(lw, wcp = 0.2, wtrunc = 3/4, # body and gPd smoothed tail tail_ord <- order(x_tail) exp_cutoff <- exp(cutoff) - fit <- gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80) + fit <- posterior::gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80) k <- fit$k sigma <- fit$sigma prb <- (seq_len(tail_len) - 0.5) / tail_len From 05c69e1f466468c7d2e80214e9a8b0f5917f051c Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 17 Oct 2025 20:04:23 +0300 Subject: [PATCH 2/4] use posterior::qgeneralized_pareto()g --- R/psis.R | 2 +- R/psislw.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/psis.R b/R/psis.R index 68233eb8..463d8caf 100644 --- a/R/psis.R +++ b/R/psis.R @@ -259,7 +259,7 @@ psis_smooth_tail <- function(x, cutoff) { sigma <- fit$sigma if (is.finite(k)) { p <- (seq_len(len) - 0.5) / len - qq <- qgpd(p, k, sigma) + exp_cutoff + qq <- posterior::qgeneralized_pareto(p, 0, sigma, k) + exp_cutoff tail <- log(qq) } else { tail <- x diff --git a/R/psislw.R b/R/psislw.R index ff870389..808e657f 100644 --- a/R/psislw.R +++ b/R/psislw.R @@ -76,7 +76,7 @@ psislw <- function(lw, wcp = 0.2, wtrunc = 3/4, k <- fit$k sigma <- fit$sigma prb <- (seq_len(tail_len) - 0.5) / tail_len - qq <- qgpd(prb, k, sigma) + exp_cutoff + qq <- posterior::qgeneralized_pareto(prb, 0, sigma, k) + exp_cutoff smoothed_tail <- rep.int(0, tail_len) smoothed_tail[tail_ord] <- log(qq) x_new <- x From aae6c17b1820f578ca930d070bbe9ba731fe4b84 Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 17 Oct 2025 20:04:44 +0300 Subject: [PATCH 3/4] delete gpd functions --- R/gpdfit.R | 99 ------------------------------------------------------ 1 file changed, 99 deletions(-) delete mode 100644 R/gpdfit.R diff --git a/R/gpdfit.R b/R/gpdfit.R deleted file mode 100644 index 7bc7c312..00000000 --- a/R/gpdfit.R +++ /dev/null @@ -1,99 +0,0 @@ -#' Estimate parameters of the Generalized Pareto distribution -#' -#' Given a sample \eqn{x}, Estimate the parameters \eqn{k} and \eqn{\sigma} of -#' the generalized Pareto distribution (GPD), assuming the location parameter is -#' 0. By default the fit uses a prior for \eqn{k}, which will stabilize -#' estimates for very small sample sizes (and low effective sample sizes in the -#' case of MCMC samples). The weakly informative prior is a Gaussian prior -#' centered at 0.5. -#' -#' @export -#' @param x A numeric vector. The sample from which to estimate the parameters. -#' @param wip Logical indicating whether to adjust \eqn{k} based on a weakly -#' informative Gaussian prior centered on 0.5. Defaults to `TRUE`. -#' @param min_grid_pts The minimum number of grid points used in the fitting -#' algorithm. The actual number used is `min_grid_pts + floor(sqrt(length(x)))`. -#' @param sort_x If `TRUE` (the default), the first step in the fitting -#' algorithm is to sort the elements of `x`. If `x` is already -#' sorted in ascending order then `sort_x` can be set to `FALSE` to -#' skip the initial sorting step. -#' @return A named list with components `k` and `sigma`. -#' -#' @details Here the parameter \eqn{k} is the negative of \eqn{k} in Zhang & -#' Stephens (2009). -#' -#' @seealso [psis()], [pareto-k-diagnostic] -#' -#' @references -#' Zhang, J., and Stephens, M. A. (2009). A new and efficient estimation method -#' for the generalized Pareto distribution. *Technometrics* **51**, 316-325. -#' -gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) { - # See section 4 of Zhang and Stephens (2009) - if (sort_x) { - x <- sort.int(x) - } - N <- length(x) - prior <- 3 - M <- min_grid_pts + floor(sqrt(N)) - jj <- seq_len(M) - xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample - theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar - l_theta <- N * lx(theta, x) # profile log-lik - w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize - theta_hat <- sum(theta * w_theta) - k <- mean.default(log1p(-theta_hat * x)) - sigma <- -k / theta_hat - - if (wip) { - k <- adjust_k_wip(k, n = N) - } - - if (is.na(k)) { - k <- Inf - } - - nlist(k, sigma) -} - - -# internal ---------------------------------------------------------------- - -lx <- function(a,x) { - a <- -a - k <- vapply(a, FUN = function(a_i) mean(log1p(a_i * x)), FUN.VALUE = numeric(1)) - log(a / k) - k - 1 -} - -#' Adjust k based on weakly informative prior, Gaussian centered on 0.5. This -#' will stabilize estimates for very small Monte Carlo sample sizes and low neff -#' cases. -#' -#' @noRd -#' @param k Scalar khat estimate. -#' @param n Integer number of tail samples used to fit GPD. -#' @return Scalar adjusted khat estimate. -#' -adjust_k_wip <- function(k, n) { - a <- 10 - n_plus_a <- n + a - k * n / n_plus_a + a * 0.5 / n_plus_a -} - - -#' Inverse CDF of generalized Pareto distribution -#' (assuming location parameter is 0) -#' -#' @noRd -#' @param p Vector of probabilities. -#' @param k Scalar shape parameter. -#' @param sigma Scalar scale parameter. -#' @return Vector of quantiles. -#' -qgpd <- function(p, k, sigma) { - if (is.nan(sigma) || sigma <= 0) { - return(rep(NaN, length(p))) - } - - sigma * expm1(-k * log1p(-p)) / k -} From 099398b7cd874c0957eb5d4f7454beef16debfda Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 17 Oct 2025 21:35:14 +0300 Subject: [PATCH 4/4] remove gpdfit doc and export --- NAMESPACE | 1 - man/gpdfit.Rd | 44 -------------------------------------------- 2 files changed, 45 deletions(-) delete mode 100644 man/gpdfit.Rd diff --git a/NAMESPACE b/NAMESPACE index 4d1a9118..d0fae6fb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -106,7 +106,6 @@ export(example_loglik_array) export(example_loglik_matrix) export(extract_log_lik) export(find_model_names) -export(gpdfit) export(is.kfold) export(is.loo) export(is.psis) diff --git a/man/gpdfit.Rd b/man/gpdfit.Rd deleted file mode 100644 index 8cb61d12..00000000 --- a/man/gpdfit.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gpdfit.R -\name{gpdfit} -\alias{gpdfit} -\title{Estimate parameters of the Generalized Pareto distribution} -\usage{ -gpdfit(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) -} -\arguments{ -\item{x}{A numeric vector. The sample from which to estimate the parameters.} - -\item{wip}{Logical indicating whether to adjust \eqn{k} based on a weakly -informative Gaussian prior centered on 0.5. Defaults to \code{TRUE}.} - -\item{min_grid_pts}{The minimum number of grid points used in the fitting -algorithm. The actual number used is \code{min_grid_pts + floor(sqrt(length(x)))}.} - -\item{sort_x}{If \code{TRUE} (the default), the first step in the fitting -algorithm is to sort the elements of \code{x}. If \code{x} is already -sorted in ascending order then \code{sort_x} can be set to \code{FALSE} to -skip the initial sorting step.} -} -\value{ -A named list with components \code{k} and \code{sigma}. -} -\description{ -Given a sample \eqn{x}, Estimate the parameters \eqn{k} and \eqn{\sigma} of -the generalized Pareto distribution (GPD), assuming the location parameter is -0. By default the fit uses a prior for \eqn{k}, which will stabilize -estimates for very small sample sizes (and low effective sample sizes in the -case of MCMC samples). The weakly informative prior is a Gaussian prior -centered at 0.5. -} -\details{ -Here the parameter \eqn{k} is the negative of \eqn{k} in Zhang & -Stephens (2009). -} -\references{ -Zhang, J., and Stephens, M. A. (2009). A new and efficient estimation method -for the generalized Pareto distribution. \emph{Technometrics} \strong{51}, 316-325. -} -\seealso{ -\code{\link[=psis]{psis()}}, \link{pareto-k-diagnostic} -}