From 7d4ddc8932e8ba9a7da8a434c763bf022706ac84 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Fri, 9 May 2025 16:28:28 +0200 Subject: [PATCH 01/17] =?UTF-8?q?=F0=9F=94=96=20Bump=20package=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .zenodo.json | 8 ++++---- CITATION.cff | 2 +- DESCRIPTION | 4 ++-- LICENSE.md | 2 +- NEWS.md | 4 ++++ README.md | 4 ++-- inst/CITATION | 6 +++--- 7 files changed, 17 insertions(+), 13 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index fcf7c32..3ff4445 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,9 +1,9 @@ { "title": "territoria: Clustering Observations from Breeding Birds into Territoria", - "version": "0.0.3", + "version": "0.1.0", "license": "GPL-3.0", "upload_type": "software", - "description": "

Clusters individual observations based on breeding indication and\ndistance between observations.<\/p>", + "description": "

Clusters individual observations based on breeding indication and distance between observations.<\/p>", "keywords": [ "breeding bird", "cluster" @@ -15,11 +15,11 @@ "name": "Onkelinx, Thierry", "affiliation": "Research Institute for Nature and Forest (INBO)", "orcid": "0000-0001-8804-4216", - "type": "ContactPerson" + "type": "contactperson" }, { "name": "Research Institute for Nature and Forest (INBO)", - "type": "RightsHolder" + "type": "rightsholder" } ], "creators": [ diff --git a/CITATION.cff b/CITATION.cff index 2925793..398e4b1 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -20,4 +20,4 @@ repository-code: https://github.com/inbo/territoria/ type: software abstract: "Clusters individual observations based on breeding indication and distance between observations." -version: 0.0.3 +version: 0.1.0 diff --git a/DESCRIPTION b/DESCRIPTION index e5cbdb6..6e24ca5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: territoria Title: Clustering Observations from Breeding Birds into Territoria -Version: 0.0.3 +Version: 0.1.0 Authors@R: c( person("Thierry", "Onkelinx", , "thierry.onkelinx@inbo.be", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8804-4216", affiliation = "Research Institute for Nature and Forest (INBO)")), @@ -23,4 +23,4 @@ Config/checklist/keywords: breeding bird; cluster Encoding: UTF-8 Language: en-GB Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 diff --git a/LICENSE.md b/LICENSE.md index 2fb2e74..379c1b2 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,4 +1,4 @@ -### GNU GENERAL PUBLIC LICENSE +# GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 diff --git a/NEWS.md b/NEWS.md index 44ce1af..a7a7b79 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# territoria 0.1.0 + +* Update [`checklist`](https://inbo.github.io/checklist/) machinery. + # territoria 0.0.3 * Update [`checklist`](https://inbo.github.io/checklist/) machinery. 
diff --git a/README.md b/README.md index 1f29e3b..c042e62 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ summary(obs$centroids) #> 1st Qu.: 544.72 1st Qu.: 946.10 #> Median : 927.92 Median :1246.24 #> Mean : 955.15 Mean :1224.73 -#> 3rd Qu.:1349.57 3rd Qu.:1669.61 +#> 3rd Qu.:1349.57 3rd Qu.:1669.60 #> Max. :1897.31 Max. :1963.82 summary(obs$observations) #> x y survey status @@ -68,7 +68,7 @@ summary(obs$observations) #> 1st Qu.: 550.47 1st Qu.: 910.56 1st Qu.:1.75 1st Qu.:2.000 #> Median : 899.91 Median :1272.53 Median :2.50 Median :2.000 #> Mean : 946.40 Mean :1238.04 Mean :2.50 Mean :2.096 -#> 3rd Qu.:1362.48 3rd Qu.:1764.88 3rd Qu.:3.25 3rd Qu.:3.000 +#> 3rd Qu.:1362.48 3rd Qu.:1764.89 3rd Qu.:3.25 3rd Qu.:3.000 #> Max. :2012.23 Max. :2082.18 Max. :4.00 Max. :3.000 #> observed id #> Mode :logical Min. : 1.00 diff --git a/inst/CITATION b/inst/CITATION index 25fab8f..9f3b31f 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -2,12 +2,12 @@ citHeader("To cite `territoria` in publications please use:") # begin checklist entry bibentry( bibtype = "Manual", - title = "territoria: Clustering Observations from Breeding Birds into Territoria. Version 0.0.3", + title = "territoria: Clustering Observations from Breeding Birds into Territoria. Version 0.1.0", author = c( author = c(person(given = "Thierry", family = "Onkelinx"))), - year = 2023, + year = 2025, url = "https://github.com/inbo/territoria/", abstract = "Clusters individual observations based on breeding indication and distance between observations.", - textVersion = "Onkelinx, Thierry (2023) territoria: Clustering Observations from Breeding Birds into Territoria. Version 0.0.3. https://github.com/inbo/territoria/", + textVersion = "Onkelinx, Thierry (2025) territoria: Clustering Observations from Breeding Birds into Territoria. Version 0.1.0. 
https://github.com/inbo/territoria/", keywords = "breeding bird; cluster", ) # end checklist entry From dff057b0d924e89e20949293ee2988c3e79040ef Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Fri, 9 May 2025 21:45:48 +0200 Subject: [PATCH 02/17] =?UTF-8?q?=E2=9C=A8=20Split=20surveys=20in=20groups?= =?UTF-8?q?=20when=20importing=20the=20observations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DESCRIPTION | 3 ++ NAMESPACE | 6 +++ R/import_observations.R | 80 ++++++++++++++++++++++++++++++++++---- man/import_observations.Rd | 5 ++- 4 files changed, 85 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6e24ca5..27caa0d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,8 +12,11 @@ Description: Clusters individual observations based on breeding indication License: GPL-3 URL: https://github.com/inbo/territoria BugReports: https://github.com/inbo/territoria/issues +Depends: R (>= 4.1.0) Imports: assertthat, + deldir, + igraph, mvtnorm, RSQLite, spatstat.geom, diff --git a/NAMESPACE b/NAMESPACE index 78aa76c..844fd00 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,7 +20,13 @@ importFrom(assertthat,is.flag) importFrom(assertthat,is.number) importFrom(assertthat,is.string) importFrom(assertthat,noNA) +importFrom(deldir,deldir) +importFrom(igraph,V) +importFrom(igraph,decompose) +importFrom(igraph,graph_from_data_frame) importFrom(mvtnorm,rmvnorm) importFrom(spatstat.geom,owin) importFrom(spatstat.random,rStrauss) +importFrom(stats,aggregate) importFrom(stats,rbinom) +importFrom(stats,rnorm) diff --git a/R/import_observations.R b/R/import_observations.R index 78a7d04..9df707a 100644 --- a/R/import_observations.R +++ b/R/import_observations.R @@ -3,12 +3,23 @@ #' The function overwrites any existing table with observations. #' @param observations a data.frame with the observations. #' @param max_dist maximum clustering distance in m. 
+#' @param threshold The minimum distance in m between groups of the same survey. +#' This is used to split surveys into different groups. #' @param conn a DBI connection to an SQLite database. #' @export #' @importFrom assertthat assert_that has_name is.number is.string noNA +#' @importFrom deldir deldir +#' @importFrom igraph decompose graph_from_data_frame V #' @importFrom RSQLite dbClearResult dbSendQuery dbWriteTable -import_observations <- function(observations, conn, max_dist = 336) { - assert_that(inherits(observations, "data.frame")) +#' @importFrom stats aggregate rnorm +import_observations <- function( + observations, conn, max_dist = 336, threshold = 3000 +) { + assert_that( + inherits(observations, "data.frame"), + is.number(max_dist), noNA(max_dist), max_dist > 0, + is.number(threshold), noNA(threshold), threshold > 0, max_dist < threshold + ) assert_that( has_name(observations, "id"), has_name(observations, "x"), has_name(observations, "y"), @@ -33,16 +44,24 @@ import_observations <- function(observations, conn, max_dist = 336) { assert_that(inherits(conn, "SQLiteConnection")) - assert_that(is.number(max_dist), max_dist > 0) - sql <- "DROP TABLE IF EXISTS distance" res <- dbSendQuery(conn, sql) dbClearResult(res) + sql <- "DROP TABLE IF EXISTS survey" + res <- dbSendQuery(conn, sql) + dbClearResult(res) + sql <- "DROP TABLE IF EXISTS observation" res <- dbSendQuery(conn, sql) dbClearResult(res) + sql <- "CREATE TABLE survey ( + id INTEGER PRIMARY KEY, original INTEGER NOT NULL +)" + res <- dbSendQuery(conn, sql) + dbClearResult(res) + sql <- "CREATE TABLE observation ( id INTEGER PRIMARY KEY, x REAL NOT NULL, y REAL NOT NULL, group_x INTEGER NOT NULL, group_y INTEGER NOT NULL, survey INTEGER NOT NULL, @@ -53,12 +72,57 @@ import_observations <- function(observations, conn, max_dist = 336) { # force observations into a data.frame to avoid problems with sf objects observations <- as.data.frame(observations) - observations$cluster <- observations$id - 
observations$group_x <- floor(observations$x / max_dist / 2) - observations$group_y <- floor(observations$y / max_dist / 2) + observations$original <- observations$survey + bb_min <- aggregate(cbind(x, y) ~ survey, data = observations, FUN = min) + bb_max <- aggregate(cbind(x, y) ~ survey, data = observations, FUN = max) + diagonal <- sqrt((bb_max$x - bb_min$x) ^ 2 + (bb_max$y - bb_min$y) ^ 2) + to_do <- bb_min$survey[diagonal >= threshold] + done <- observations[!observations$survey %in% to_do, ] + for (i in to_do) { + candidate <- which(observations$survey == i) + dd <- deldir( + x = rnorm(length(candidate), mean = observations$x[candidate], sd = 0.01), + y = rnorm(length(candidate), mean = observations$y[candidate], sd = 0.01), + id = observations$id[candidate] + ) + edges <- data.frame( + id1 = dd$delsgs$ind1, id2 = dd$delsgs$ind2, + length = sqrt( + (dd$delsgs$x2 - dd$delsgs$x1) ^ 2 + (dd$delsgs$y2 - dd$delsgs$y1) ^ 2 + ) + ) + if (max(edges$length) < threshold) { + done <- rbind(done, observations[candidate, ]) + next + } + edges[edges$length < threshold, ] |> + graph_from_data_frame(directed = FALSE) |> + decompose() -> sg + if (length(sg) == 1) { + done <- rbind(done, observations[candidate, ]) + next + } + for (j in seq_along(sg)) { + V(sg[[j]]) |> + names() |> + as.integer() -> relevant + extra <- observations[observations$id %in% relevant, ] + extra$survey <- j + done <- rbind(done, extra) + } + } + done$survey <- interaction(done$original, done$survey, drop = TRUE) |> + as.integer() + surveys <- unique(done[, c("survey", "original")]) + colnames(surveys) <- c("id", "original") + dbWriteTable(conn, name = "survey", append = TRUE, value = surveys) + + done$cluster <- done$id + done$group_x <- floor(done$x / max_dist / 2) + done$group_y <- floor(done$y / max_dist / 2) cols <- c("id", "x", "y", "survey", "status", "cluster", "group_x", "group_y") dbWriteTable( - conn, name = "observation", append = TRUE, value = observations[, cols] + conn, name = 
"observation", append = TRUE, value = done[, cols] ) sql <- "CREATE INDEX IF NOT EXISTS observation_idx ON diff --git a/man/import_observations.Rd b/man/import_observations.Rd index 388a4e0..7e53aaf 100644 --- a/man/import_observations.Rd +++ b/man/import_observations.Rd @@ -4,7 +4,7 @@ \alias{import_observations} \title{Import the observations} \usage{ -import_observations(observations, conn, max_dist = 336) +import_observations(observations, conn, max_dist = 336, threshold = 3000) } \arguments{ \item{observations}{a data.frame with the observations.} @@ -12,6 +12,9 @@ import_observations(observations, conn, max_dist = 336) \item{conn}{a DBI connection to an SQLite database.} \item{max_dist}{maximum clustering distance in m.} + +\item{threshold}{The minimum distance in m between groups of the same survey. +This is used to split surveys into different groups.} } \description{ The function overwrites any existing table with observations. From 52789f75b525fb69c9f9952079640aacffde0faf Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Sat, 10 May 2025 21:37:52 +0200 Subject: [PATCH 03/17] =?UTF-8?q?=E2=9C=A8=20Add=20a=20function=20to=20tes?= =?UTF-8?q?t=20for=20unlikely=20status=20distribution?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- NAMESPACE | 2 ++ R/import_observations.R | 4 +++ R/unlikely_status.R | 65 +++++++++++++++++++++++++++++++++++++++++ man/unlikely_status.Rd | 32 ++++++++++++++++++++ 4 files changed, 103 insertions(+) create mode 100644 R/unlikely_status.R create mode 100644 man/unlikely_status.Rd diff --git a/NAMESPACE b/NAMESPACE index 844fd00..a542c2f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(distance_matrix) export(get_cluster) export(import_observations) export(simulate_observations) +export(unlikely_status) importFrom(RSQLite,SQLite) importFrom(RSQLite,dbClearResult) importFrom(RSQLite,dbConnect) @@ -28,5 +29,6 @@ importFrom(mvtnorm,rmvnorm) importFrom(spatstat.geom,owin) 
importFrom(spatstat.random,rStrauss) importFrom(stats,aggregate) +importFrom(stats,binom.test) importFrom(stats,rbinom) importFrom(stats,rnorm) diff --git a/R/import_observations.R b/R/import_observations.R index 9df707a..fcb3dc6 100644 --- a/R/import_observations.R +++ b/R/import_observations.R @@ -56,6 +56,10 @@ import_observations <- function( res <- dbSendQuery(conn, sql) dbClearResult(res) + sql <- "DROP TABLE IF EXISTS unlikely" + res <- dbSendQuery(conn, sql) + dbClearResult(res) + sql <- "CREATE TABLE survey ( id INTEGER PRIMARY KEY, original INTEGER NOT NULL )" diff --git a/R/unlikely_status.R b/R/unlikely_status.R new file mode 100644 index 0000000..02849bb --- /dev/null +++ b/R/unlikely_status.R @@ -0,0 +1,65 @@ +#' Detect surveys with an unlikely status distribution +#' @inheritParams import_observations +#' @param threshold A numeric value between 0 and 1 indicating the average +#' proportion of the observations with status above or equal to the +#' `status_split `. +#' Default of 0.5. +#' @param status_split An integer values indicating which status level splits +#' the status into two groups. +#' The default is 2. +#' @param alpha A numeric value between 0 and 1 indicating the family-wise Type +#' I error. +#' Default of 0.01. +#' @return A numeric value between 0 and 1 indicating the proportion of +#' surveys with an unlikely status distribution. +#' The list of unlikely surveys is written to the database. 
+#' @export +#' @importFrom assertthat assert_that is.count is.number noNA +#' @importFrom RSQLite dbGetQuery dbWriteTable +#' @importFrom stats aggregate binom.test +unlikely_status <- function( + conn, threshold = 0.5, status_split = 2, alpha = 0.01 +) { + assert_that( + is.number(threshold), noNA(threshold), threshold > 0, + is.count(status_split), noNA(status_split), + is.number(alpha), noNA(alpha), alpha > 0, alpha < 1, + inherits(conn, "SQLiteConnection") + ) + sprintf( + "SELECT survey, status >= %1$i AS above, COUNT(id) AS n +FROM observation +GROUP BY survey, status >= %1$i", + status_split + ) |> + dbGetQuery(conn = conn) -> status_obs + merge( + status_obs[status_obs$above == 1, c("survey", "n")], + status_obs[status_obs$above == 0, c("survey", "n")], + by = "survey", all = TRUE + ) -> status_obs + status_obs$n.x[is.na(status_obs$n.x)] <- 0 + status_obs$n.y[is.na(status_obs$n.y)] <- 0 + status_obs[, c("n.x", "n.y")] |> + apply( + 1, threshold = threshold, + FUN = function(x, threshold) { + binom.test(x = x, alternative = "greater", p = threshold)$p.value + } + ) -> status_obs$p_value + status_obs <- status_obs[order(status_obs$p_value, -status_obs$n.y), ] + status_obs$log_p <- log(1 - status_obs$p_value) + c(0, diff(status_obs$n.x) != 0 | diff(status_obs$n.y) != 0) |> + cumsum() -> status_obs$group + status_group <- aggregate(log_p ~ group, data = status_obs, FUN = sum) + status_group$p <- 1 - exp(cumsum(status_group$log_p)) + status_group[status_group$p < alpha, c("group", "p")] |> + merge(status_obs[, c("survey", "group")], by = "group") -> unlikely + unlikely$reason <- sprintf( + "fraction status above or equal to %i greather than %.0f%%", status_split, + 100 * threshold + ) + unlikely[, c("survey", "reason", "p")] |> + dbWriteTable(conn = conn, name = "unlikely", append = TRUE) + return(nrow(unlikely) / nrow(status_obs)) +} diff --git a/man/unlikely_status.Rd b/man/unlikely_status.Rd new file mode 100644 index 0000000..9e71907 --- /dev/null +++ 
b/man/unlikely_status.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/unlikely_status.R +\name{unlikely_status} +\alias{unlikely_status} +\title{Detect surveys with an unlikely status distribution} +\usage{ +unlikely_status(conn, threshold = 0.5, status_split = 2, alpha = 0.01) +} +\arguments{ +\item{conn}{a DBI connection to an SQLite database.} + +\item{threshold}{A numeric value between 0 and 1 indicating the average +proportion of the observations with status above or equal to the +\code{status_split }. +Default of 0.5.} + +\item{status_split}{An integer values indicating which status level splits +the status into two groups. +The default is 2.} + +\item{alpha}{A numeric value between 0 and 1 indicating the family-wise Type +I error. +Default of 0.01.} +} +\value{ +A numeric value between 0 and 1 indicating the proportion of +surveys with an unlikely status distribution. +The list of unlikely surveys is written to the database. +} +\description{ +Detect surveys with an unlikely status distribution +} From dbf566e7fb637909e399badc6809f5c7d2b074cd Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Sun, 11 May 2025 20:16:46 +0200 Subject: [PATCH 04/17] =?UTF-8?q?=E2=9C=A8=20Add=20edge=5Fdistribution()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- NAMESPACE | 1 + R/edge_distribution.R | 40 ++++++++++++++++++++++++++++++++++++++++ inst/en_gb.dic | 1 + man/edge_distribution.Rd | 18 ++++++++++++++++++ 4 files changed, 60 insertions(+) create mode 100644 R/edge_distribution.R create mode 100644 man/edge_distribution.Rd diff --git a/NAMESPACE b/NAMESPACE index a542c2f..7f5da19 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(cluster_observation) export(connect_db) export(distance_matrix) +export(edge_distribution) export(get_cluster) export(import_observations) export(simulate_observations) diff --git a/R/edge_distribution.R b/R/edge_distribution.R new 
file mode 100644 index 0000000..7070043 --- /dev/null +++ b/R/edge_distribution.R @@ -0,0 +1,40 @@ +#' @title Edge distribution +#' @description +#' This function return the length of the edges of the Delaunay triangulation +#' per survey. +#' The function returns only edges with length smaller than twice `max_dist`. +#' @inheritParams import_observations +#' @importFrom assertthat assert_that is.number noNA +#' @importFrom deldir deldir +#' @importFrom RSQLite dbGetQuery +#' @export +edge_distribution <- function(conn, max_dist = 336) { + assert_that( + inherits(conn, "SQLiteConnection"), + is.number(max_dist), noNA(max_dist), max_dist > 0 + ) + "SELECT survey, x, y FROM observation" |> + dbGetQuery(conn = conn) -> observations + table(observations$survey) |> + as.data.frame() -> n_survey + n_survey[n_survey$Freq > 1, ] |> + merge(x = observations, by.y = "Var1", by.x = "survey") -> observations + edges <- data.frame(survey = integer(0), length = numeric(0)) + for (i in unique(observations$survey)) { + candidate <- which(observations$survey == i) + dd <- deldir( + x = rnorm(length(candidate), mean = observations$x[candidate], sd = 0.01), + y = rnorm(length(candidate), mean = observations$y[candidate], sd = 0.01), + id = observations$id[candidate] + ) + extra <- data.frame( + survey = i, + length = sqrt( + (dd$delsgs$x2 - dd$delsgs$x1) ^ 2 + (dd$delsgs$y2 - dd$delsgs$y1) ^ 2 + ) + ) + extra[extra$length <= 2 * max_dist, ] |> + rbind(edges) -> edges + } + return(edges) +} diff --git a/inst/en_gb.dic b/inst/en_gb.dic index 700598b..3703ec6 100644 --- a/inst/en_gb.dic +++ b/inst/en_gb.dic @@ -1 +1,2 @@ +Delaunay SQLite diff --git a/man/edge_distribution.Rd b/man/edge_distribution.Rd new file mode 100644 index 0000000..a3a3367 --- /dev/null +++ b/man/edge_distribution.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edge_distribution.R +\name{edge_distribution} +\alias{edge_distribution} +\title{Edge distribution} 
+\usage{ +edge_distribution(conn, max_dist = 336) +} +\arguments{ +\item{conn}{a DBI connection to an SQLite database.} + +\item{max_dist}{maximum clustering distance in m.} +} +\description{ +This function return the length of the edges of the Delaunay triangulation +per survey. +The function returns only edges with length smaller than twice \code{max_dist}. +} From 45bfc247abe236f417c8de810ab2188039531735 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Sun, 11 May 2025 21:17:18 +0200 Subject: [PATCH 05/17] =?UTF-8?q?=E2=9C=A8=20Add=20unlikely=5Fedge=5Fdistr?= =?UTF-8?q?ibution()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- NAMESPACE | 2 ++ R/unlikely_edge_distribution.R | 45 +++++++++++++++++++++++++++++++ man/unlikely_edge_distribution.Rd | 28 +++++++++++++++++++ 3 files changed, 75 insertions(+) create mode 100644 R/unlikely_edge_distribution.R create mode 100644 man/unlikely_edge_distribution.Rd diff --git a/NAMESPACE b/NAMESPACE index 7f5da19..4e94f37 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(edge_distribution) export(get_cluster) export(import_observations) export(simulate_observations) +export(unlikely_edge_distribution) export(unlikely_status) importFrom(RSQLite,SQLite) importFrom(RSQLite,dbClearResult) @@ -33,3 +34,4 @@ importFrom(stats,aggregate) importFrom(stats,binom.test) importFrom(stats,rbinom) importFrom(stats,rnorm) +importFrom(stats,wilcox.test) diff --git a/R/unlikely_edge_distribution.R b/R/unlikely_edge_distribution.R new file mode 100644 index 0000000..7a4f36a --- /dev/null +++ b/R/unlikely_edge_distribution.R @@ -0,0 +1,45 @@ +#' Check for unlikely distribution of Delaunay edges +#' +#' This function compares the distribution of Delaunay edges per survey in the +#' `conn` database with the distribution of Delaunay edges of all surveys in the +#' `conn_reference` database. 
+#' It uses a Wilcoxon test to check if the distribution of edges is +#' significantly different. +#' The function returns the proportion of surveys with an unlikely distribution +#' and writes the list of unlikely surveys to the database. +#' @inheritParams import_observations +#' @inheritParams unlikely_status +#' @param conn_reference a connection to the reference database. +#' @export +#' @importFrom assertthat assert_that is.number noNA +#' @importFrom RSQLite dbWriteTable +#' @importFrom stats wilcox.test +unlikely_edge_distribution <- function( + conn, conn_reference, max_dist = 336, alpha = 0.01 +) { + assert_that(is.number(alpha), noNA(alpha), alpha > 0, alpha < 1) + edges <- edge_distribution(conn = conn, max_dist = max_dist) + reference <- edge_distribution(conn = conn_reference, max_dist = max_dist) + survey <- unique(edges$survey) + p <- vapply( + survey, FUN.VALUE = numeric(1), edges = edges, reference = reference, + FUN = function(i, edges, reference) { + wilcox.test( + x = edges$length[edges$survey == i], y = reference$length + )$p.value + } + ) + dist_test <- data.frame(survey = survey, p_value = p) + dist_test <- dist_test[order(dist_test$p_value), ] + dist_test$log_p <- log(1 - dist_test$p_value) + c(0, diff(dist_test$p_value) > 0) |> + cumsum() -> dist_test$group + dist_group <- aggregate(log_p ~ group, data = dist_test, FUN = sum) + dist_group$p <- 1 - exp(cumsum(dist_group$log_p)) + dist_group[dist_group$p < alpha, c("group", "p")] |> + merge(dist_test[, c("survey", "group")], by = "group") -> unlikely + unlikely$reason <- "Difference in distribution of Delaunay edges" + unlikely[, c("survey", "reason", "p")] |> + dbWriteTable(conn = conn, name = "unlikely", append = TRUE) + return(nrow(unlikely) / nrow(dist_test)) +} diff --git a/man/unlikely_edge_distribution.Rd b/man/unlikely_edge_distribution.Rd new file mode 100644 index 0000000..82d6a58 --- /dev/null +++ b/man/unlikely_edge_distribution.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do 
not edit by hand +% Please edit documentation in R/unlikely_edge_distribution.R +\name{unlikely_edge_distribution} +\alias{unlikely_edge_distribution} +\title{Check for unlikely distribution of Delaunay edges} +\usage{ +unlikely_edge_distribution(conn, conn_reference, max_dist = 336, alpha = 0.01) +} +\arguments{ +\item{conn}{a DBI connection to an SQLite database.} + +\item{conn_reference}{a connection to the reference database.} + +\item{max_dist}{maximum clustering distance in m.} + +\item{alpha}{A numeric value between 0 and 1 indicating the family-wise Type +I error. +Default of 0.01.} +} +\description{ +This function compares the distribution of Delaunay edges per survey in the +\code{conn} database with the distribution of Delaunay edges of all surveys in the +\code{conn_reference} database. +It uses a Wilcoxon test to check if the distribution of edges is +significantly different. +The function returns the proportion of surveys with an unlikely distribution +and writes the list of unlikely surveys to the database. 
+} From addeb1d1d8a706e5bc357213474304c012754765 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Mon, 12 May 2025 09:31:07 +0200 Subject: [PATCH 06/17] =?UTF-8?q?=E2=9C=A8=20Add=20user=20and=20region=20w?= =?UTF-8?q?hen=20importing=20observations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/import_observations.R | 42 +++++++++++++++++++++++++++++++-------- R/simulate_observations.R | 2 ++ README.md | 14 ++++++------- 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/R/import_observations.R b/R/import_observations.R index fcb3dc6..2fd1d0e 100644 --- a/R/import_observations.R +++ b/R/import_observations.R @@ -21,21 +21,30 @@ import_observations <- function( is.number(threshold), noNA(threshold), threshold > 0, max_dist < threshold ) assert_that( - has_name(observations, "id"), + has_name(observations, "id"), has_name(observations, "user"), has_name(observations, "x"), has_name(observations, "y"), - has_name(observations, "survey"), has_name(observations, "status") + has_name(observations, "survey"), has_name(observations, "status"), + has_name(observations, "region") ) assert_that(is.numeric(observations$x), is.numeric(observations$y)) assert_that( noNA(observations$id), noNA(observations$x), noNA(observations$y), - noNA(observations$survey), noNA(observations$status) + noNA(observations$survey), noNA(observations$status), + noNA(observations$user) ) observations$id <- make_integer(observations$id) observations$survey <- make_integer(observations$survey) observations$status <- make_integer(observations$status) + observations$user <- make_integer(observations$user) + observations$region <- make_integer(observations$region) assert_that( anyDuplicated(observations$id) == 0, msg = "duplicate values in id" ) + unique_combo <- unique(observations[, c("survey", "user")]) + assert_that( + anyDuplicated(unique_combo) == 0, msg = "`survey` with multiple `user`" + ) + diagonal <- 
diff(range(observations$x)) ^ 2 + diff(range(observations$y)) ^ 2 assert_that( max_dist < sqrt(diagonal), @@ -61,7 +70,7 @@ import_observations <- function( dbClearResult(res) sql <- "CREATE TABLE survey ( - id INTEGER PRIMARY KEY, original INTEGER NOT NULL + id INTEGER PRIMARY KEY, original INTEGER NOT NULL, user INTEGER NOT NULL )" res <- dbSendQuery(conn, sql) dbClearResult(res) @@ -69,43 +78,53 @@ import_observations <- function( sql <- "CREATE TABLE observation ( id INTEGER PRIMARY KEY, x REAL NOT NULL, y REAL NOT NULL, group_x INTEGER NOT NULL, group_y INTEGER NOT NULL, survey INTEGER NOT NULL, - status INTEGER NOT NULL, cluster INTEGER NOT NULL)" + status INTEGER NOT NULL, cluster INTEGER NOT NULL, region INTEGER +)" res <- dbSendQuery(conn, sql) dbClearResult(res) # force observations into a data.frame to avoid problems with sf objects observations <- as.data.frame(observations) + # splits surveys into clearly distinct groups observations$original <- observations$survey + # surveys with a bounding box diagonal smaller that the threshold are OK bb_min <- aggregate(cbind(x, y) ~ survey, data = observations, FUN = min) bb_max <- aggregate(cbind(x, y) ~ survey, data = observations, FUN = max) diagonal <- sqrt((bb_max$x - bb_min$x) ^ 2 + (bb_max$y - bb_min$y) ^ 2) to_do <- bb_min$survey[diagonal >= threshold] done <- observations[!observations$survey %in% to_do, ] + # handle surveys with a bounding box diagonal larger than the threshold for (i in to_do) { candidate <- which(observations$survey == i) + # make Delaunay triangulation dd <- deldir( x = rnorm(length(candidate), mean = observations$x[candidate], sd = 0.01), y = rnorm(length(candidate), mean = observations$y[candidate], sd = 0.01), id = observations$id[candidate] ) + # calculate length of edges edges <- data.frame( id1 = dd$delsgs$ind1, id2 = dd$delsgs$ind2, length = sqrt( (dd$delsgs$x2 - dd$delsgs$x1) ^ 2 + (dd$delsgs$y2 - dd$delsgs$y1) ^ 2 ) ) + # do nothing when every edge is smaller than the 
threshold if (max(edges$length) < threshold) { done <- rbind(done, observations[candidate, ]) next } + # remove edges larger than the threshold and decompose the graph edges[edges$length < threshold, ] |> graph_from_data_frame(directed = FALSE) |> decompose() -> sg + # do nothing when there is this one graph if (length(sg) == 1) { done <- rbind(done, observations[candidate, ]) next } + # split the survey into groups when there are multiple graphs for (j in seq_along(sg)) { V(sg[[j]]) |> names() |> @@ -117,18 +136,25 @@ import_observations <- function( } done$survey <- interaction(done$original, done$survey, drop = TRUE) |> as.integer() - surveys <- unique(done[, c("survey", "original")]) - colnames(surveys) <- c("id", "original") + + # store the original survey id and user id + surveys <- unique(done[, c("survey", "original", "user")]) + colnames(surveys) <- c("id", "original", "user") dbWriteTable(conn, name = "survey", append = TRUE, value = surveys) + # store the observations with new survey id done$cluster <- done$id done$group_x <- floor(done$x / max_dist / 2) done$group_y <- floor(done$y / max_dist / 2) - cols <- c("id", "x", "y", "survey", "status", "cluster", "group_x", "group_y") + cols <- c( + "id", "x", "y", "survey", "status", "cluster", "group_x", "group_y", + "region" + ) dbWriteTable( conn, name = "observation", append = TRUE, value = done[, cols] ) + # create an index on the observation table sql <- "CREATE INDEX IF NOT EXISTS observation_idx ON observation (group_x, group_y)" res <- dbSendQuery(conn, sql) diff --git a/R/simulate_observations.R b/R/simulate_observations.R index fd35fa1..c2e6994 100644 --- a/R/simulate_observations.R +++ b/R/simulate_observations.R @@ -43,5 +43,7 @@ simulate_observations <- function( nrow(observations), size = 1, prob = p_detection ) == 1 observations$id <- seq_along(observations$x) + observations$user <- 1 + observations$region <- 1 return(list(observations = observations, centroids = centroids)) } diff --git 
a/README.md b/README.md index c042e62..08e4372 100644 --- a/README.md +++ b/README.md @@ -70,13 +70,13 @@ summary(obs$observations) #> Mean : 946.40 Mean :1238.04 Mean :2.50 Mean :2.096 #> 3rd Qu.:1362.48 3rd Qu.:1764.89 3rd Qu.:3.25 3rd Qu.:3.000 #> Max. :2012.23 Max. :2082.18 Max. :4.00 Max. :3.000 -#> observed id -#> Mode :logical Min. : 1.00 -#> FALSE:44 1st Qu.: 26.75 -#> TRUE :60 Median : 52.50 -#> Mean : 52.50 -#> 3rd Qu.: 78.25 -#> Max. :104.00 +#> observed id user region +#> Mode :logical Min. : 1.00 Min. :1 Min. :1 +#> FALSE:44 1st Qu.: 26.75 1st Qu.:1 1st Qu.:1 +#> TRUE :60 Median : 52.50 Median :1 Median :1 +#> Mean : 52.50 Mean :1 Mean :1 +#> 3rd Qu.: 78.25 3rd Qu.:1 3rd Qu.:1 +#> Max. :104.00 Max. :1 Max. :1 obs <- obs$observations[obs$observations$observed, ] ``` From 2c2efb46854344139a1d94b21040cc8170d39949 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Mon, 12 May 2025 10:47:11 +0200 Subject: [PATCH 07/17] =?UTF-8?q?=F0=9F=90=9B=20Keep=20singletons=20when?= =?UTF-8?q?=20splitting=20surveys=20into=20groups?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/import_observations.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/import_observations.R b/R/import_observations.R index 2fd1d0e..f9f3304 100644 --- a/R/import_observations.R +++ b/R/import_observations.R @@ -117,6 +117,12 @@ import_observations <- function( } # remove edges larger than the threshold and decompose the graph edges[edges$length < threshold, ] |> + rbind( + data.frame( + id1 = observations$id[candidate], id2 = observations$id[candidate], + length = 0 + ) + ) |> graph_from_data_frame(directed = FALSE) |> decompose() -> sg # do nothing when there is this one graph From e5532a8f6bfceaa92019a1cc268b41b95507e47b Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Mon, 12 May 2025 13:31:20 +0200 Subject: [PATCH 08/17] =?UTF-8?q?=E2=9C=A8=20Add=20function=20to=20detect?= 
=?UTF-8?q?=20surveys=20with=20an=20unlikely=20large=20area?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DESCRIPTION | 6 +++- NAMESPACE | 11 +++++++ R/import_observations.R | 25 +++++++++------- R/unlikely_survey_area.R | 57 +++++++++++++++++++++++++++++++++++++ man/import_observations.Rd | 9 ++++-- man/unlikely_survey_area.Rd | 39 +++++++++++++++++++++++++ 6 files changed, 132 insertions(+), 15 deletions(-) create mode 100644 R/unlikely_survey_area.R create mode 100644 man/unlikely_survey_area.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 27caa0d..828cbf9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,13 +12,17 @@ Description: Clusters individual observations based on breeding indication License: GPL-3 URL: https://github.com/inbo/territoria BugReports: https://github.com/inbo/territoria/issues -Depends: R (>= 4.1.0) +Depends: + R (>= 4.1.0) Imports: assertthat, deldir, + dplyr, igraph, mvtnorm, + rlang, RSQLite, + sf, spatstat.geom, spatstat.random Config/checklist/communities: inbo diff --git a/NAMESPACE b/NAMESPACE index 4e94f37..fe7580e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(import_observations) export(simulate_observations) export(unlikely_edge_distribution) export(unlikely_status) +export(unlikely_survey_area) importFrom(RSQLite,SQLite) importFrom(RSQLite,dbClearResult) importFrom(RSQLite,dbConnect) @@ -24,10 +25,20 @@ importFrom(assertthat,is.number) importFrom(assertthat,is.string) importFrom(assertthat,noNA) importFrom(deldir,deldir) +importFrom(dplyr,group_by) +importFrom(dplyr,mutate) +importFrom(dplyr,summarise) importFrom(igraph,V) importFrom(igraph,decompose) importFrom(igraph,graph_from_data_frame) importFrom(mvtnorm,rmvnorm) +importFrom(rlang,.data) +importFrom(sf,st_area) +importFrom(sf,st_as_sf) +importFrom(sf,st_buffer) +importFrom(sf,st_concave_hull) +importFrom(sf,st_intersection) +importFrom(sf,st_union) importFrom(spatstat.geom,owin) 
importFrom(spatstat.random,rStrauss) importFrom(stats,aggregate) diff --git a/R/import_observations.R b/R/import_observations.R index f9f3304..d5b0139 100644 --- a/R/import_observations.R +++ b/R/import_observations.R @@ -3,8 +3,11 @@ #' The function overwrites any existing table with observations. #' @param observations a data.frame with the observations. #' @param max_dist maximum clustering distance in m. -#' @param threshold The minimum distance in m between groups of the same survey. -#' This is used to split surveys into different groups. +#' @param max_edge The maximum edge length in m. +#' We apply a Delaunay triangulation to the observations per survey. +#' After removing the edges larger than `max_edge`, we see which observations +#' result in a connected graph. +#' Every connected graph is assigned a new survey id. #' @param conn a DBI connection to an SQLite database. #' @export #' @importFrom assertthat assert_that has_name is.number is.string noNA @@ -13,12 +16,12 @@ #' @importFrom RSQLite dbClearResult dbSendQuery dbWriteTable #' @importFrom stats aggregate rnorm import_observations <- function( - observations, conn, max_dist = 336, threshold = 3000 + observations, conn, max_dist = 336, max_edge = 1500 ) { assert_that( inherits(observations, "data.frame"), is.number(max_dist), noNA(max_dist), max_dist > 0, - is.number(threshold), noNA(threshold), threshold > 0, max_dist < threshold + is.number(max_edge), noNA(max_edge), max_edge > 0, max_dist < max_edge ) assert_that( has_name(observations, "id"), has_name(observations, "user"), @@ -88,13 +91,13 @@ import_observations <- function( # splits surveys into clearly distinct groups observations$original <- observations$survey - # surveys with a bounding box diagonal smaller that the threshold are OK + # surveys with a bounding box diagonal smaller that the max_edge are OK bb_min <- aggregate(cbind(x, y) ~ survey, data = observations, FUN = min) bb_max <- aggregate(cbind(x, y) ~ survey, data = observations, 
FUN = max) diagonal <- sqrt((bb_max$x - bb_min$x) ^ 2 + (bb_max$y - bb_min$y) ^ 2) - to_do <- bb_min$survey[diagonal >= threshold] + to_do <- bb_min$survey[diagonal >= max_edge] done <- observations[!observations$survey %in% to_do, ] - # handle surveys with a bounding box diagonal larger than the threshold + # handle surveys with a bounding box diagonal larger than the max_edge for (i in to_do) { candidate <- which(observations$survey == i) # make Delaunay triangulation @@ -110,13 +113,13 @@ import_observations <- function( (dd$delsgs$x2 - dd$delsgs$x1) ^ 2 + (dd$delsgs$y2 - dd$delsgs$y1) ^ 2 ) ) - # do nothing when every edge is smaller than the threshold - if (max(edges$length) < threshold) { + # do nothing when every edge is smaller than the max_edge + if (max(edges$length) < max_edge) { done <- rbind(done, observations[candidate, ]) next } - # remove edges larger than the threshold and decompose the graph - edges[edges$length < threshold, ] |> + # remove edges larger than the max_edge and decompose the graph + edges[edges$length < max_edge, ] |> rbind( data.frame( id1 = observations$id[candidate], id2 = observations$id[candidate], diff --git a/R/unlikely_survey_area.R b/R/unlikely_survey_area.R new file mode 100644 index 0000000..b4fd54d --- /dev/null +++ b/R/unlikely_survey_area.R @@ -0,0 +1,57 @@ +#' Detect surveys with an unlikely surveyed area +#' +#' First we create a Delaunay triangulation of the survey area. +#' Then we ignore all triangles with an edge larger than `max_edge`. +#' Finally we calculate the area of the remaining triangles. +#' When the sum of the area of the remaining triangles is larger than +#' `max_area`, we consider the survey area unlikely. +#' @inheritParams import_observations +#' @inheritParams sf::st_as_sf +#' @inheritParams sf::st_buffer +#' @inheritParams sf::st_concave_hull +#' @param max_area the maximum area of the concave hull. 
+#' @importFrom assertthat assert_that is.number noNA +#' @importFrom dplyr group_by mutate summarise +#' @importFrom RSQLite dbGetQuery dbWriteTable +#' @importFrom rlang .data +#' @importFrom sf st_area st_as_sf st_buffer st_concave_hull st_intersection +#' st_union +#' @export +unlikely_survey_area <- function( + conn, relevant_area, max_area = 3e6, dist = 50, crs = 31370, ratio = 0.95 +) { + assert_that( + inherits(conn, "SQLiteConnection"), inherits(relevant_area, "sf"), + is.number(ratio), noNA(ratio), 0 <= ratio, ratio <= 1, + is.number(dist), noNA(dist), dist > 0, + is.number(max_area), noNA(max_area), max_area > 0 + ) + "SELECT survey, x, y FROM observation" |> + dbGetQuery(conn = conn) |> + st_as_sf(coords = c("x", "y"), crs = crs) |> + group_by(.data$survey) |> + summarise( + geometry = st_union(.data$geometry) |> + st_buffer(dist = dist) |> + st_concave_hull(ratio = ratio), + .groups = "drop" + ) |> + st_intersection(relevant_area) |> + group_by(.data$survey) |> + summarise(geometry = st_union(.data$geometry), .groups = "drop") |> + mutate( + area = st_area(.data$geometry) |> + as.numeric() + ) -> survey + survey |> + st_drop_geometry() |> + filter(.data$area >= max_area) |> + transmute( + .data$survey, value = .data$area, + reason = sprintf( + "Area of relevant concave hull larger than %.0f ha", max_area / 1e4 + ) + ) -> unlikely + dbWriteTable(conn = conn, name = "unlikely", value = unlikely, append = TRUE) + return(nrow(unlikely) / nrow(survey)) +} diff --git a/man/import_observations.Rd b/man/import_observations.Rd index 7e53aaf..8cb6b91 100644 --- a/man/import_observations.Rd +++ b/man/import_observations.Rd @@ -4,7 +4,7 @@ \alias{import_observations} \title{Import the observations} \usage{ -import_observations(observations, conn, max_dist = 336, threshold = 3000) +import_observations(observations, conn, max_dist = 336, max_edge = 1500) } \arguments{ \item{observations}{a data.frame with the observations.} @@ -13,8 +13,11 @@ 
import_observations(observations, conn, max_dist = 336, threshold = 3000) \item{max_dist}{maximum clustering distance in m.} -\item{threshold}{The minimum distance in m between groups of the same survey. -This is used to split surveys into different groups.} +\item{max_edge}{The maximum edge length in m. +We apply a Delaunay triangulation to the observations per survey. +After removing the edges larger than \code{max_edge}, we see which observations +result in a connected graph. +Every connected graph is assigned a new survey id.} } \description{ The function overwrites any existing table with observations. diff --git a/man/unlikely_survey_area.Rd b/man/unlikely_survey_area.Rd new file mode 100644 index 0000000..0653949 --- /dev/null +++ b/man/unlikely_survey_area.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/unlikely_survey_area.R +\name{unlikely_survey_area} +\alias{unlikely_survey_area} +\title{Detect surveys with an unlikely surveyed area} +\usage{ +unlikely_survey_area( + conn, + relevant_area, + max_area = 3e+06, + dist = 50, + crs = 31370, + ratio = 0.95 +) +} +\arguments{ +\item{conn}{a DBI connection to an SQLite database.} + +\item{max_area}{the maximum area of the concave hull.} + +\item{dist}{numeric or object of class \code{units}; buffer distance(s) for all, or for each of the elements in \code{x}. +In case \code{x} has geodetic coordinates (lon/lat) and \code{sf_use_s2()} is \code{TRUE}, a numeric +\code{dist} is taken as distance in meters and a \code{units} object in \code{dist} is converted to meters. +In case \code{x} has geodetic coordinates (lon/lat) and \code{sf_use_s2()} is \code{FALSE}, a numeric +\code{dist} is taken as degrees, and a \code{units} object in \code{dist} is converted to \code{arc_degree} (and warnings are issued). 
+In case \code{x} does not have geodetic coordinates (projected) then +numeric \code{dist} is assumed to have the units of the coordinates, and a \code{units} \code{dist} is converted to those if \code{st_crs(x)} is not \code{NA}.} + +\item{crs}{coordinate reference system to be assigned; object of class \code{crs}} + +\item{ratio}{numeric; fraction convex: 1 returns the convex hulls, 0 maximally concave hulls} +} +\description{ +First we create a Delaunay triangulation of the survey area. +Then we ignore all triangles with an edge larger than \code{max_edge}. +Finally we calculate the area of the remaining triangles. +When the sum of the area of the remaining triangles is larger than +\code{max_area}, we consider the survey area unlikely. +} From fad65f68eb53eda3ad3e29667974505b5e5ef039 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 21 May 2025 11:31:48 +0200 Subject: [PATCH 09/17] =?UTF-8?q?=F0=9F=8E=A8=20Rename=20p=20field=20to=20?= =?UTF-8?q?value=20in=20unlikely=20table?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/import_observations.R | 6 ++++++ R/unlikely_edge_distribution.R | 6 +++--- R/unlikely_status.R | 6 +++--- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/R/import_observations.R b/R/import_observations.R index d5b0139..d96b93c 100644 --- a/R/import_observations.R +++ b/R/import_observations.R @@ -78,6 +78,12 @@ import_observations <- function( res <- dbSendQuery(conn, sql) dbClearResult(res) + sql <- "CREATE TABLE unlikely ( + survey INTEGER NOT NULL, reason character NOT NULL, value NUMERIC NOT NULL +)" + res <- dbSendQuery(conn, sql) + dbClearResult(res) + sql <- "CREATE TABLE observation ( id INTEGER PRIMARY KEY, x REAL NOT NULL, y REAL NOT NULL, group_x INTEGER NOT NULL, group_y INTEGER NOT NULL, survey INTEGER NOT NULL, diff --git a/R/unlikely_edge_distribution.R b/R/unlikely_edge_distribution.R index 7a4f36a..a8679ca 100644 --- a/R/unlikely_edge_distribution.R +++ 
b/R/unlikely_edge_distribution.R @@ -35,11 +35,11 @@ unlikely_edge_distribution <- function( c(0, diff(dist_test$p_value) > 0) |> cumsum() -> dist_test$group dist_group <- aggregate(log_p ~ group, data = dist_test, FUN = sum) - dist_group$p <- 1 - exp(cumsum(dist_group$log_p)) - dist_group[dist_group$p < alpha, c("group", "p")] |> + dist_group$value <- 1 - exp(cumsum(dist_group$log_p)) + dist_group[dist_group$value < alpha, c("group", "value")] |> merge(dist_test[, c("survey", "group")], by = "group") -> unlikely unlikely$reason <- "Difference in distribution of Delaunay edges" - unlikely[, c("survey", "reason", "p")] |> + unlikely[, c("survey", "reason", "value")] |> dbWriteTable(conn = conn, name = "unlikely", append = TRUE) return(nrow(unlikely) / nrow(dist_test)) } diff --git a/R/unlikely_status.R b/R/unlikely_status.R index 02849bb..8bff3bd 100644 --- a/R/unlikely_status.R +++ b/R/unlikely_status.R @@ -52,14 +52,14 @@ GROUP BY survey, status >= %1$i", c(0, diff(status_obs$n.x) != 0 | diff(status_obs$n.y) != 0) |> cumsum() -> status_obs$group status_group <- aggregate(log_p ~ group, data = status_obs, FUN = sum) - status_group$p <- 1 - exp(cumsum(status_group$log_p)) - status_group[status_group$p < alpha, c("group", "p")] |> + status_group$value <- 1 - exp(cumsum(status_group$log_p)) + status_group[status_group$value < alpha, c("group", "value")] |> merge(status_obs[, c("survey", "group")], by = "group") -> unlikely unlikely$reason <- sprintf( "fraction status above or equal to %i greather than %.0f%%", status_split, 100 * threshold ) - unlikely[, c("survey", "reason", "p")] |> + unlikely[, c("survey", "reason", "value")] |> dbWriteTable(conn = conn, name = "unlikely", append = TRUE) return(nrow(unlikely) / nrow(status_obs)) } From 7faa1973109a84c016fedbaf6351f117b1205b96 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 21 May 2025 14:25:08 +0200 Subject: [PATCH 10/17] =?UTF-8?q?=E2=9C=A8=20Add=20get=5Ftotal()?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- NAMESPACE | 1 + R/distance_matrix.R | 8 ++++--- R/get_total.R | 52 +++++++++++++++++++++++++++++++++++++++++++++ man/get_total.Rd | 31 +++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 3 deletions(-) create mode 100644 R/get_total.R create mode 100644 man/get_total.Rd diff --git a/NAMESPACE b/NAMESPACE index fe7580e..b3fe0e1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(connect_db) export(distance_matrix) export(edge_distribution) export(get_cluster) +export(get_total) export(import_observations) export(simulate_observations) export(unlikely_edge_distribution) diff --git a/R/distance_matrix.R b/R/distance_matrix.R index d28756c..180dc9c 100644 --- a/R/distance_matrix.R +++ b/R/distance_matrix.R @@ -27,9 +27,11 @@ ON distance (distance)" sql <- sprintf( "WITH cte_obs AS ( SELECT - id, x, y, group_x, group_y, group_x + 1 AS group_xp, - group_y + 1 AS group_yp, group_y - 1 AS group_ym - FROM observation + o.id, o.x, o.y, o.group_x, o.group_y, o.group_x + 1 AS group_xp, + o.group_y + 1 AS group_yp, o.group_y - 1 AS group_ym + FROM observation AS o + LEFT JOIN unlikely AS u ON o.survey = u.survey + WHERE u.value IS NULL ), cte_distance AS ( SELECT diff --git a/R/get_total.R b/R/get_total.R new file mode 100644 index 0000000..307faa7 --- /dev/null +++ b/R/get_total.R @@ -0,0 +1,52 @@ +#' Get the total number of individuals per region +#' Returns the sum of `value` per region. +#' Clusters spanning more than one region get each a fraction of `value` based +#' on the number of observations of the cluster in a region divided by the total +#' number of observations in the cluster. +#' @inheritParams import_observations +#' @param value A string representing the value per cluster to be calculated. +#' Default is `"IIF(max_status >= 2, 1, iif(n_total > 1, 0.5, 0))"`. +#' Available variables are +#' - `max_status`: the highest status of a cluster. 
+#' - `n_total`: the total number of observations per cluster +#' - `n_region`: the number of observations in a cluster from the same region. +#' @export +#' @importFrom assertthat assert_that is.string noNA +#' @importFrom RSQLite dbGetQuery +get_total <- function( + conn, value = "IIF(max_status >= 2, 1, iif(n_total > 1, 0.5, 0))" +) { + assert_that( + inherits(conn, "SQLiteConnection"), is.string(value), noNA(value) + ) + sprintf( + "WITH cte_obs AS ( + SELECT o.region, o.cluster, o.status + FROM observation AS o + LEFT JOIN unlikely AS u ON o.survey = u.survey + WHERE u.value IS NULL +), +cte_region AS ( + SELECT cluster, region, COUNT(cluster) AS n_region + FROM cte_obs + WHERE region IS NOT NULL + GROUP BY cluster, region +), +cte_cluster AS ( + SELECT cluster, MAX(status) AS max_status, COUNT(status) AS n_total + FROM cte_obs + GROUP BY cluster +), +cte AS ( + SELECT + r.region, c.cluster, 1.0 * r.n_region / c.n_total AS fraction, + %s AS value + FROM cte_region AS r + INNER JOIN cte_cluster AS c ON r.cluster = c.cluster +) + +SELECT region, SUM(value * fraction) AS individuals FROM cte GROUP BY region", + value + ) |> + dbGetQuery(conn = conn) +} diff --git a/man/get_total.Rd b/man/get_total.Rd new file mode 100644 index 0000000..91a20a9 --- /dev/null +++ b/man/get_total.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_total.R +\name{get_total} +\alias{get_total} +\title{Get the total number of individuals per region +Returns the sum of \code{value} per region. +Clusters spanning more than one region get each a fraction of \code{value} based +on the number of observations of the cluster in a region divided by the total +number of observations in the cluster.} +\usage{ +get_total(conn, value = "IIF(max_status >= 2, 1, iif(n_total > 1, 0.5, 0))") +} +\arguments{ +\item{conn}{a DBI connection to an SQLite database.} + +\item{value}{A string representing the value per cluster to be calculated. 
+Default is \code{"IIF(max_status >= 2, 1, iif(n_total > 1, 0.5, 0))"}. +Available variables are +\itemize{ +\item \code{max_status}: the highest status of a cluster. +\item \code{n_total}: the total number of observations per cluster +\item \code{n_region}: the number of observations in a cluster from the same region. +}} +} +\description{ +Get the total number of individuals per region +Returns the sum of \code{value} per region. +Clusters spanning more than one region get each a fraction of \code{value} based +on the number of observations of the cluster in a region divided by the total +number of observations in the cluster. +} From ff9dbcc6023983c65164339929957b48734e25eb Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 21 May 2025 15:08:24 +0200 Subject: [PATCH 11/17] =?UTF-8?q?=E2=9C=A8=20Add=20get=5Funlikely=5Fsummar?= =?UTF-8?q?y()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- NAMESPACE | 1 + R/get_unlikely.R | 22 ++++++++++++++++++++++ man/get_unlikely_summary.Rd | 14 ++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 R/get_unlikely.R create mode 100644 man/get_unlikely_summary.Rd diff --git a/NAMESPACE b/NAMESPACE index b3fe0e1..76d4742 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(distance_matrix) export(edge_distribution) export(get_cluster) export(get_total) +export(get_unlikely_summary) export(import_observations) export(simulate_observations) export(unlikely_edge_distribution) diff --git a/R/get_unlikely.R b/R/get_unlikely.R new file mode 100644 index 0000000..73229ed --- /dev/null +++ b/R/get_unlikely.R @@ -0,0 +1,22 @@ +#' Get an overview of the unlikely reasons for each survey and region +#' @inheritParams import_observations +#' @export +#' @importFrom assertthat assert_that +#' @importFrom RSQLite dbGetQuery +get_unlikely_summary <- function(conn = conn) { + assert_that(inherits(conn, "SQLiteConnection")) + "WITH cte AS ( + SELECT survey, region, 
COUNT(id) n + FROM observation + WHERE region IS NOT NULL + GROUP BY survey, region +), +cte_survey AS ( + SELECT survey, region FROM cte GROUP BY survey HAVING n = MAX(n) +) +SELECT c.region, c.survey, u.reason +FROM cte_survey AS c +INNER JOIN unlikely AS u ON c.survey = u.survey +ORDER BY c.region, c.survey" |> + dbGetQuery(conn = conn) +} diff --git a/man/get_unlikely_summary.Rd b/man/get_unlikely_summary.Rd new file mode 100644 index 0000000..b72b5db --- /dev/null +++ b/man/get_unlikely_summary.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_unlikely.R +\name{get_unlikely_summary} +\alias{get_unlikely_summary} +\title{Get an overview of the unlikely reasons for each survey and region} +\usage{ +get_unlikely_summary(conn = conn) +} +\arguments{ +\item{conn}{a DBI connection to an SQLite database.} +} +\description{ +Get an overview of the unlikely reasons for each survey and region +} From 09f42920ccfe911bf41b5f6cad658de39a8b3e7f Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 21 May 2025 15:26:49 +0200 Subject: [PATCH 12/17] =?UTF-8?q?=E2=9C=A8=20Add=20get=5Funlikely=5Fobserv?= =?UTF-8?q?ation()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- NAMESPACE | 1 + R/get_unlikely.R | 16 ++++++++++++++++ man/get_unlikely_observation.Rd | 14 ++++++++++++++ 3 files changed, 31 insertions(+) create mode 100644 man/get_unlikely_observation.Rd diff --git a/NAMESPACE b/NAMESPACE index 76d4742..219f79c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(distance_matrix) export(edge_distribution) export(get_cluster) export(get_total) +export(get_unlikely_observation) export(get_unlikely_summary) export(import_observations) export(simulate_observations) diff --git a/R/get_unlikely.R b/R/get_unlikely.R index 73229ed..a0706d7 100644 --- a/R/get_unlikely.R +++ b/R/get_unlikely.R @@ -20,3 +20,19 @@ INNER JOIN unlikely AS u ON c.survey = u.survey ORDER BY 
c.region, c.survey" |> dbGetQuery(conn = conn) } + +#' Get the unlikely observations +#' @inheritParams import_observations +#' @export +#' @importFrom assertthat assert_that +#' @importFrom RSQLite dbGetQuery +#' @importFrom sf st_as_sf +get_unlikely_observation <- function(conn, crs = 31370) { + assert_that(inherits(conn, "SQLiteConnection")) + "SELECT o.id, o.x, o.y, o.status, o.region, o.survey, s.original, s.user +FROM observation AS o +INNER JOIN unlikely AS u ON o.survey = u.survey +INNER JOIN survey AS s on o.survey = s.id" |> + dbGetQuery(conn = conn) |> + st_as_sf(coords = c("x", "y"), crs = crs) +} diff --git a/man/get_unlikely_observation.Rd b/man/get_unlikely_observation.Rd new file mode 100644 index 0000000..fd016ed --- /dev/null +++ b/man/get_unlikely_observation.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_unlikely.R +\name{get_unlikely_observation} +\alias{get_unlikely_observation} +\title{Get the unlikely observations} +\usage{ +get_unlikely_observation(conn, crs = 31370) +} +\arguments{ +\item{conn}{a DBI connection to an SQLite database.} +} +\description{ +Get the unlikely observations +} From a19a794e5345dbc595a9bf793c1067a9f356312d Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 21 May 2025 16:11:01 +0200 Subject: [PATCH 13/17] =?UTF-8?q?=F0=9F=92=9A=20Fix=20checklist=20issues?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- NAMESPACE | 3 +++ R/get_unlikely.R | 1 + R/unlikely_survey_area.R | 7 ++++--- man/get_unlikely_observation.Rd | 2 ++ man/unlikely_survey_area.Rd | 2 ++ 5 files changed, 12 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 219f79c..f9dce5c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,9 +28,11 @@ importFrom(assertthat,is.number) importFrom(assertthat,is.string) importFrom(assertthat,noNA) importFrom(deldir,deldir) +importFrom(dplyr,filter) importFrom(dplyr,group_by) 
importFrom(dplyr,mutate) importFrom(dplyr,summarise) +importFrom(dplyr,transmute) importFrom(igraph,V) importFrom(igraph,decompose) importFrom(igraph,graph_from_data_frame) @@ -40,6 +42,7 @@ importFrom(sf,st_area) importFrom(sf,st_as_sf) importFrom(sf,st_buffer) importFrom(sf,st_concave_hull) +importFrom(sf,st_drop_geometry) importFrom(sf,st_intersection) importFrom(sf,st_union) importFrom(spatstat.geom,owin) diff --git a/R/get_unlikely.R b/R/get_unlikely.R index a0706d7..dc66d47 100644 --- a/R/get_unlikely.R +++ b/R/get_unlikely.R @@ -23,6 +23,7 @@ ORDER BY c.region, c.survey" |> #' Get the unlikely observations #' @inheritParams import_observations +#' @inheritParams sf::st_as_sf #' @export #' @importFrom assertthat assert_that #' @importFrom RSQLite dbGetQuery diff --git a/R/unlikely_survey_area.R b/R/unlikely_survey_area.R index b4fd54d..a86a692 100644 --- a/R/unlikely_survey_area.R +++ b/R/unlikely_survey_area.R @@ -9,13 +9,14 @@ #' @inheritParams sf::st_as_sf #' @inheritParams sf::st_buffer #' @inheritParams sf::st_concave_hull +#' @param relevant_area An `sf` object with the relevant area. #' @param max_area the maximum area of the concave hull. 
#' @importFrom assertthat assert_that is.number noNA -#' @importFrom dplyr group_by mutate summarise +#' @importFrom dplyr filter group_by mutate summarise transmute #' @importFrom RSQLite dbGetQuery dbWriteTable #' @importFrom rlang .data -#' @importFrom sf st_area st_as_sf st_buffer st_concave_hull st_intersection -#' st_union +#' @importFrom sf st_area st_as_sf st_buffer st_concave_hull st_drop_geometry +#' st_intersection st_union #' @export unlikely_survey_area <- function( conn, relevant_area, max_area = 3e6, dist = 50, crs = 31370, ratio = 0.95 diff --git a/man/get_unlikely_observation.Rd b/man/get_unlikely_observation.Rd index fd016ed..9bf422f 100644 --- a/man/get_unlikely_observation.Rd +++ b/man/get_unlikely_observation.Rd @@ -8,6 +8,8 @@ get_unlikely_observation(conn, crs = 31370) } \arguments{ \item{conn}{a DBI connection to an SQLite database.} + +\item{crs}{coordinate reference system to be assigned; object of class \code{crs}} } \description{ Get the unlikely observations diff --git a/man/unlikely_survey_area.Rd b/man/unlikely_survey_area.Rd index 0653949..ef35e65 100644 --- a/man/unlikely_survey_area.Rd +++ b/man/unlikely_survey_area.Rd @@ -16,6 +16,8 @@ unlikely_survey_area( \arguments{ \item{conn}{a DBI connection to an SQLite database.} +\item{relevant_area}{An \code{sf} object with the relevant area.} + \item{max_area}{the maximum area of the concave hull.} \item{dist}{numeric or object of class \code{units}; buffer distance(s) for all, or for each of the elements in \code{x}. 
From 82ae7fe1c5191553c661d0da9afd0c31de017f66 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 21 May 2025 16:20:41 +0200 Subject: [PATCH 14/17] =?UTF-8?q?=E2=9C=A8=20Get=5Fcluster()=20ignores=20u?= =?UTF-8?q?nlikely=20surveys?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/get_cluster.R | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/R/get_cluster.R b/R/get_cluster.R index 67c641a..95049be 100644 --- a/R/get_cluster.R +++ b/R/get_cluster.R @@ -9,14 +9,19 @@ get_cluster <- function(conn) { "observation" %in% dbListTables(conn, "observation"), msg = "No observations found. Did you run `import_observations()`?" ) - obs <- dbGetQuery( - conn, "SELECT id, x, y, survey, status, cluster FROM observation" - ) + "SELECT +o.id, o.x, o.y, o.survey, o.status, o.cluster +FROM observation AS o +LEFT JOIN unlikely AS u ON o.survey = u.survey +WHERE u.value IS NULL" |> + dbGetQuery(conn = conn) -> obs cluster <- dbGetQuery(conn, " SELECT - cluster, COUNT(cluster) AS n_obs, MAX(status) AS max_status, - AVG(x) AS centroid_x, AVG(y) AS centroid_y -FROM observation -GROUP BY cluster") + o.cluster, COUNT(o.cluster) AS n_obs, MAX(status) AS max_status, + AVG(o.x) AS centroid_x, AVG(o.y) AS centroid_y +FROM observation AS o +LEFT JOIN unlikely AS u ON o.survey = u.survey +WHERE u.value IS NULL +GROUP BY o.cluster") return(list(cluster = cluster, observations = obs)) } From 99e69caea375801681f41fb1d9cd24bd089ff102 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 21 May 2025 16:47:09 +0200 Subject: [PATCH 15/17] =?UTF-8?q?=F0=9F=93=9D=20Update=20NEWS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- NEWS.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/NEWS.md b/NEWS.md index a7a7b79..4ec4f5f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,22 @@ # territoria 0.1.0 +* `import_observations()` splits surveys with 
clearly distinct groups of + observations into multiple surveys. + This is done by clustering the observations and checking if the clusters are + too far apart. + It also requires to provide a user and region id. +* `edge_distribution()` return the length of the edges of a Delaunay + triangulation on the observations per survey. +* Add `unlikely_survey_area()`, `unlikely_status()` and + `unlikely_edge_distribution()` to detect surveys which are unlikely done + according to the sampling protocol. +* `distance_matrix()` ignores observations from unlikely surveys. + Hence `cluster_observation()` will ignore them during the clustering and + `get_cluster()` ignores them. +* `get_total()` return the total number of individuals per region based on the + clustering. +* `get_unlikely_summary()` and `get_unlikely_observation()` returns the unlikely + surveys and observations. * Update [`checklist`](https://inbo.github.io/checklist/) machinery. # territoria 0.0.3 From 1056ed49ae349f52b59dc76c980929ff1a6ed7c2 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 21 May 2025 17:28:46 +0200 Subject: [PATCH 16/17] =?UTF-8?q?=F0=9F=91=B7=20Use=20latest=20checklist?= =?UTF-8?q?=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/check_on_branch.yml | 2 +- .github/workflows/check_on_main.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check_on_branch.yml b/.github/workflows/check_on_branch.yml index 2f09f44..bc635a0 100644 --- a/.github/workflows/check_on_branch.yml +++ b/.github/workflows/check_on_branch.yml @@ -17,4 +17,4 @@ jobs: permissions: contents: read steps: - - uses: inbo/actions/check_pkg@checklist-0.3.6 + - uses: inbo/actions/check_pkg@main diff --git a/.github/workflows/check_on_main.yml b/.github/workflows/check_on_main.yml index d5a137b..6e8268d 100644 --- a/.github/workflows/check_on_main.yml +++ b/.github/workflows/check_on_main.yml @@ -16,4 +16,4 
@@ jobs: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: inbo/actions/check_pkg@checklist-0.3.6 + - uses: inbo/actions/check_pkg@main From a95a46c7ebf3e8fd4a4a7d102d1ad16c9dbdda08 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 4 Jun 2025 13:32:07 +0200 Subject: [PATCH 17/17] Apply suggestions from code review Co-authored-by: raisa_carmen <56317093+RCinbo@users.noreply.github.com> --- NEWS.md | 8 ++++---- R/edge_distribution.R | 2 +- R/get_unlikely.R | 2 +- R/unlikely_edge_distribution.R | 2 +- R/unlikely_status.R | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4ec4f5f..ba80664 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,9 +3,9 @@ * `import_observations()` splits surveys with clearly distinct groups of observations into multiple surveys. This is done by clustering the observations and checking if the clusters are - too far apart. + too far apart geographically. It also requires to provide a user and region id. -* `edge_distribution()` return the length of the edges of a Delaunay +* `edge_distribution()` returns the distribution of the length of the edges of a Delaunay triangulation on the observations per survey. * Add `unlikely_survey_area()`, `unlikely_status()` and `unlikely_edge_distribution()` to detect surveys which are unlikely done according to the sampling protocol. @@ -13,9 +13,9 @@ * `distance_matrix()` ignores observations from unlikely surveys. Hence `cluster_observation()` will ignore them during the clustering and `get_cluster()` ignores them. -* `get_total()` return the total number of individuals per region based on the +* `get_total()` returns the total number of individuals per region based on the clustering. -* `get_unlikely_summary()` and `get_unlikely_observation()` returns the unlikely +* `get_unlikely_summary()` and `get_unlikely_observation()` return the unlikely surveys and observations. * Update [`checklist`](https://inbo.github.io/checklist/) machinery.
diff --git a/R/edge_distribution.R b/R/edge_distribution.R index 7070043..1c92636 100644 --- a/R/edge_distribution.R +++ b/R/edge_distribution.R @@ -1,6 +1,6 @@ #' @title Edge distribution #' @description -#' This function return the length of the edges of the Delaunay triangulation +#' This function returns the length of the edges of the Delaunay triangulation #' per survey. #' The function returns only edges with length smaller than twice `max_dist`. #' @inheritParams import_observations diff --git a/R/get_unlikely.R b/R/get_unlikely.R index dc66d47..846b1d7 100644 --- a/R/get_unlikely.R +++ b/R/get_unlikely.R @@ -1,4 +1,4 @@ -#' Get an overview of the unlikely reasons for each survey and region +#' Get an overview of why the data of each survey and/or region is unlikely to be the result of the counting protocol. #' @inheritParams import_observations #' @export #' @importFrom assertthat assert_that diff --git a/R/unlikely_edge_distribution.R b/R/unlikely_edge_distribution.R index a8679ca..4d59e46 100644 --- a/R/unlikely_edge_distribution.R +++ b/R/unlikely_edge_distribution.R @@ -9,7 +9,7 @@ #' and writes the list of unlikely surveys to the database. #' @inheritParams import_observations #' @inheritParams unlikely_status -#' @param conn_reference a connection to the reference database. +#' @param conn_reference A connection to the reference database. #' @export #' @importFrom assertthat assert_that is.number noNA #' @importFrom RSQLite dbWriteTable diff --git a/R/unlikely_status.R b/R/unlikely_status.R index 8bff3bd..be33c04 100644 --- a/R/unlikely_status.R +++ b/R/unlikely_status.R @@ -4,7 +4,7 @@ #' proportion of the observations with status above or equal to the #' `status_split `. #' Default of 0.5. -#' @param status_split An integer values indicating which status level splits +#' @param status_split An integer value indicating which status level splits #' the status into two groups. #' The default is 2. 
#' @param alpha A numeric value between 0 and 1 indicating the family-wise Type