Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/check_on_branch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ jobs:
permissions:
contents: read
steps:
- uses: inbo/actions/check_pkg@checklist-0.3.6
- uses: inbo/actions/check_pkg@main
2 changes: 1 addition & 1 deletion .github/workflows/check_on_main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ jobs:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: inbo/actions/check_pkg@checklist-0.3.6
- uses: inbo/actions/check_pkg@main
8 changes: 4 additions & 4 deletions .zenodo.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"title": "territoria: Clustering Observations from Breeding Birds into Territoria",
"version": "0.0.3",
"version": "0.1.0",
"license": "GPL-3.0",
"upload_type": "software",
"description": "<p>Clusters individual observations based on breeding indication and\ndistance between observations.<\/p>",
"description": "<p>Clusters individual observations based on breeding indication and distance between observations.<\/p>",
"keywords": [
"breeding bird",
"cluster"
Expand All @@ -15,11 +15,11 @@
"name": "Onkelinx, Thierry",
"affiliation": "Research Institute for Nature and Forest (INBO)",
"orcid": "0000-0001-8804-4216",
"type": "ContactPerson"
"type": "contactperson"
},
{
"name": "Research Institute for Nature and Forest (INBO)",
"type": "RightsHolder"
"type": "rightsholder"
}
],
"creators": [
Expand Down
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ repository-code: https://github.com/inbo/territoria/
type: software
abstract: "Clusters individual observations based on breeding indication and distance
between observations."
version: 0.0.3
version: 0.1.0
11 changes: 9 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: territoria
Title: Clustering Observations from Breeding Birds into Territoria
Version: 0.0.3
Version: 0.1.0
Authors@R: c(
person("Thierry", "Onkelinx", , "thierry.onkelinx@inbo.be", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-8804-4216", affiliation = "Research Institute for Nature and Forest (INBO)")),
Expand All @@ -12,15 +12,22 @@ Description: Clusters individual observations based on breeding indication
License: GPL-3
URL: https://github.com/inbo/territoria
BugReports: https://github.com/inbo/territoria/issues
Depends:
R (>= 4.1.0)
Imports:
assertthat,
deldir,
dplyr,
igraph,
mvtnorm,
rlang,
RSQLite,
sf,
spatstat.geom,
spatstat.random
Config/checklist/communities: inbo
Config/checklist/keywords: breeding bird; cluster
Encoding: UTF-8
Language: en-GB
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
2 changes: 1 addition & 1 deletion LICENSE.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
### GNU GENERAL PUBLIC LICENSE
# GNU GENERAL PUBLIC LICENSE

Version 3, 29 June 2007

Expand Down
28 changes: 28 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,16 @@
export(cluster_observation)
export(connect_db)
export(distance_matrix)
export(edge_distribution)
export(get_cluster)
export(get_total)
export(get_unlikely_observation)
export(get_unlikely_summary)
export(import_observations)
export(simulate_observations)
export(unlikely_edge_distribution)
export(unlikely_status)
export(unlikely_survey_area)
importFrom(RSQLite,SQLite)
importFrom(RSQLite,dbClearResult)
importFrom(RSQLite,dbConnect)
Expand All @@ -20,7 +27,28 @@ importFrom(assertthat,is.flag)
importFrom(assertthat,is.number)
importFrom(assertthat,is.string)
importFrom(assertthat,noNA)
importFrom(deldir,deldir)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,mutate)
importFrom(dplyr,summarise)
importFrom(dplyr,transmute)
importFrom(igraph,V)
importFrom(igraph,decompose)
importFrom(igraph,graph_from_data_frame)
importFrom(mvtnorm,rmvnorm)
importFrom(rlang,.data)
importFrom(sf,st_area)
importFrom(sf,st_as_sf)
importFrom(sf,st_buffer)
importFrom(sf,st_concave_hull)
importFrom(sf,st_drop_geometry)
importFrom(sf,st_intersection)
importFrom(sf,st_union)
importFrom(spatstat.geom,owin)
importFrom(spatstat.random,rStrauss)
importFrom(stats,aggregate)
importFrom(stats,binom.test)
importFrom(stats,rbinom)
importFrom(stats,rnorm)
importFrom(stats,wilcox.test)
21 changes: 21 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,24 @@
# territoria 0.1.0

* `import_observations()` splits surveys with clearly distinct groups of
observations into multiple surveys.
This is done by clustering the observations and checking if the clusters are
too far apart geographically.
It also requires to provide a user and region id.
* `edge_distribution()` return the distribution of the length of the edges of a Delaunay
triangulation on the observations per survey.
* Add `unlikely_survey_area()`, `unlikely_status()` and
`unlikely_edge_distribution()` to detect surveys which are unlikely done
according to the sampling protocol.
* `distance_matrix()` ignores observations from unlikely surveys.
Hence `cluster_observation()` will ignore them during the clustering and
`get_cluster()` ignores them.
* `get_total()` returns the total number of individuals per region based on the
clustering.
* `get_unlikely_summary()` and `get_unlikely_observation()` return the unlikely
surveys and observations.
* Update [`checklist`](https://inbo.github.io/checklist/) machinery.

# territoria 0.0.3

* Update [`checklist`](https://inbo.github.io/checklist/) machinery.
Expand Down
8 changes: 5 additions & 3 deletions R/distance_matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@ ON distance (distance)"
sql <- sprintf(
"WITH cte_obs AS (
SELECT
id, x, y, group_x, group_y, group_x + 1 AS group_xp,
group_y + 1 AS group_yp, group_y - 1 AS group_ym
FROM observation
o.id, o.x, o.y, o.group_x, o.group_y, o.group_x + 1 AS group_xp,
o.group_y + 1 AS group_yp, o.group_y - 1 AS group_ym
FROM observation AS o
LEFT JOIN unlikely AS u ON o.survey = u.survey
WHERE u.value IS NULL
),
cte_distance AS (
SELECT
Expand Down
40 changes: 40 additions & 0 deletions R/edge_distribution.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#' @title Edge distribution
#' @description
#' This function returns the length of the edges of the Delaunay triangulation
#' per survey.
#' The function returns only edges with length smaller than twice `max_dist`.
#' @inheritParams import_observations
#' @importFrom assertthat assert_that is.number noNA
#' @importFrom deldir deldir
#' @importFrom RSQLite dbGetQuery
#' @export
edge_distribution <- function(conn, max_dist = 336) {
assert_that(
inherits(conn, "SQLiteConnection"),
is.number(max_dist), noNA(max_dist), max_dist > 0
)
"SELECT survey, x, y FROM observation" |>
dbGetQuery(conn = conn) -> observations
table(observations$survey) |>
as.data.frame() -> n_survey
n_survey[n_survey$Freq > 1, ] |>
merge(x = observations, by.y = "Var1", by.x = "survey") -> observations
edges <- data.frame(survey = integer(0), length = numeric(0))
for (i in unique(observations$survey)) {
candidate <- which(observations$survey == i)
dd <- deldir(
x = rnorm(length(candidate), mean = observations$x[candidate], sd = 0.01),
y = rnorm(length(candidate), mean = observations$y[candidate], sd = 0.01),
id = observations$id[candidate]
)
extra <- data.frame(
survey = i,
length = sqrt(
(dd$delsgs$x2 - dd$delsgs$x1) ^ 2 + (dd$delsgs$y2 - dd$delsgs$y1) ^ 2
)
)
extra[extra$length <= 2 * max_dist, ] |>
rbind(edges) -> edges
}
return(edges)
}
19 changes: 12 additions & 7 deletions R/get_cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,19 @@ get_cluster <- function(conn) {
"observation" %in% dbListTables(conn, "observation"),
msg = "No observations found. Did you run `import_observations()`?"
)
obs <- dbGetQuery(
conn, "SELECT id, x, y, survey, status, cluster FROM observation"
)
"SELECT
o.id, o.x, o.y, o.survey, o.status, o.cluster
FROM observation AS o
LEFT JOIN unlikely AS u ON o.survey = u.survey
WHERE u.value IS NULL" |>
dbGetQuery(conn = conn) -> obs
cluster <- dbGetQuery(conn, "
SELECT
cluster, COUNT(cluster) AS n_obs, MAX(status) AS max_status,
AVG(x) AS centroid_x, AVG(y) AS centroid_y
FROM observation
GROUP BY cluster")
o.cluster, COUNT(o.cluster) AS n_obs, MAX(status) AS max_status,
AVG(o.x) AS centroid_x, AVG(o.y) AS centroid_y
FROM observation AS o
LEFT JOIN unlikely AS u ON o.survey = u.survey
WHERE u.value IS NULL
GROUP BY o.cluster")
return(list(cluster = cluster, observations = obs))
}
52 changes: 52 additions & 0 deletions R/get_total.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#' Get the total number of individuals per region
#' Returns the sum of `value` per region.
#' Clusters spanning more than one region get each a fraction of `value` based
#' on the number of observations of the cluster in a region divided by the total
#' number of observations in the cluster.
#' @inheritParams import_observations
#' @param value A string representing the value per cluster to be calculated.
#' Default is `"IIF(max_status >= 2, 1, iif(n_total > 1, 0.5, 0))"`.
#' Available variables are
#' - `max_status`: the highest status of a cluster.
#' - `n_total`: the total number of observations per cluster
#' - `n_region`: the number of observations in a cluster from the same region.
#' @export
#' @importFrom assertthat assert_that is.string noNA
#' @importFrom RSQLite dbGetQuery
get_total <- function(
conn, value = "IIF(max_status >= 2, 1, iif(n_total > 1, 0.5, 0))"
) {
assert_that(
inherits(conn, "SQLiteConnection"), is.string(value), noNA(value)
)
sprintf(
"WITH cte_obs AS (
SELECT o.region, o.cluster, o.status
FROM observation AS o
LEFT JOIN unlikely AS u ON o.survey = u.survey
WHERE u.value IS NULL
),
cte_region AS (
SELECT cluster, region, COUNT(cluster) AS n_region
FROM cte_obs
WHERE region IS NOT NULL
GROUP BY cluster, region
),
cte_cluster AS (
SELECT cluster, MAX(status) AS max_status, COUNT(status) AS n_total
FROM cte_obs
GROUP BY cluster
),
cte AS (
SELECT
r.region, c.cluster, 1.0 * r.n_region / c.n_total AS fraction,
%s AS value
FROM cte_region AS r
INNER JOIN cte_cluster AS c ON r.cluster = c.cluster
)

SELECT region, SUM(value * fraction) AS individuals FROM cte GROUP BY region",
value
) |>
dbGetQuery(conn = conn)
}
39 changes: 39 additions & 0 deletions R/get_unlikely.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#' Get an overview of why the data of each survey and/or region is unlikely to be the result of the counting protocol.

Check warning on line 1 in R/get_unlikely.R

View workflow job for this annotation

GitHub Actions / check package

file=R/get_unlikely.R,line=1,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 118 characters.
#' @inheritParams import_observations
#' @export
#' @importFrom assertthat assert_that
#' @importFrom RSQLite dbGetQuery
get_unlikely_summary <- function(conn = conn) {
assert_that(inherits(conn, "SQLiteConnection"))
"WITH cte AS (
SELECT survey, region, COUNT(id) n
FROM observation
WHERE region IS NOT NULL
GROUP BY survey, region
),
cte_survey AS (
SELECT survey, region FROM cte GROUP BY survey HAVING n = MAX(n)
)
SELECT c.region, c.survey, u.reason
FROM cte_survey AS c
INNER JOIN unlikely AS u ON c.survey = u.survey
ORDER BY c.region, c.survey" |>
dbGetQuery(conn = conn)
}

#' Get the unlikely observations
#' @inheritParams import_observations
#' @inheritParams sf::st_as_sf
#' @export
#' @importFrom assertthat assert_that
#' @importFrom RSQLite dbGetQuery
#' @importFrom sf st_as_sf
get_unlikely_observation <- function(conn, crs = 31370) {
assert_that(inherits(conn, "SQLiteConnection"))
"SELECT o.id, o.x, o.y, o.status, o.region, o.survey, s.original, s.user
FROM observation AS o
INNER JOIN unlikely AS u ON o.survey = u.survey
INNER JOIN survey AS s on o.survey = s.id" |>
dbGetQuery(conn = conn) |>
st_as_sf(coords = c("x", "y"), crs = crs)
}
Loading