Skip to content
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/check_on_branch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ jobs:
permissions:
contents: read
steps:
- uses: inbo/actions/check_pkg@checklist-0.3.6
- uses: inbo/actions/check_pkg@main
2 changes: 1 addition & 1 deletion .github/workflows/check_on_main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ jobs:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: inbo/actions/check_pkg@checklist-0.3.6
- uses: inbo/actions/check_pkg@main
8 changes: 4 additions & 4 deletions .zenodo.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"title": "territoria: Clustering Observations from Breeding Birds into Territoria",
"version": "0.0.3",
"version": "0.1.0",
"license": "GPL-3.0",
"upload_type": "software",
"description": "<p>Clusters individual observations based on breeding indication and\ndistance between observations.<\/p>",
"description": "<p>Clusters individual observations based on breeding indication and distance between observations.<\/p>",
"keywords": [
"breeding bird",
"cluster"
Expand All @@ -15,11 +15,11 @@
"name": "Onkelinx, Thierry",
"affiliation": "Research Institute for Nature and Forest (INBO)",
"orcid": "0000-0001-8804-4216",
"type": "ContactPerson"
"type": "contactperson"
},
{
"name": "Research Institute for Nature and Forest (INBO)",
"type": "RightsHolder"
"type": "rightsholder"
}
],
"creators": [
Expand Down
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ repository-code: https://github.com/inbo/territoria/
type: software
abstract: "Clusters individual observations based on breeding indication and distance
between observations."
version: 0.0.3
version: 0.1.0
11 changes: 9 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: territoria
Title: Clustering Observations from Breeding Birds into Territoria
Version: 0.0.3
Version: 0.1.0
Authors@R: c(
person("Thierry", "Onkelinx", , "thierry.onkelinx@inbo.be", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-8804-4216", affiliation = "Research Institute for Nature and Forest (INBO)")),
Expand All @@ -12,15 +12,22 @@ Description: Clusters individual observations based on breeding indication
License: GPL-3
URL: https://github.com/inbo/territoria
BugReports: https://github.com/inbo/territoria/issues
Depends:
R (>= 4.1.0)
Imports:
assertthat,
deldir,
dplyr,
igraph,
mvtnorm,
rlang,
RSQLite,
sf,
spatstat.geom,
spatstat.random
Config/checklist/communities: inbo
Config/checklist/keywords: breeding bird; cluster
Encoding: UTF-8
Language: en-GB
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
2 changes: 1 addition & 1 deletion LICENSE.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
### GNU GENERAL PUBLIC LICENSE
# GNU GENERAL PUBLIC LICENSE

Version 3, 29 June 2007

Expand Down
28 changes: 28 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,16 @@
export(cluster_observation)
export(connect_db)
export(distance_matrix)
export(edge_distribution)
export(get_cluster)
export(get_total)
export(get_unlikely_observation)
export(get_unlikely_summary)
export(import_observations)
export(simulate_observations)
export(unlikely_edge_distribution)
export(unlikely_status)
export(unlikely_survey_area)
importFrom(RSQLite,SQLite)
importFrom(RSQLite,dbClearResult)
importFrom(RSQLite,dbConnect)
Expand All @@ -20,7 +27,28 @@ importFrom(assertthat,is.flag)
importFrom(assertthat,is.number)
importFrom(assertthat,is.string)
importFrom(assertthat,noNA)
importFrom(deldir,deldir)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,mutate)
importFrom(dplyr,summarise)
importFrom(dplyr,transmute)
importFrom(igraph,V)
importFrom(igraph,decompose)
importFrom(igraph,graph_from_data_frame)
importFrom(mvtnorm,rmvnorm)
importFrom(rlang,.data)
importFrom(sf,st_area)
importFrom(sf,st_as_sf)
importFrom(sf,st_buffer)
importFrom(sf,st_concave_hull)
importFrom(sf,st_drop_geometry)
importFrom(sf,st_intersection)
importFrom(sf,st_union)
importFrom(spatstat.geom,owin)
importFrom(spatstat.random,rStrauss)
importFrom(stats,aggregate)
importFrom(stats,binom.test)
importFrom(stats,rbinom)
importFrom(stats,rnorm)
importFrom(stats,wilcox.test)
21 changes: 21 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,24 @@
# territoria 0.1.0

* `import_observations()` splits surveys with clearly distinct groups of
observations into multiple surveys.
This is done by clustering the observations and checking if the clusters are
too far apart.
It also requires to provide a user and region id.
* `edge_distribution()` return the length of the edges of a Delaunay
triangulation on the observations per survey.
* Add `unlikely_survey_area()`, `unlikely_status()` and
`unlikely_edge_distribution()` to detect surveys which are unlikely done
according to the sampling protocol.
* `distance_matrix()` ignores observations from unlikely surveys.
Hence `cluster_observation()` will ignore them during the clustering and
`get_cluster()` ignores them.
* `get_total()` return the total number of individuals per region based on the
clustering.
* `get_unlikely_summary()` and `get_unlikely_observation()` returns the unlikely
surveys and observations.
* Update [`checklist`](https://inbo.github.io/checklist/) machinery.

# territoria 0.0.3

* Update [`checklist`](https://inbo.github.io/checklist/) machinery.
Expand Down
8 changes: 5 additions & 3 deletions R/distance_matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@
sql <- sprintf(
"WITH cte_obs AS (
SELECT
id, x, y, group_x, group_y, group_x + 1 AS group_xp,
group_y + 1 AS group_yp, group_y - 1 AS group_ym
FROM observation
o.id, o.x, o.y, o.group_x, o.group_y, o.group_x + 1 AS group_xp,
o.group_y + 1 AS group_yp, o.group_y - 1 AS group_ym
FROM observation AS o
LEFT JOIN unlikely AS u ON o.survey = u.survey
WHERE u.value IS NULL

Check warning on line 34 in R/distance_matrix.R

View check run for this annotation

Codecov / codecov/patch

R/distance_matrix.R#L30-L34

Added lines #L30 - L34 were not covered by tests
),
cte_distance AS (
SELECT
Expand Down
40 changes: 40 additions & 0 deletions R/edge_distribution.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#' @title Edge distribution
#' @description
#' This function return the length of the edges of the Delaunay triangulation
#' per survey.
#' The function returns only edges with length smaller than twice `max_dist`.
#' @inheritParams import_observations
#' @importFrom assertthat assert_that is.number noNA
#' @importFrom deldir deldir
#' @importFrom RSQLite dbGetQuery
#' @export
edge_distribution <- function(conn, max_dist = 336) {
assert_that(
inherits(conn, "SQLiteConnection"),
is.number(max_dist), noNA(max_dist), max_dist > 0

Check warning on line 14 in R/edge_distribution.R

View check run for this annotation

Codecov / codecov/patch

R/edge_distribution.R#L12-L14

Added lines #L12 - L14 were not covered by tests
)
"SELECT survey, x, y FROM observation" |>
dbGetQuery(conn = conn) -> observations
table(observations$survey) |>
as.data.frame() -> n_survey
n_survey[n_survey$Freq > 1, ] |>
merge(x = observations, by.y = "Var1", by.x = "survey") -> observations
edges <- data.frame(survey = integer(0), length = numeric(0))
for (i in unique(observations$survey)) {
candidate <- which(observations$survey == i)
dd <- deldir(
x = rnorm(length(candidate), mean = observations$x[candidate], sd = 0.01),
y = rnorm(length(candidate), mean = observations$y[candidate], sd = 0.01),
id = observations$id[candidate]

Check warning on line 28 in R/edge_distribution.R

View check run for this annotation

Codecov / codecov/patch

R/edge_distribution.R#L16-L28

Added lines #L16 - L28 were not covered by tests
)
extra <- data.frame(
survey = i,
length = sqrt(
(dd$delsgs$x2 - dd$delsgs$x1) ^ 2 + (dd$delsgs$y2 - dd$delsgs$y1) ^ 2

Check warning on line 33 in R/edge_distribution.R

View check run for this annotation

Codecov / codecov/patch

R/edge_distribution.R#L30-L33

Added lines #L30 - L33 were not covered by tests
)
)
extra[extra$length <= 2 * max_dist, ] |>
rbind(edges) -> edges

Check warning on line 37 in R/edge_distribution.R

View check run for this annotation

Codecov / codecov/patch

R/edge_distribution.R#L36-L37

Added lines #L36 - L37 were not covered by tests
}
return(edges)

Check warning on line 39 in R/edge_distribution.R

View check run for this annotation

Codecov / codecov/patch

R/edge_distribution.R#L39

Added line #L39 was not covered by tests
}
19 changes: 12 additions & 7 deletions R/get_cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,19 @@
"observation" %in% dbListTables(conn, "observation"),
msg = "No observations found. Did you run `import_observations()`?"
)
obs <- dbGetQuery(
conn, "SELECT id, x, y, survey, status, cluster FROM observation"
)
"SELECT
o.id, o.x, o.y, o.survey, o.status, o.cluster
FROM observation AS o
LEFT JOIN unlikely AS u ON o.survey = u.survey
WHERE u.value IS NULL" |>
dbGetQuery(conn = conn) -> obs

Check warning on line 17 in R/get_cluster.R

View check run for this annotation

Codecov / codecov/patch

R/get_cluster.R#L12-L17

Added lines #L12 - L17 were not covered by tests
cluster <- dbGetQuery(conn, "
SELECT
cluster, COUNT(cluster) AS n_obs, MAX(status) AS max_status,
AVG(x) AS centroid_x, AVG(y) AS centroid_y
FROM observation
GROUP BY cluster")
o.cluster, COUNT(o.cluster) AS n_obs, MAX(status) AS max_status,
AVG(o.x) AS centroid_x, AVG(o.y) AS centroid_y
FROM observation AS o
LEFT JOIN unlikely AS u ON o.survey = u.survey
WHERE u.value IS NULL
GROUP BY o.cluster")

Check warning on line 25 in R/get_cluster.R

View check run for this annotation

Codecov / codecov/patch

R/get_cluster.R#L20-L25

Added lines #L20 - L25 were not covered by tests
return(list(cluster = cluster, observations = obs))
}
52 changes: 52 additions & 0 deletions R/get_total.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#' Get the total number of individuals per region
#' Returns the sum of `value` per region.
#' Clusters spanning more than one region get each a fraction of `value` based
#' on the number of observations of the cluster in a region divided by the total
#' number of observations in the cluster.
#' @inheritParams import_observations
#' @param value A string representing the value per cluster to be calculated.
#' Default is `"IIF(max_status >= 2, 1, iif(n_total > 1, 0.5, 0))"`.
#' Available variables are
#' - `max_status`: the highest status of a cluster.
#' - `n_total`: the total number of observations per cluster
#' - `n_region`: the number of observations in a cluster from the same region.
#' @export
#' @importFrom assertthat assert_that is.string noNA
#' @importFrom RSQLite dbGetQuery
get_total <- function(
conn, value = "IIF(max_status >= 2, 1, iif(n_total > 1, 0.5, 0))"
) {
assert_that(
inherits(conn, "SQLiteConnection"), is.string(value), noNA(value)

Check warning on line 20 in R/get_total.R

View check run for this annotation

Codecov / codecov/patch

R/get_total.R#L19-L20

Added lines #L19 - L20 were not covered by tests
)
sprintf(
"WITH cte_obs AS (
SELECT o.region, o.cluster, o.status
FROM observation AS o
LEFT JOIN unlikely AS u ON o.survey = u.survey
WHERE u.value IS NULL

Check warning on line 27 in R/get_total.R

View check run for this annotation

Codecov / codecov/patch

R/get_total.R#L22-L27

Added lines #L22 - L27 were not covered by tests
),
cte_region AS (
SELECT cluster, region, COUNT(cluster) AS n_region
FROM cte_obs
WHERE region IS NOT NULL
GROUP BY cluster, region

Check warning on line 33 in R/get_total.R

View check run for this annotation

Codecov / codecov/patch

R/get_total.R#L29-L33

Added lines #L29 - L33 were not covered by tests
),
cte_cluster AS (
SELECT cluster, MAX(status) AS max_status, COUNT(status) AS n_total
FROM cte_obs
GROUP BY cluster

Check warning on line 38 in R/get_total.R

View check run for this annotation

Codecov / codecov/patch

R/get_total.R#L35-L38

Added lines #L35 - L38 were not covered by tests
),
cte AS (
SELECT
r.region, c.cluster, 1.0 * r.n_region / c.n_total AS fraction,
%s AS value
FROM cte_region AS r
INNER JOIN cte_cluster AS c ON r.cluster = c.cluster

Check warning on line 45 in R/get_total.R

View check run for this annotation

Codecov / codecov/patch

R/get_total.R#L40-L45

Added lines #L40 - L45 were not covered by tests
)

SELECT region, SUM(value * fraction) AS individuals FROM cte GROUP BY region",
value

Check warning on line 49 in R/get_total.R

View check run for this annotation

Codecov / codecov/patch

R/get_total.R#L48-L49

Added lines #L48 - L49 were not covered by tests
) |>
dbGetQuery(conn = conn)

Check warning on line 51 in R/get_total.R

View check run for this annotation

Codecov / codecov/patch

R/get_total.R#L51

Added line #L51 was not covered by tests
}
39 changes: 39 additions & 0 deletions R/get_unlikely.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#' Get an overview of the unlikely reasons for each survey and region
#' @inheritParams import_observations
#' @export
#' @importFrom assertthat assert_that
#' @importFrom RSQLite dbGetQuery
get_unlikely_summary <- function(conn = conn) {
assert_that(inherits(conn, "SQLiteConnection"))
"WITH cte AS (
SELECT survey, region, COUNT(id) n
FROM observation
WHERE region IS NOT NULL
GROUP BY survey, region

Check warning on line 12 in R/get_unlikely.R

View check run for this annotation

Codecov / codecov/patch

R/get_unlikely.R#L7-L12

Added lines #L7 - L12 were not covered by tests
),
cte_survey AS (
SELECT survey, region FROM cte GROUP BY survey HAVING n = MAX(n)

Check warning on line 15 in R/get_unlikely.R

View check run for this annotation

Codecov / codecov/patch

R/get_unlikely.R#L14-L15

Added lines #L14 - L15 were not covered by tests
)
SELECT c.region, c.survey, u.reason
FROM cte_survey AS c
INNER JOIN unlikely AS u ON c.survey = u.survey
ORDER BY c.region, c.survey" |>
dbGetQuery(conn = conn)

Check warning on line 21 in R/get_unlikely.R

View check run for this annotation

Codecov / codecov/patch

R/get_unlikely.R#L17-L21

Added lines #L17 - L21 were not covered by tests
}

#' Get the unlikely observations
#' @inheritParams import_observations
#' @inheritParams sf::st_as_sf
#' @export
#' @importFrom assertthat assert_that
#' @importFrom RSQLite dbGetQuery
#' @importFrom sf st_as_sf
get_unlikely_observation <- function(conn, crs = 31370) {
assert_that(inherits(conn, "SQLiteConnection"))
"SELECT o.id, o.x, o.y, o.status, o.region, o.survey, s.original, s.user
FROM observation AS o
INNER JOIN unlikely AS u ON o.survey = u.survey
INNER JOIN survey AS s on o.survey = s.id" |>
dbGetQuery(conn = conn) |>
st_as_sf(coords = c("x", "y"), crs = crs)

Check warning on line 38 in R/get_unlikely.R

View check run for this annotation

Codecov / codecov/patch

R/get_unlikely.R#L32-L38

Added lines #L32 - L38 were not covered by tests
}
Loading