Skip to content

Commit 6e3ab01

Browse files
author
John Waller
committed
updating name_backbone and name_backbone_verbose to match v2
1 parent c94c95a commit 6e3ab01

File tree

4 files changed

+1026
-260
lines changed

4 files changed

+1026
-260
lines changed

R/name_backbone.r

Lines changed: 246 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -1,120 +1,271 @@
1-
#' Lookup names in the GBIF backbone taxonomy.
2-
#'
3-
#' @template otherlimstart
4-
#' @template occ
5-
#' @export
1+
#' Match names to GBIF backbone and other checklists.
62
#'
73
#' @param name (character) Full scientific name potentially with authorship
84
#' (required)
9-
#' @param rank (character) The rank given as our rank enum. (optional)
10-
#' @param kingdom (character) If provided default matching will also try to
11-
#' match against this if no direct match is found for the name alone.
12-
#' (optional)
13-
#' @param phylum (character) If provided default matching will also try to
14-
#' match against this if no direct match is found for the name alone.
15-
#' (optional)
16-
#' @param class (character) If provided default matching will also try to
17-
#' match against this if no direct match is found for the name alone.
18-
#' (optional)
19-
#' @param order (character) If provided default matching will also try to
20-
#' match against this if no direct match is found for the name alone.
21-
#' (optional)
22-
#' @param family (character) If provided default matching will also try to
23-
#' match against this if no direct match is found for the name alone.
24-
#' (optional)
25-
#' @param genus (character) If provided default matching will also try to
26-
#' match against this if no direct match is found for the name alone.
27-
#' (optional)
28-
#' @param strict (logical) If `TRUE` it (fuzzy) matches only the given name,
29-
#' but never a taxon in the upper classification (optional)
30-
#' @param verbose (logical) should the function give back more (less reliable)
31-
#' results. See function `name_backbone_verbose()`
5+
#' @param rank (character) Filter by taxonomic rank. See API reference for
6+
#' available values.
7+
#' @param usageKey (character) The usage key to look up. When provided, all
8+
#' other fields are ignored.
9+
#' @param kingdom (character) Kingdom to match.
10+
#' @param phylum (character) Phylum to match.
11+
#' @param class (character) Class to match.
12+
#' @param order (character) Order to match.
13+
#' @param superfamily (character) Superfamily to match.
14+
#' @param family (character) Family to match.
15+
#' @param subfamily (character) Subfamily to match.
16+
#' @param tribe (character) Tribe to match.
17+
#' @param subtribe (character) Subtribe to match.
18+
#' @param genus (character) Genus to match.
19+
#' @param subgenus (character) Subgenus to match.
20+
#' @param species (character) Species to match.
21+
#' @param taxonID (character) The taxon ID to look up. Matches to a taxonID
22+
#' will take precedence over scientificName values supplied. A comparison of
23+
#' the matched scientific and taxonID is performed tocheck for inconsistencies.
24+
#' @param taxonConceptID (character) The taxonConceptID to match. Matches to a
25+
#' taxonConceptID will take precedence over scientificName values supplied. A
26+
#' comparison of the matched scientific and taxonConceptID is performed to
27+
#' check for inconsistencies.
28+
#' @param scientificNameID (character) Matches to a scientificNameID will take
29+
#' precedence over scientificName values supplied. A comparison of the matched
30+
#' scientific and scientificNameID is performed to check for inconsistencies.
31+
#' @param scientificNameAuthorship (character) The scientific name authorship
32+
#' to match against.
33+
#' @param genericName (character) Generic part of the name to match when given
34+
#' as atomised parts instead of the full name parameter.
35+
#' @param specificEpithet (character) Specific epithet to match.
36+
#' @param infraspecificEpithet (character) Infraspecific epithet to match.
37+
#' @param verbatimTaxonRank (character) Filters by free text taxon rank.
38+
#' @param exclude (character) An array of usage keys to exclude from the match.
39+
#' @param strict (logical) If set to true, fuzzy matches only the given name,
40+
#' but never a taxon in the upper classification.
41+
#' @param verbose (logical) If set to true, it shows alternative matches which
42+
#' were considered but then rejected.
43+
#' @param checklistKey (character) The key of a checklist to use. The default is
44+
#' the GBIF Backbone taxanomy.
45+
#' @param start (integer) Currently ignored.
46+
#' @param limit (integer) Currently ignored.
47+
#' @param curlopts A list of curl options passed on to [httr::GET()].
3248
#'
33-
#' @return For `name_backbone`, a data.frame for a single taxon with many
34-
#' columns. For `name_backbone_verbose`, a larger number of results in a
35-
#' data.frame the results of resulting from fuzzy matching.
36-
#' You will also get back your input name, rank, kingdom, phylum ect. as
37-
#' columns input_name, input_rank, input_kingdom ect. so you can check the
38-
#' results.
3949
#'
4050
#' @details
4151
#' If you don't get a match, GBIF gives back a data.frame with columns
4252
#' `synonym`, `confidence`, and `matchType='NONE'`.
43-
#'
44-
#' @references <https://www.gbif.org/developer/species#searching>
45-
#'
53+
#'
54+
#' `name_backbone_verbose()` is a legacy wrapper function that returns
55+
#' returns alternatives in a separate `tibble`.
56+
#'
57+
#' @returns A single row `tibble` of the best matched name. If `verbose=TRUE`, a
58+
#' longer `tibble` with all potential alternatives is returned.
59+
#'
60+
#' @export
61+
#'
62+
#' @references
63+
#' \url{https://techdocs.gbif.org/en/openapi/v1/species#/Searching%20names/matchNames}
64+
#'
4665
#' @examples \dontrun{
47-
#' name_backbone(name='Helianthus annuus', kingdom='plants')
48-
#' name_backbone(name='Helianthus', rank='genus', kingdom='plants')
49-
#' name_backbone(name='Poa', rank='genus', family='Poaceae')
50-
#'
51-
#' # Verbose - gives back alternatives
52-
#' ## Strictness
53-
#' name_backbone_verbose(name='Poa', kingdom='plants',
54-
#' strict=FALSE)
55-
#' name_backbone_verbose(name='Helianthus annuus', kingdom='plants',
56-
#' strict=TRUE)
66+
#' name_backbone("Calopteryx splendens")
67+
#' name_backbone("Calopteryx splendens", kingdom = "Animalia")
68+
#' name_backbone("Calopteryx splendens", kingdom = "Animalia", verbose = TRUE)
69+
#' name_backbone_verbose("Calopteryx splendens", kingdom = "Animalia")
70+
#' name_backbone("Calopteryx splendens", kingdom = "Plantae")
5771
#'
58-
#' # Non-existent name - returns list of lenght 3 stating no match
59-
#' name_backbone(name='Aso')
60-
#' name_backbone(name='Oenante')
61-
#'
62-
#' # Pass on curl options
63-
#' name_backbone(name='Oenante', curlopts = list(verbose=TRUE))
6472
#' }
65-
name_backbone <- function(name, rank=NULL, kingdom=NULL, phylum=NULL,
66-
class=NULL, order=NULL, family=NULL, genus=NULL, strict=FALSE, verbose=FALSE,
67-
start=NULL, limit=100, curlopts = list()) {
68-
69-
# pchk(verbose, "name_backbone")
70-
url <- paste0(gbif_base(), '/species/match')
73+
name_backbone <- function(
74+
name,
75+
rank = NULL,
76+
kingdom = NULL,
77+
phylum = NULL,
78+
class = NULL,
79+
order = NULL,
80+
superfamily = NULL,
81+
family = NULL,
82+
subfamily = NULL,
83+
tribe = NULL,
84+
subtribe = NULL,
85+
genus = NULL,
86+
subgenus = NULL,
87+
species = NULL,
88+
usageKey = NULL,
89+
taxonID = NULL,
90+
taxonConceptID = NULL,
91+
scientificNameID = NULL,
92+
scientificNameAuthorship = NULL,
93+
genericName = NULL,
94+
specificEpithet = NULL,
95+
infraspecificEpithet = NULL,
96+
verbatimTaxonRank = NULL,
97+
exclude = NULL,
98+
strict = NULL,
99+
verbose = FALSE,
100+
checklistKey = NULL,
101+
start = NULL,
102+
limit = NULL,
103+
curlopts = list(http_version=2)
104+
) {
105+
url <- paste0('https://api.gbif.org/v2', '/species/match')
71106
args <- rgbif_compact(
72-
list(name=name, rank=rank, kingdom=kingdom, phylum=phylum,
73-
class=class, order=order, family=family, genus=genus,
74-
strict=as_log(strict), verbose = verbose, offset=start, limit=limit))
107+
list(
108+
scientificName = name,
109+
scientificNameAuthorship = scientificNameAuthorship,
110+
genericName = genericName,
111+
specificEpithet = specificEpithet,
112+
infraspecificEpithet = infraspecificEpithet,
113+
taxonRank = rank,
114+
verbatimTaxonRank = verbatimTaxonRank,
115+
kingdom = kingdom,
116+
phylum = phylum,
117+
class = class,
118+
order = order,
119+
superfamily = superfamily,
120+
family = family,
121+
subfamily = subfamily,
122+
tribe = tribe,
123+
subtribe = subtribe,
124+
genus = genus,
125+
subgenus = subgenus,
126+
species = species,
127+
exclude = exclude,
128+
strict = strict,
129+
verbose = verbose,
130+
checklistKey = checklistKey,
131+
start = start,
132+
limit = limit
133+
)
134+
)
75135
tt <- gbif_GET(url, args, FALSE, curlopts)
76-
input_args_clean <- args[!names(args) %in% c("strict","verbose","start","limit","curlopts")]
77-
input_args_clean <- stats::setNames(input_args_clean,paste0("verbatim_",names(input_args_clean)))
78-
tt <- c(tt,input_args_clean)
79-
if(verbose) {
80-
alternatives <- tt[["alternatives"]]
81-
alternatives <- lapply(alternatives,function(x) c(x,input_args_clean))
82-
alternatives <- bind_rows(lapply(alternatives,tibble::as_tibble))
83-
accepted <- tibble::as_tibble(tt)
84-
out <- bind_rows(list(accepted,alternatives))
85-
out <- out[!colnames(out) %in% c("alternatives", "note")]
136+
if(!verbose) {
137+
out <- process_name_backbone_output(tt,args,checklistKey = checklistKey)
86138
} else {
87-
out <- tibble::as_tibble(tt[!names(tt) %in% c("alternatives", "note")])
139+
alternatives <- bind_rows(lapply(tt$diagnostics$alternatives, function(x)
140+
process_name_backbone_output(x,args,checklistKey = checklistKey))
141+
)
142+
tt$diagnostics$alternatives <- NULL
143+
accepted <- process_name_backbone_output(tt,args,checklistKey = checklistKey)
144+
out <- bind_rows(list(accepted,alternatives))
88145
}
89-
col_idx <- grep("verbatim_", names(out))
90-
ordering <- c((1:ncol(out))[-col_idx],col_idx)
91-
out <- unique(out[, ordering])
92-
structure(out, args = args, note = tt$note, type = "single")
146+
structure(out, args = args, note = tt$diagnostics$note, type = "single")
93147
}
94-
95148

96149
#' @export
97150
#' @rdname name_backbone
98-
name_backbone_verbose <- function(name, rank=NULL, kingdom=NULL, phylum=NULL,
99-
class=NULL, order=NULL, family=NULL, genus=NULL, strict=FALSE,
100-
start=NULL, limit=100, curlopts = list()) {
101-
102-
url <- paste0(gbif_base(), '/species/match')
151+
name_backbone_verbose <- function(name,
152+
rank = NULL,
153+
kingdom = NULL,
154+
phylum = NULL,
155+
class = NULL,
156+
order = NULL,
157+
superfamily = NULL,
158+
family = NULL,
159+
subfamily = NULL,
160+
tribe = NULL,
161+
subtribe = NULL,
162+
genus = NULL,
163+
subgenus = NULL,
164+
species = NULL,
165+
usageKey = NULL,
166+
taxonID = NULL,
167+
taxonConceptID = NULL,
168+
scientificNameID = NULL,
169+
scientificNameAuthorship = NULL,
170+
genericName = NULL,
171+
specificEpithet = NULL,
172+
infraspecificEpithet = NULL,
173+
verbatimTaxonRank = NULL,
174+
exclude = NULL,
175+
strict = NULL,
176+
checklistKey = NULL,
177+
start = NULL,
178+
limit = NULL,
179+
curlopts = list(http_version=2)
180+
) {
181+
url <- paste0('https://api.gbif.org/v2', '/species/match')
103182
args <- rgbif_compact(
104-
list(name=name, rank=rank, kingdom=kingdom, phylum=phylum,
105-
class=class, order=order, family=family, genus=genus,
106-
strict=as_log(strict), verbose=TRUE, offset=start, limit=limit))
183+
list(
184+
scientificName = name,
185+
scientificNameAuthorship = scientificNameAuthorship,
186+
genericName = genericName,
187+
specificEpithet = specificEpithet,
188+
infraspecificEpithet = infraspecificEpithet,
189+
taxonRank = rank,
190+
verbatimTaxonRank = verbatimTaxonRank,
191+
kingdom = kingdom,
192+
phylum = phylum,
193+
class = class,
194+
order = order,
195+
superfamily = superfamily,
196+
family = family,
197+
subfamily = subfamily,
198+
tribe = tribe,
199+
subtribe = subtribe,
200+
genus = genus,
201+
subgenus = subgenus,
202+
species = species,
203+
exclude = exclude,
204+
strict = strict,
205+
verbose = TRUE,
206+
checklistKey = checklistKey,
207+
start = start,
208+
limit = limit
209+
)
210+
)
107211
tt <- gbif_GET(url, args, FALSE, curlopts)
108-
alt <- tibble::as_tibble(data.table::setDF(
109-
data.table::rbindlist(
110-
lapply(tt$alternatives, function(x)
111-
lapply(x, function(x) if (length(x) == 0) NA else x)),
112-
use.names = TRUE, fill = TRUE)))
113-
dat <- tibble::as_tibble(
114-
data.frame(tt[!names(tt) %in% c("alternatives", "note")],
115-
stringsAsFactors = FALSE))
212+
alt <- bind_rows(lapply(tt$diagnostics$alternatives, function(x)
213+
process_name_backbone_output(x, args, checklistKey = NULL))
214+
)
215+
tt$diagnostics$alternatives <- NULL
216+
dat <- process_name_backbone_output(tt, args, checklistKey = checklistKey)
116217
out <- list(data = dat, alternatives = alt)
117-
structure(out, args = args, note = tt$note, type = "single")
218+
structure(out, args = args, note = tt$diagnostics$note, type = "single")
118219
}
119220

221+
process_name_backbone_output <- function(tt, args, checklistKey = NULL) {
222+
usage <- if (!is.null(tt$usage)) {
223+
u <- tibble::as_tibble(tt$usage)
224+
colnames(u)[colnames(u) == "key"] <- "usageKey"
225+
colnames(u)[colnames(u) == "name"] <- "scientificName"
226+
u
227+
} else {
228+
NULL
229+
}
230+
diagnostics <- if (!is.null(tt$diagnostics)) {
231+
tt$diagnostics["timings"] <- NULL
232+
tt$diagnostics["issues"] <- NULL
233+
d <- tibble::as_tibble(tt$diagnostics)
234+
d
235+
} else {
236+
NULL
237+
}
238+
classification <- if (!is.null(tt$classification)) {
239+
c <- bind_rows(lapply(tt$classification, tibble::as_tibble))
240+
nv <- setNames(c$name, tolower(c$rank))
241+
kv <- setNames(c$key, paste0(tolower(c$rank), "Key"))
242+
c <- tibble::as_tibble(as.list(c(nv, kv)))
243+
c
244+
} else {
245+
NULL
246+
}
247+
synonym <- if (!is.null(tt$synonym)) {
248+
tibble::as_tibble(tt$synonym)
249+
} else {
250+
NULL
251+
}
252+
verbatim <- if (!is.null(args)) {
253+
input_args_clean <- args[!names(args) %in%
254+
c("strict","verbose","start","limit","curlopts")]
255+
v <- stats::setNames(input_args_clean,
256+
paste0("verbatim_",names(input_args_clean)))
257+
names(v)[names(v) == "verbatim_taxonRank"] <- "verbatim_rank"
258+
names(v)[names(v) == "verbatim_scientificName"] <- "verbatim_name"
259+
tibble::as_tibble(v)
260+
} else {
261+
NULL
262+
}
263+
264+
out <- do.call("cbind", rgbif_compact(list(usage,
265+
diagnostics,
266+
classification,
267+
synonym,
268+
verbatim)))
269+
tibble::as_tibble(out)
270+
}
120271

0 commit comments

Comments
 (0)