|
1 |
| -#' Lookup names in the GBIF backbone taxonomy. |
2 |
| -#' |
3 |
| -#' @template otherlimstart |
4 |
| -#' @template occ |
5 |
| -#' @export |
| 1 | +#' Match names to GBIF backbone and other checklists. |
6 | 2 | #'
|
7 | 3 | #' @param name (character) Full scientific name potentially with authorship
|
8 | 4 | #' (required)
|
9 |
| -#' @param rank (character) The rank given as our rank enum. (optional) |
10 |
| -#' @param kingdom (character) If provided default matching will also try to |
11 |
| -#' match against this if no direct match is found for the name alone. |
12 |
| -#' (optional) |
13 |
| -#' @param phylum (character) If provided default matching will also try to |
14 |
| -#' match against this if no direct match is found for the name alone. |
15 |
| -#' (optional) |
16 |
| -#' @param class (character) If provided default matching will also try to |
17 |
| -#' match against this if no direct match is found for the name alone. |
18 |
| -#' (optional) |
19 |
| -#' @param order (character) If provided default matching will also try to |
20 |
| -#' match against this if no direct match is found for the name alone. |
21 |
| -#' (optional) |
22 |
| -#' @param family (character) If provided default matching will also try to |
23 |
| -#' match against this if no direct match is found for the name alone. |
24 |
| -#' (optional) |
25 |
| -#' @param genus (character) If provided default matching will also try to |
26 |
| -#' match against this if no direct match is found for the name alone. |
27 |
| -#' (optional) |
28 |
| -#' @param strict (logical) If `TRUE` it (fuzzy) matches only the given name, |
29 |
| -#' but never a taxon in the upper classification (optional) |
30 |
| -#' @param verbose (logical) should the function give back more (less reliable) |
31 |
| -#' results. See function `name_backbone_verbose()` |
| 5 | +#' @param rank (character) Filter by taxonomic rank. See API reference for |
| 6 | +#' available values. |
| 7 | +#' @param usageKey (character) The usage key to look up. When provided, all |
| 8 | +#' other fields are ignored. |
| 9 | +#' @param kingdom (character) Kingdom to match. |
| 10 | +#' @param phylum (character) Phylum to match. |
| 11 | +#' @param class (character) Class to match. |
| 12 | +#' @param order (character) Order to match. |
| 13 | +#' @param superfamily (character) Superfamily to match. |
| 14 | +#' @param family (character) Family to match. |
| 15 | +#' @param subfamily (character) Subfamily to match. |
| 16 | +#' @param tribe (character) Tribe to match. |
| 17 | +#' @param subtribe (character) Subtribe to match. |
| 18 | +#' @param genus (character) Genus to match. |
| 19 | +#' @param subgenus (character) Subgenus to match. |
| 20 | +#' @param species (character) Species to match. |
| 21 | +#' @param taxonID (character) The taxon ID to look up. Matches to a taxonID |
| 22 | +#' will take precedence over scientificName values supplied. A comparison of |
| 23 | +#' the matched scientific and taxonID is performed tocheck for inconsistencies. |
| 24 | +#' @param taxonConceptID (character) The taxonConceptID to match. Matches to a |
| 25 | +#' taxonConceptID will take precedence over scientificName values supplied. A |
| 26 | +#' comparison of the matched scientific and taxonConceptID is performed to |
| 27 | +#' check for inconsistencies. |
| 28 | +#' @param scientificNameID (character) Matches to a scientificNameID will take |
| 29 | +#' precedence over scientificName values supplied. A comparison of the matched |
| 30 | +#' scientific and scientificNameID is performed to check for inconsistencies. |
| 31 | +#' @param scientificNameAuthorship (character) The scientific name authorship |
| 32 | +#' to match against. |
| 33 | +#' @param genericName (character) Generic part of the name to match when given |
| 34 | +#' as atomised parts instead of the full name parameter. |
| 35 | +#' @param specificEpithet (character) Specific epithet to match. |
| 36 | +#' @param infraspecificEpithet (character) Infraspecific epithet to match. |
| 37 | +#' @param verbatimTaxonRank (character) Filters by free text taxon rank. |
| 38 | +#' @param exclude (character) An array of usage keys to exclude from the match. |
| 39 | +#' @param strict (logical) If set to true, fuzzy matches only the given name, |
| 40 | +#' but never a taxon in the upper classification. |
| 41 | +#' @param verbose (logical) If set to true, it shows alternative matches which |
| 42 | +#' were considered but then rejected. |
| 43 | +#' @param checklistKey (character) The key of a checklist to use. The default is |
| 44 | +#' the GBIF Backbone taxanomy. |
| 45 | +#' @param start (integer) Currently ignored. |
| 46 | +#' @param limit (integer) Currently ignored. |
| 47 | +#' @param curlopts A list of curl options passed on to [httr::GET()]. |
32 | 48 | #'
|
33 |
| -#' @return For `name_backbone`, a data.frame for a single taxon with many |
34 |
| -#' columns. For `name_backbone_verbose`, a larger number of results in a |
35 |
| -#' data.frame the results of resulting from fuzzy matching. |
36 |
| -#' You will also get back your input name, rank, kingdom, phylum ect. as |
37 |
| -#' columns input_name, input_rank, input_kingdom ect. so you can check the |
38 |
| -#' results. |
39 | 49 | #'
|
40 | 50 | #' @details
|
41 | 51 | #' If you don't get a match, GBIF gives back a data.frame with columns
|
42 | 52 | #' `synonym`, `confidence`, and `matchType='NONE'`.
|
43 |
| -#' |
44 |
| -#' @references <https://www.gbif.org/developer/species#searching> |
45 |
| -#' |
| 53 | +#' |
| 54 | +#' `name_backbone_verbose()` is a legacy wrapper function that returns |
| 55 | +#' returns alternatives in a separate `tibble`. |
| 56 | +#' |
| 57 | +#' @returns A single row `tibble` of the best matched name. If `verbose=TRUE`, a |
| 58 | +#' longer `tibble` with all potential alternatives is returned. |
| 59 | +#' |
| 60 | +#' @export |
| 61 | +#' |
| 62 | +#' @references |
| 63 | +#' \url{https://techdocs.gbif.org/en/openapi/v1/species#/Searching%20names/matchNames} |
| 64 | +#' |
46 | 65 | #' @examples \dontrun{
|
47 |
| -#' name_backbone(name='Helianthus annuus', kingdom='plants') |
48 |
| -#' name_backbone(name='Helianthus', rank='genus', kingdom='plants') |
49 |
| -#' name_backbone(name='Poa', rank='genus', family='Poaceae') |
50 |
| -#' |
51 |
| -#' # Verbose - gives back alternatives |
52 |
| -#' ## Strictness |
53 |
| -#' name_backbone_verbose(name='Poa', kingdom='plants', |
54 |
| -#' strict=FALSE) |
55 |
| -#' name_backbone_verbose(name='Helianthus annuus', kingdom='plants', |
56 |
| -#' strict=TRUE) |
| 66 | +#' name_backbone("Calopteryx splendens") |
| 67 | +#' name_backbone("Calopteryx splendens", kingdom = "Animalia") |
| 68 | +#' name_backbone("Calopteryx splendens", kingdom = "Animalia", verbose = TRUE) |
| 69 | +#' name_backbone_verbose("Calopteryx splendens", kingdom = "Animalia") |
| 70 | +#' name_backbone("Calopteryx splendens", kingdom = "Plantae") |
57 | 71 | #'
|
58 |
| -#' # Non-existent name - returns list of lenght 3 stating no match |
59 |
| -#' name_backbone(name='Aso') |
60 |
| -#' name_backbone(name='Oenante') |
61 |
| -#' |
62 |
| -#' # Pass on curl options |
63 |
| -#' name_backbone(name='Oenante', curlopts = list(verbose=TRUE)) |
64 | 72 | #' }
|
65 |
| -name_backbone <- function(name, rank=NULL, kingdom=NULL, phylum=NULL, |
66 |
| - class=NULL, order=NULL, family=NULL, genus=NULL, strict=FALSE, verbose=FALSE, |
67 |
| - start=NULL, limit=100, curlopts = list()) { |
68 |
| - |
69 |
| - # pchk(verbose, "name_backbone") |
70 |
| - url <- paste0(gbif_base(), '/species/match') |
| 73 | +name_backbone <- function( |
| 74 | + name, |
| 75 | + rank = NULL, |
| 76 | + kingdom = NULL, |
| 77 | + phylum = NULL, |
| 78 | + class = NULL, |
| 79 | + order = NULL, |
| 80 | + superfamily = NULL, |
| 81 | + family = NULL, |
| 82 | + subfamily = NULL, |
| 83 | + tribe = NULL, |
| 84 | + subtribe = NULL, |
| 85 | + genus = NULL, |
| 86 | + subgenus = NULL, |
| 87 | + species = NULL, |
| 88 | + usageKey = NULL, |
| 89 | + taxonID = NULL, |
| 90 | + taxonConceptID = NULL, |
| 91 | + scientificNameID = NULL, |
| 92 | + scientificNameAuthorship = NULL, |
| 93 | + genericName = NULL, |
| 94 | + specificEpithet = NULL, |
| 95 | + infraspecificEpithet = NULL, |
| 96 | + verbatimTaxonRank = NULL, |
| 97 | + exclude = NULL, |
| 98 | + strict = NULL, |
| 99 | + verbose = FALSE, |
| 100 | + checklistKey = NULL, |
| 101 | + start = NULL, |
| 102 | + limit = NULL, |
| 103 | + curlopts = list(http_version=2) |
| 104 | +) { |
| 105 | + url <- paste0('https://api.gbif.org/v2', '/species/match') |
71 | 106 | args <- rgbif_compact(
|
72 |
| - list(name=name, rank=rank, kingdom=kingdom, phylum=phylum, |
73 |
| - class=class, order=order, family=family, genus=genus, |
74 |
| - strict=as_log(strict), verbose = verbose, offset=start, limit=limit)) |
| 107 | + list( |
| 108 | + scientificName = name, |
| 109 | + scientificNameAuthorship = scientificNameAuthorship, |
| 110 | + genericName = genericName, |
| 111 | + specificEpithet = specificEpithet, |
| 112 | + infraspecificEpithet = infraspecificEpithet, |
| 113 | + taxonRank = rank, |
| 114 | + verbatimTaxonRank = verbatimTaxonRank, |
| 115 | + kingdom = kingdom, |
| 116 | + phylum = phylum, |
| 117 | + class = class, |
| 118 | + order = order, |
| 119 | + superfamily = superfamily, |
| 120 | + family = family, |
| 121 | + subfamily = subfamily, |
| 122 | + tribe = tribe, |
| 123 | + subtribe = subtribe, |
| 124 | + genus = genus, |
| 125 | + subgenus = subgenus, |
| 126 | + species = species, |
| 127 | + exclude = exclude, |
| 128 | + strict = strict, |
| 129 | + verbose = verbose, |
| 130 | + checklistKey = checklistKey, |
| 131 | + start = start, |
| 132 | + limit = limit |
| 133 | + ) |
| 134 | + ) |
75 | 135 | tt <- gbif_GET(url, args, FALSE, curlopts)
|
76 |
| - input_args_clean <- args[!names(args) %in% c("strict","verbose","start","limit","curlopts")] |
77 |
| - input_args_clean <- stats::setNames(input_args_clean,paste0("verbatim_",names(input_args_clean))) |
78 |
| - tt <- c(tt,input_args_clean) |
79 |
| - if(verbose) { |
80 |
| - alternatives <- tt[["alternatives"]] |
81 |
| - alternatives <- lapply(alternatives,function(x) c(x,input_args_clean)) |
82 |
| - alternatives <- bind_rows(lapply(alternatives,tibble::as_tibble)) |
83 |
| - accepted <- tibble::as_tibble(tt) |
84 |
| - out <- bind_rows(list(accepted,alternatives)) |
85 |
| - out <- out[!colnames(out) %in% c("alternatives", "note")] |
| 136 | + if(!verbose) { |
| 137 | + out <- process_name_backbone_output(tt,args,checklistKey = checklistKey) |
86 | 138 | } else {
|
87 |
| - out <- tibble::as_tibble(tt[!names(tt) %in% c("alternatives", "note")]) |
| 139 | + alternatives <- bind_rows(lapply(tt$diagnostics$alternatives, function(x) |
| 140 | + process_name_backbone_output(x,args,checklistKey = checklistKey)) |
| 141 | + ) |
| 142 | + tt$diagnostics$alternatives <- NULL |
| 143 | + accepted <- process_name_backbone_output(tt,args,checklistKey = checklistKey) |
| 144 | + out <- bind_rows(list(accepted,alternatives)) |
88 | 145 | }
|
89 |
| - col_idx <- grep("verbatim_", names(out)) |
90 |
| - ordering <- c((1:ncol(out))[-col_idx],col_idx) |
91 |
| - out <- unique(out[, ordering]) |
92 |
| - structure(out, args = args, note = tt$note, type = "single") |
| 146 | + structure(out, args = args, note = tt$diagnostics$note, type = "single") |
93 | 147 | }
|
94 |
| - |
95 | 148 |
|
96 | 149 | #' @export
|
97 | 150 | #' @rdname name_backbone
|
98 |
| -name_backbone_verbose <- function(name, rank=NULL, kingdom=NULL, phylum=NULL, |
99 |
| - class=NULL, order=NULL, family=NULL, genus=NULL, strict=FALSE, |
100 |
| - start=NULL, limit=100, curlopts = list()) { |
101 |
| - |
102 |
| - url <- paste0(gbif_base(), '/species/match') |
| 151 | +name_backbone_verbose <- function(name, |
| 152 | + rank = NULL, |
| 153 | + kingdom = NULL, |
| 154 | + phylum = NULL, |
| 155 | + class = NULL, |
| 156 | + order = NULL, |
| 157 | + superfamily = NULL, |
| 158 | + family = NULL, |
| 159 | + subfamily = NULL, |
| 160 | + tribe = NULL, |
| 161 | + subtribe = NULL, |
| 162 | + genus = NULL, |
| 163 | + subgenus = NULL, |
| 164 | + species = NULL, |
| 165 | + usageKey = NULL, |
| 166 | + taxonID = NULL, |
| 167 | + taxonConceptID = NULL, |
| 168 | + scientificNameID = NULL, |
| 169 | + scientificNameAuthorship = NULL, |
| 170 | + genericName = NULL, |
| 171 | + specificEpithet = NULL, |
| 172 | + infraspecificEpithet = NULL, |
| 173 | + verbatimTaxonRank = NULL, |
| 174 | + exclude = NULL, |
| 175 | + strict = NULL, |
| 176 | + checklistKey = NULL, |
| 177 | + start = NULL, |
| 178 | + limit = NULL, |
| 179 | + curlopts = list(http_version=2) |
| 180 | + ) { |
| 181 | + url <- paste0('https://api.gbif.org/v2', '/species/match') |
103 | 182 | args <- rgbif_compact(
|
104 |
| - list(name=name, rank=rank, kingdom=kingdom, phylum=phylum, |
105 |
| - class=class, order=order, family=family, genus=genus, |
106 |
| - strict=as_log(strict), verbose=TRUE, offset=start, limit=limit)) |
| 183 | + list( |
| 184 | + scientificName = name, |
| 185 | + scientificNameAuthorship = scientificNameAuthorship, |
| 186 | + genericName = genericName, |
| 187 | + specificEpithet = specificEpithet, |
| 188 | + infraspecificEpithet = infraspecificEpithet, |
| 189 | + taxonRank = rank, |
| 190 | + verbatimTaxonRank = verbatimTaxonRank, |
| 191 | + kingdom = kingdom, |
| 192 | + phylum = phylum, |
| 193 | + class = class, |
| 194 | + order = order, |
| 195 | + superfamily = superfamily, |
| 196 | + family = family, |
| 197 | + subfamily = subfamily, |
| 198 | + tribe = tribe, |
| 199 | + subtribe = subtribe, |
| 200 | + genus = genus, |
| 201 | + subgenus = subgenus, |
| 202 | + species = species, |
| 203 | + exclude = exclude, |
| 204 | + strict = strict, |
| 205 | + verbose = TRUE, |
| 206 | + checklistKey = checklistKey, |
| 207 | + start = start, |
| 208 | + limit = limit |
| 209 | + ) |
| 210 | + ) |
107 | 211 | tt <- gbif_GET(url, args, FALSE, curlopts)
|
108 |
| - alt <- tibble::as_tibble(data.table::setDF( |
109 |
| - data.table::rbindlist( |
110 |
| - lapply(tt$alternatives, function(x) |
111 |
| - lapply(x, function(x) if (length(x) == 0) NA else x)), |
112 |
| - use.names = TRUE, fill = TRUE))) |
113 |
| - dat <- tibble::as_tibble( |
114 |
| - data.frame(tt[!names(tt) %in% c("alternatives", "note")], |
115 |
| - stringsAsFactors = FALSE)) |
| 212 | + alt <- bind_rows(lapply(tt$diagnostics$alternatives, function(x) |
| 213 | + process_name_backbone_output(x, args, checklistKey = NULL)) |
| 214 | + ) |
| 215 | + tt$diagnostics$alternatives <- NULL |
| 216 | + dat <- process_name_backbone_output(tt, args, checklistKey = checklistKey) |
116 | 217 | out <- list(data = dat, alternatives = alt)
|
117 |
| - structure(out, args = args, note = tt$note, type = "single") |
| 218 | + structure(out, args = args, note = tt$diagnostics$note, type = "single") |
118 | 219 | }
|
119 | 220 |
|
| 221 | +process_name_backbone_output <- function(tt, args, checklistKey = NULL) { |
| 222 | + usage <- if (!is.null(tt$usage)) { |
| 223 | + u <- tibble::as_tibble(tt$usage) |
| 224 | + colnames(u)[colnames(u) == "key"] <- "usageKey" |
| 225 | + colnames(u)[colnames(u) == "name"] <- "scientificName" |
| 226 | + u |
| 227 | + } else { |
| 228 | + NULL |
| 229 | + } |
| 230 | + diagnostics <- if (!is.null(tt$diagnostics)) { |
| 231 | + tt$diagnostics["timings"] <- NULL |
| 232 | + tt$diagnostics["issues"] <- NULL |
| 233 | + d <- tibble::as_tibble(tt$diagnostics) |
| 234 | + d |
| 235 | + } else { |
| 236 | + NULL |
| 237 | + } |
| 238 | + classification <- if (!is.null(tt$classification)) { |
| 239 | + c <- bind_rows(lapply(tt$classification, tibble::as_tibble)) |
| 240 | + nv <- setNames(c$name, tolower(c$rank)) |
| 241 | + kv <- setNames(c$key, paste0(tolower(c$rank), "Key")) |
| 242 | + c <- tibble::as_tibble(as.list(c(nv, kv))) |
| 243 | + c |
| 244 | + } else { |
| 245 | + NULL |
| 246 | + } |
| 247 | + synonym <- if (!is.null(tt$synonym)) { |
| 248 | + tibble::as_tibble(tt$synonym) |
| 249 | + } else { |
| 250 | + NULL |
| 251 | + } |
| 252 | + verbatim <- if (!is.null(args)) { |
| 253 | + input_args_clean <- args[!names(args) %in% |
| 254 | + c("strict","verbose","start","limit","curlopts")] |
| 255 | + v <- stats::setNames(input_args_clean, |
| 256 | + paste0("verbatim_",names(input_args_clean))) |
| 257 | + names(v)[names(v) == "verbatim_taxonRank"] <- "verbatim_rank" |
| 258 | + names(v)[names(v) == "verbatim_scientificName"] <- "verbatim_name" |
| 259 | + tibble::as_tibble(v) |
| 260 | + } else { |
| 261 | + NULL |
| 262 | + } |
| 263 | + |
| 264 | + out <- do.call("cbind", rgbif_compact(list(usage, |
| 265 | + diagnostics, |
| 266 | + classification, |
| 267 | + synonym, |
| 268 | + verbatim))) |
| 269 | + tibble::as_tibble(out) |
| 270 | +} |
120 | 271 |
|
0 commit comments