Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Description: A programmatic interface to the Web Service methods
retrieving information on data providers, getting species occurrence
records, getting counts of occurrence records, and using the GBIF
tile map service to make rasters summarizing huge amounts of data.
Version: 3.8.2.1
Version: 3.8.2.2
License: MIT + file LICENSE
Authors@R: c(
person("Scott", "Chamberlain", role = "aut", comment = c("0000-0003-1444-9135")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ export(gbif_photos)
export(gbif_wkt2bbox)
export(installations)
export(lit_count)
export(lit_export)
export(lit_search)
export(map_fetch)
export(mvt_fetch)
Expand Down
94 changes: 90 additions & 4 deletions R/lit_search.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
#' @param journalSource (character) Journal of publication.
#' @param journalPublisher (character) Publisher of journal.
#' @param flatten (logical) should any lists in the resulting data be flattened
#' into comma-seperated strings?
#' into comma-separated strings? Ignored in lit_export.
#' @param abstract (logical) should the abstract be included in the results?
#' Defaults to FALSE.
#' @param limit how many records to return. limit=NULL will fetch up to 10,000.
#' @param curlopts list of named curl options passed on to HttpClient.
#' see curl::curl_options for curl options.
Expand Down Expand Up @@ -127,6 +129,7 @@ lit_search <- function(
journalSource=NULL,
journalPublisher=NULL,
flatten=TRUE,
abstract=FALSE,
limit=NULL,
curlopts = list()
) {
Expand Down Expand Up @@ -226,7 +229,7 @@ lit_search <- function(
urls <- sapply(urls,function(x) gsub("\\[|\\]","",x)) # remove any square brackets
# make request
ll <- gbif_async_get(urls,parse=TRUE)
data <- process_lit_async_results(ll,flatten=flatten)
data <- process_lit_async_results(ll,flatten=flatten,abstract=abstract)
meta <- rgbif_compact(ll[[length(urls)]])
# clean results
meta$results <- NULL
Expand Down Expand Up @@ -282,12 +285,95 @@ lit_count <- function(...) {
count
}

process_lit_async_results <- function(ll,flatten=TRUE) {

#' @export
#' @rdname lit_search
lit_export <- function(
    q=NULL,
    countriesOfResearcher=NULL,
    countriesOfCoverage=NULL,
    literatureType=NULL,
    relevance=NULL,
    year=NULL,
    topics=NULL,
    datasetKey=NULL,
    publishingOrg=NULL,
    peerReview=NULL,
    openAccess=NULL,
    downloadKey=NULL,
    doi=NULL,
    journalSource=NULL,
    journalPublisher=NULL,
    flatten=NULL,
    abstract=FALSE,
    limit=NULL,
    curlopts = list()
    ) {
  # Downloads the GBIF "/literature/export" file matching the supplied filters
  # and returns it as a tibble. Column names are passed through to_camel(),
  # every column is returned as character except `peerReview` and
  # `openAccess` (logical), and empty strings are replaced by NA. The
  # `abstract` column is dropped unless `abstract = TRUE`. The `flatten`,
  # `limit` and `curlopts` arguments are accepted but ignored (a message is
  # emitted when they are supplied).

  # Validate key-style arguments. The NULL check comes first and `&&`
  # short-circuits, so the validators only ever see user-supplied values and
  # the `if` condition is always a single TRUE/FALSE.
  if (!is.null(datasetKey) && !all(is_uuid(datasetKey))) {
    stop("'datasetKey' should be a GBIF dataset uuid.")
  }
  if (!is.null(publishingOrg) && !all(is_uuid(publishingOrg))) {
    stop("'publishingOrg' should be a GBIF publisher uuid.")
  }
  if (!is.null(downloadKey) && !all(is_download_key(downloadKey))) {
    stop("'downloadKey' should be a GBIF downloadkey.")
  }

  assert(q, "character")
  assert(countriesOfResearcher, "character")
  assert(countriesOfCoverage, "character")
  assert(literatureType, "character")
  assert(relevance, "character")
  assert(topics, "character")
  assert(peerReview, "logical")
  assert(openAccess, "logical")
  assert(doi, "character")
  assert(journalSource, "character")
  assert(journalPublisher, "character")

  # These arguments exist only for signature parity with lit_search().
  if (!is.null(flatten)) message("flatten argument is ignored for lit_export, results are returned flat.")
  if (!is.null(limit)) message("limit argument is ignored for lit_export, all results are returned.")
  if (length(curlopts) != 0) message("curlopts argument are ignored for lit_export")
  # https://api.gbif.org/v1/literature/export?format=TSV&gbifDownloadKey=0138953-210914110416597

  # Single-valued query parameters.
  args <- rgbif_compact(
    list(
      q = q,
      year = year,
      peerReview = peerReview,
      openAccess = openAccess
    )
  )

  # Query parameters handled by the convmany helpers. The bare argument names
  # are passed on purpose: do not wrap or rename them before the call.
  args <- rgbif_compact(
    c(
      args,
      convmany(relevance),
      convmany(countriesOfResearcher),
      convmany(countriesOfCoverage),
      convmany(literatureType),
      convmany(topics),
      convmany_rename(datasetKey, "gbifDatasetKey"),
      convmany_rename(publishingOrg, "publishingOrganizationKey"),
      convmany_rename(downloadKey, "gbifDownloadKey"),
      convmany(doi),
      convmany_rename(journalSource, "source"),
      convmany_rename(journalPublisher, "publisher")
    )
  )

  # Only append a query string when there is at least one filter; pasting an
  # empty argument list would otherwise yield the malformed query "?=".
  url <- paste0(gbif_base(), "/literature/export")
  if (length(args) > 0) {
    url_query <- paste0(names(args), "=", args, collapse = "&")
    url_query <- utils::URLencode(url_query)
    url <- paste0(url, "?", url_query)
  }

  # Download to a temporary file and make sure it is removed again, even when
  # the download or the parsing step fails.
  temp_file <- tempfile()
  on.exit(unlink(temp_file), add = TRUE)
  utils::download.file(url, destfile = temp_file, quiet = TRUE)
  out <- tibble::as_tibble(data.table::fread(temp_file, showProgress = FALSE))

  if (!abstract) out$abstract <- NULL
  colnames(out) <- to_camel(colnames(out))
  # Normalise every column to character first, then restore the two flags.
  out[] <- lapply(out, as.character)
  out$peerReview <- as.logical(out$peerReview)
  out$openAccess <- as.logical(out$openAccess)
  out[out == ""] <- NA
  out
}


process_lit_async_results <- function(ll,flatten=TRUE,abstract=FALSE) {
data_list <- lapply(ll,function(x) x$results)
# handle complex identifiers
data_list <- lapply(data_list,function(x) tibble::tibble(x,x$identifiers))
for(i in 1:length(data_list)) data_list[[i]]$identifiers <- NULL
for(i in 1:length(data_list)) data_list[[i]]$abstract <- NULL
if(!abstract) for(i in 1:length(data_list)) data_list[[i]]$abstract <- NULL
data <- bind_rows(data_list)
# data
if(flatten) {
Expand Down
29 changes: 28 additions & 1 deletion man/lit_search.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/organizations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions tests/testthat/test-lit_search.R
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,39 @@ test_that("lit_count works as expected", {

})

# Live-API checks for lit_export(); skipped on CI and CRAN because every call
# downloads an export file from the GBIF API.
test_that("lit_export works as expected", {
  skip_on_ci()
  skip_on_cran()

  # export filtered by a single dataset key
  ee <- lit_export(datasetKey = "50c9509d-22c7-4a22-a47d-8c48425ef4a7")
  expect_s3_class(ee, "tbl_df")
  expect_gt(nrow(ee), 6000) # don't expect citations to go down
  expect_gt(ncol(ee), 15) # don't expect columns to go down
  expect_true(all(c("title", "id", "gbifDownloadKey") %in% colnames(ee)))

  # single year, keeping the abstract column
  aa <- lit_export(year = 2003, abstract = TRUE)
  expect_s3_class(aa, "tbl_df")
  expect_gt(nrow(aa), 5) # don't expect citations to go down
  expect_gt(ncol(aa), 15) # don't expect columns to go down
  expect_true(all(c("title", "id", "gbifDownloadKey", "abstract") %in% colnames(aa)))

  # year range
  yy <- lit_export(year = "2011,2015")
  expect_s3_class(yy, "tbl_df")
  expect_gt(nrow(yy), 3000) # don't expect citations to go down
  expect_lt(nrow(yy), 57000) # shouldn't return a really large number
  expect_gt(ncol(yy), 15) # don't expect columns to go down
  expect_true(all(c("title", "id", "gbifDownloadKey") %in% colnames(yy)))

  # complex example using many arguments
  cc <- lit_export(
    year = "2000,2020",
    countriesOfResearcher = "US",
    topics = "BIODIVERSITY_SCIENCE",
    relevance = "GBIF_USED;GBIF_CITED"
  )
  expect_s3_class(cc, "tbl_df")
  expect_gt(ncol(cc), 15) # don't expect columns to go down
  expect_true(all(c("title", "id", "gbifDownloadKey") %in% colnames(cc)))
  expect_true(all(grepl("UNITED_STATES", cc$countriesOfResearcher)))

})


Loading