Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ export(gbif_photos)
export(gbif_wkt2bbox)
export(installations)
export(lit_count)
export(lit_export)
export(lit_search)
export(map_fetch)
export(mvt_fetch)
Expand Down
94 changes: 90 additions & 4 deletions R/lit_search.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
#' @param journalSource (character) Journal of publication.
#' @param journalPublisher (character) Publisher of journal.
#' @param flatten (logical) should any lists in the resulting data be flattened
#' into comma-separated strings?
#' into comma-separated strings? Ignored in lit_export.
#' @param abstract (logical) should the abstract be included in the results.
#' Ignored for lit_search.
#' @param limit how many records to return. limit=NULL will fetch up to 10,000.
#' @param curlopts list of named curl options passed on to HttpClient.
#' see curl::curl_options for curl options.
Expand Down Expand Up @@ -127,6 +129,7 @@ lit_search <- function(
journalSource=NULL,
journalPublisher=NULL,
flatten=TRUE,
abstract=FALSE,
limit=NULL,
curlopts = list()
) {
Expand Down Expand Up @@ -226,7 +229,7 @@ lit_search <- function(
urls <- sapply(urls,function(x) gsub("\\[|\\]","",x)) # remove any square brackets
# make request
ll <- gbif_async_get(urls,parse=TRUE)
data <- process_lit_async_results(ll,flatten=flatten)
data <- process_lit_async_results(ll,flatten=flatten,abstract=abstract)
meta <- rgbif_compact(ll[[length(urls)]])
# clean results
meta$results <- NULL
Expand Down Expand Up @@ -282,12 +285,95 @@ lit_count <- function(...) {
count
}

process_lit_async_results <- function(ll,flatten=TRUE) {

#' @export
#' @rdname lit_search
lit_export <- function(
q=NULL,
countriesOfResearcher=NULL,
countriesOfCoverage=NULL,
literatureType=NULL,
relevance=NULL,
year=NULL,
topics=NULL,
datasetKey=NULL,
publishingOrg=NULL,
peerReview=NULL,
openAccess=NULL,
downloadKey=NULL,
doi=NULL,
journalSource=NULL,
journalPublisher=NULL,
flatten=NULL,
abstract=FALSE,
limit=NULL,
curlopts = list()
) {

if(!is_uuid(datasetKey) & !is.null(datasetKey)) stop("'datasetKey' should be a GBIF dataset uuid.")
if(!is_uuid(publishingOrg) & !is.null(publishingOrg)) stop("'publishingOrg' should be a GBIF publisher uuid.")
if(!is_download_key(downloadKey) & !is.null(downloadKey)) stop("'downloadKey' should be a GBIF downloadkey.")

assert(q,"character")
assert(countriesOfResearcher,"character")
assert(countriesOfCoverage,"character")
assert(literatureType,"character")
assert(relevance,"character")
assert(topics,"character")
assert(peerReview,"logical")
assert(openAccess,"logical")
assert(doi,"character")
assert(journalSource,"character")
assert(journalPublisher,"character")
if(!is.null(flatten)) message("flatten argument is ignored for lit_export, results are returned flat.")
if(!is.null(limit)) message("limit argument is ignored for lit_export, all results are returned.")
if(!length(curlopts)==0) message("curlopts argument are ignored for lit_export")
# https://api.gbif.org/v1/literature/export?format=TSV&gbifDownloadKey=0138953-210914110416597

args <- rgbif_compact(
list(q = q,
year = year,
peerReview = peerReview,
openAccess = openAccess
))

args <- rgbif_compact(
c(args,
convmany(relevance),
convmany(countriesOfResearcher),
convmany(countriesOfCoverage),
convmany(literatureType),
convmany(topics),
convmany_rename(datasetKey,"gbifDatasetKey"),
convmany_rename(publishingOrg,"publishingOrganizationKey"),
convmany_rename(downloadKey,"gbifDownloadKey"),
convmany(doi),
convmany_rename(journalSource,"source"),
convmany_rename(journalPublisher,"publisher")
))

url_query <- paste0(names(args),"=",args,collapse="&")
url_query <- utils::URLencode(url_query)
url <- paste0(gbif_base(),"/literature/export?",url_query)
temp_file <- tempfile()
utils::download.file(url,destfile=temp_file,quiet=TRUE)
out <- tibble::as_tibble(data.table::fread(temp_file, showProgress=FALSE))
if(!abstract) out$abstract <- NULL
colnames(out) <- to_camel(colnames(out))
out[] <- lapply(out, as.character)
out$peerReview <- as.logical(out$peerReview)
out$openAccess <- as.logical(out$openAccess)
out[out == ""] <- NA
out
}


process_lit_async_results <- function(ll,flatten=TRUE,abstract=FALSE) {
data_list <- lapply(ll,function(x) x$results)
# handle complex identifiers
data_list <- lapply(data_list,function(x) tibble::tibble(x,x$identifiers))
for(i in 1:length(data_list)) data_list[[i]]$identifiers <- NULL
for(i in 1:length(data_list)) data_list[[i]]$abstract <- NULL
if(!abstract) for(i in 1:length(data_list)) data_list[[i]]$abstract <- NULL
data <- bind_rows(data_list)
# data
if(flatten) {
Expand Down
29 changes: 28 additions & 1 deletion man/lit_search.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/organizations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions tests/testthat/test-lit_search.R
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,39 @@ test_that("lit_count works as expected", {

})

test_that("lit_export works as expected", {
  # These tests download live exports, so they are skipped on CI and CRAN.
  skip_on_ci()
  skip_on_cran()

  # export filtered by a single dataset key
  ee <- lit_export(datasetKey = "50c9509d-22c7-4a22-a47d-8c48425ef4a7")
  expect_s3_class(ee, "tbl_df")
  expect_true(nrow(ee) > 6000) # don't expect citations to go down
  expect_true(ncol(ee) > 15) # don't expect columns to go down
  expect_true(all(c("title", "id", "gbifDownloadKey") %in% colnames(ee)))

  # single year, keeping the abstract column
  aa <- lit_export(year = 2003, abstract = TRUE)
  expect_s3_class(aa, "tbl_df")
  expect_true(nrow(aa) > 5) # don't expect citations to go down
  expect_true(ncol(aa) > 15) # don't expect columns to go down
  expect_true(
    all(c("title", "id", "gbifDownloadKey", "abstract") %in% colnames(aa))
  )

  # year given as a "from,to" string
  yy <- lit_export(year = "2011,2015")
  expect_s3_class(yy, "tbl_df")
  expect_true(nrow(yy) > 3000) # don't expect citations to go down
  expect_true(nrow(yy) < 57000) # shouldn't return a really large number
  expect_true(ncol(yy) > 15) # don't expect columns to go down
  expect_true(all(c("title", "id", "gbifDownloadKey") %in% colnames(yy)))

  # complex example using many arguments
  cc <- lit_export(
    year = "2000,2020",
    countriesOfResearcher = "US",
    topics = "BIODIVERSITY_SCIENCE",
    relevance = "GBIF_USED;GBIF_CITED"
  )
  expect_s3_class(cc, "tbl_df")
  expect_true(ncol(cc) > 15) # don't expect columns to go down
  expect_true(all(c("title", "id", "gbifDownloadKey") %in% colnames(cc)))
  expect_true(all(grepl("UNITED_STATES", cc$countriesOfResearcher)))

})


Loading