ropensci · jhnwllr · Jun 24, 2025 · Jun 24, 2025 · Jun 24, 2025 · Jun 24, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -8,7 +8,7 @@ Description: A programmatic interface to the Web Service methods
     retrieving information on data providers, getting species occurrence
     records, getting counts of occurrence records, and using the GBIF
     tile map service to make rasters summarizing huge amounts of data.
-Version: 3.8.2.1
+Version: 3.8.2.2
 License: MIT + file LICENSE
 Authors@R: c(
     person("Scott", "Chamberlain", role = "aut", comment = c("0000-0003-1444-9135")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -71,6 +71,7 @@ export(gbif_photos)
 export(gbif_wkt2bbox)
 export(installations)
 export(lit_count)
+export(lit_export)
 export(lit_search)
 export(map_fetch)
 export(mvt_fetch)

diff --git a/R/lit_search.R b/R/lit_search.R
@@ -28,7 +28,9 @@
 #' @param journalSource (character) Journal of publication.
 #' @param journalPublisher (character) Publisher of journal.
 #' @param flatten (logical) should any lists in the resulting data be flattened
-#' into comma-seperated strings?
+#' into comma-separated strings? Ignored in lit_export.
+#' @param abstract (logical) should the abstract be included in the results?
+#' Defaults to FALSE. Applies to both lit_search and lit_export.
 #' @param limit how many records to return. limit=NULL will fetch up to 10,000. 
 #' @param curlopts list of named curl options passed on to HttpClient. 
 #' see curl::curl_options for curl options.
@@ -127,6 +129,7 @@ lit_search <- function(
     journalSource=NULL, 
     journalPublisher=NULL,
     flatten=TRUE,
+    abstract=FALSE,
     limit=NULL,
     curlopts = list()
 ) {
@@ -226,7 +229,7 @@ lit_search <- function(
   urls <- sapply(urls,function(x) gsub("\\[|\\]","",x)) # remove any square brackets
   # make request 
   ll <- gbif_async_get(urls,parse=TRUE)
-  data <- process_lit_async_results(ll,flatten=flatten)
+  data <- process_lit_async_results(ll,flatten=flatten,abstract=abstract)
   meta <- rgbif_compact(ll[[length(urls)]])
   # clean results
   meta$results <- NULL
@@ -282,12 +285,95 @@ lit_count <- function(...) {
   count
 }
 
-process_lit_async_results <- function(ll,flatten=TRUE) {
+
+#' @export
+#' @rdname lit_search
+lit_export <- function(
+    q=NULL,
+    countriesOfResearcher=NULL,
+    countriesOfCoverage=NULL,
+    literatureType=NULL,
+    relevance=NULL,
+    year=NULL,
+    topics=NULL,
+    datasetKey=NULL,
+    publishingOrg=NULL,
+    peerReview=NULL,
+    openAccess=NULL,
+    downloadKey=NULL,
+    doi=NULL,
+    journalSource=NULL,
+    journalPublisher=NULL,
+    flatten=NULL,
+    abstract=FALSE,
+    limit=NULL,
+    curlopts = list()
+  ) {
+  # Downloads the result of a literature query from the GBIF
+  # `/literature/export` endpoint into a temporary file and returns it as a
+  # tibble. All columns are character except `peerReview` and `openAccess`,
+  # which are converted to logical; empty strings become NA. The `abstract`
+  # column is dropped unless `abstract = TRUE`. `flatten`, `limit` and
+  # `curlopts` exist only for signature parity with lit_search() and are
+  # ignored (a message is emitted when they are supplied).
+
+  # Check for NULL first and short-circuit with `&&`, so the validators are
+  # never called on NULL and the `if` condition is always a single logical.
+  if (!is.null(datasetKey) && !all(is_uuid(datasetKey))) {
+    stop("'datasetKey' should be a GBIF dataset uuid.")
+  }
+  if (!is.null(publishingOrg) && !all(is_uuid(publishingOrg))) {
+    stop("'publishingOrg' should be a GBIF publisher uuid.")
+  }
+  if (!is.null(downloadKey) && !all(is_download_key(downloadKey))) {
+    stop("'downloadKey' should be a GBIF downloadkey.")
+  }
+
+  assert(q,"character")
+  assert(countriesOfResearcher,"character")
+  assert(countriesOfCoverage,"character")
+  assert(literatureType,"character")
+  assert(relevance,"character")
+  assert(topics,"character")
+  assert(peerReview,"logical")
+  assert(openAccess,"logical")
+  assert(doi,"character")
+  assert(journalSource,"character")
+  assert(journalPublisher,"character")
+  if(!is.null(flatten)) message("flatten argument is ignored for lit_export, results are returned flat.")
+  if(!is.null(limit)) message("limit argument is ignored for lit_export, all results are returned.")
+  if(length(curlopts) != 0) message("curlopts argument are ignored for lit_export")
+  # Example request:
+  # https://api.gbif.org/v1/literature/export?format=TSV&gbifDownloadKey=0138953-210914110416597
+
+  # Single-valued filters.
+  args <- rgbif_compact(
+    list(q = q,
+         year = year,
+         peerReview = peerReview,
+         openAccess = openAccess
+        ))
+
+  # Filters that go through convmany()/convmany_rename(); the *_rename variant
+  # maps the R argument name onto the API parameter name given as 2nd argument.
+  args <- rgbif_compact(
+            c(args,
+            convmany(relevance),
+            convmany(countriesOfResearcher),
+            convmany(countriesOfCoverage),
+            convmany(literatureType),
+            convmany(topics),
+            convmany_rename(datasetKey,"gbifDatasetKey"),
+            convmany_rename(publishingOrg,"publishingOrganizationKey"),
+            convmany_rename(downloadKey,"gbifDownloadKey"),
+            convmany(doi),
+            convmany_rename(journalSource,"source"),
+            convmany_rename(journalPublisher,"publisher")
+            ))
+
+  # Percent-encode each value on its own (reserved = TRUE) so that characters
+  # such as "&", "=", "+" or "#" inside a value cannot be mistaken for query
+  # syntax. Encoding the already-joined query string would leave them as-is.
+  arg_values <- vapply(as.character(args), utils::URLencode, character(1),
+                       reserved = TRUE, USE.NAMES = FALSE)
+  url <- paste0(gbif_base(),"/literature/export")
+  # Only append a query string when there is at least one filter; otherwise
+  # the paste below would yield a malformed "?=".
+  if (length(args) > 0) {
+    url <- paste0(url,"?",paste0(names(args),"=",arg_values,collapse="&"))
+  }
+
+  temp_file <- tempfile()
+  # fread() loads the whole file into memory, so the download can be removed
+  # as soon as the function exits (including on error).
+  on.exit(unlink(temp_file), add = TRUE)
+  utils::download.file(url,destfile=temp_file,quiet=TRUE)
+  out <- tibble::as_tibble(data.table::fread(temp_file, showProgress=FALSE))
+  if(!abstract) out$abstract <- NULL
+  colnames(out) <- to_camel(colnames(out))
+  # Normalise every column to character first, then restore the two flags.
+  # NOTE(review): assumes the export always contains columns that to_camel()
+  # turns into `peerReview` and `openAccess` -- confirm against the API.
+  out[] <- lapply(out, as.character)
+  out$peerReview <- as.logical(out$peerReview)
+  out$openAccess <- as.logical(out$openAccess)
+  out[out == ""] <- NA
+  out
+}
+
+
+process_lit_async_results <- function(ll,flatten=TRUE,abstract=FALSE) {
   data_list <- lapply(ll,function(x) x$results)
   # handle complex identifiers
   data_list <- lapply(data_list,function(x) tibble::tibble(x,x$identifiers))
   for(i in 1:length(data_list)) data_list[[i]]$identifiers <- NULL
-  for(i in 1:length(data_list)) data_list[[i]]$abstract <- NULL
+  if(!abstract) for(i in 1:length(data_list)) data_list[[i]]$abstract <- NULL
   data <- bind_rows(data_list)
   # data
   if(flatten) {

diff --git a/man/lit_search.Rd b/man/lit_search.Rd
diff --git a/man/organizations.Rd b/man/organizations.Rd
diff --git a/tests/testthat/test-lit_search.R b/tests/testthat/test-lit_search.R
@@ -139,4 +139,39 @@ test_that("lit_count works as expected", {
 
 })
 
+test_that("lit_export works as expected", {
+  # These tests hit the live GBIF API, so they are skipped on CI and CRAN.
+  skip_on_ci()
+  skip_on_cran()
+
+  # columns every export is expected to contain
+  core_cols <- c("title","id","gbifDownloadKey")
+
+  # export filtered by a single dataset
+  ee <- lit_export(datasetKey="50c9509d-22c7-4a22-a47d-8c48425ef4a7")
+  expect_s3_class(ee,"tbl_df")
+  expect_gt(nrow(ee), 6000) # don't expect citations to go down
+  expect_gt(ncol(ee), 15) # don't expect columns to go down
+  expect_true(all(core_cols %in% colnames(ee)))
+
+  # abstract=TRUE keeps the abstract column
+  aa <- lit_export(year=2003,abstract=TRUE)
+  expect_s3_class(aa,"tbl_df")
+  expect_gt(nrow(aa), 5) # don't expect citations to go down
+  expect_gt(ncol(aa), 15) # don't expect columns to go down
+  expect_true(all(c(core_cols,"abstract") %in% colnames(aa)))
+
+  # year range
+  yy <- lit_export(year="2011,2015")
+  expect_s3_class(yy,"tbl_df")
+  expect_gt(nrow(yy), 3000) # don't expect citations to go down
+  expect_lt(nrow(yy), 57000) # shouldn't return a really large number
+  expect_gt(ncol(yy), 15) # don't expect columns to go down
+  expect_true(all(core_cols %in% colnames(yy)))
+
+  # complex example using many arguments
+  cc <- lit_export(year="2000,2020",countriesOfResearcher="US",
+                   topics="BIODIVERSITY_SCIENCE",
+                   relevance="GBIF_USED;GBIF_CITED")
+  expect_s3_class(cc,"tbl_df")
+  expect_gt(ncol(cc), 15) # don't expect columns to go down
+  expect_true(all(core_cols %in% colnames(cc)))
+  expect_true(all(grepl("UNITED_STATES",cc$countriesOfResearcher)))
+
+})
+