Skip to content

Commit 988215f

Browse files
author
John Waller
committed
adding support for lit_export #695
1 parent d09b0bd commit 988215f

File tree

5 files changed

+155
-6
lines changed

5 files changed

+155
-6
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ export(gbif_photos)
7171
export(gbif_wkt2bbox)
7272
export(installations)
7373
export(lit_count)
74+
export(lit_export)
7475
export(lit_search)
7576
export(map_fetch)
7677
export(mvt_fetch)

R/lit_search.R

Lines changed: 90 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@
2828
#' @param journalSource (character) Journal of publication.
2929
#' @param journalPublisher (character) Publisher of journal.
3030
#' @param flatten (logical) should any lists in the resulting data be flattened
31-
#' into comma-seperated strings?
31+
#' into comma-separated strings? Ignored in lit_export.
32+
#' @param abstract (logical) should the abstract be included in the results.
33+
#' Applies to both lit_search and lit_export; defaults to FALSE.
3234
#' @param limit how many records to return. limit=NULL will fetch up to 10,000.
3335
#' @param curlopts list of named curl options passed on to HttpClient.
3436
#' see curl::curl_options for curl options.
@@ -127,6 +129,7 @@ lit_search <- function(
127129
journalSource=NULL,
128130
journalPublisher=NULL,
129131
flatten=TRUE,
132+
abstract=FALSE,
130133
limit=NULL,
131134
curlopts = list()
132135
) {
@@ -226,7 +229,7 @@ lit_search <- function(
226229
urls <- sapply(urls,function(x) gsub("\\[|\\]","",x)) # remove any square brackets
227230
# make request
228231
ll <- gbif_async_get(urls,parse=TRUE)
229-
data <- process_lit_async_results(ll,flatten=flatten)
232+
data <- process_lit_async_results(ll,flatten=flatten,abstract=abstract)
230233
meta <- rgbif_compact(ll[[length(urls)]])
231234
# clean results
232235
meta$results <- NULL
@@ -282,12 +285,95 @@ lit_count <- function(...) {
282285
count
283286
}
284287

285-
process_lit_async_results <- function(ll,flatten=TRUE) {
288+
289+
#' @export
290+
#' @rdname lit_search
291+
lit_export <- function(
292+
q=NULL,
293+
countriesOfResearcher=NULL,
294+
countriesOfCoverage=NULL,
295+
literatureType=NULL,
296+
relevance=NULL,
297+
year=NULL,
298+
topics=NULL,
299+
datasetKey=NULL,
300+
publishingOrg=NULL,
301+
peerReview=NULL,
302+
openAccess=NULL,
303+
downloadKey=NULL,
304+
doi=NULL,
305+
journalSource=NULL,
306+
journalPublisher=NULL,
307+
flatten=NULL,
308+
abstract=FALSE,
309+
limit=NULL,
310+
curlopts = NULL
311+
) {
312+
313+
if(!is_uuid(datasetKey) & !is.null(datasetKey)) stop("'datasetKey' should be a GBIF dataset uuid.")
314+
if(!is_uuid(publishingOrg) & !is.null(publishingOrg)) stop("'publishingOrg' should be a GBIF publisher uuid.")
315+
if(!is_download_key(downloadKey) & !is.null(downloadKey)) stop("'downloadKey' should be a GBIF downloadkey.")
316+
317+
assert(q,"character")
318+
assert(countriesOfResearcher,"character")
319+
assert(countriesOfCoverage,"character")
320+
assert(literatureType,"character")
321+
assert(relevance,"character")
322+
assert(topics,"character")
323+
assert(peerReview,"logical")
324+
assert(openAccess,"logical")
325+
assert(doi,"character")
326+
assert(journalSource,"character")
327+
assert(journalPublisher,"character")
328+
if(!is.null(flatten)) message("flatten argument is ignored for lit_export, results are returned flat.")
329+
if(!is.null(limit)) message("limit argument is ignored for lit_export, all results are returned.")
330+
if(!is.null(curlopts)) message("curlopts argument are ignored for lit_export")
331+
# https://api.gbif.org/v1/literature/export?format=TSV&gbifDownloadKey=0138953-210914110416597
332+
333+
args <- rgbif_compact(
334+
list(q = q,
335+
year = year,
336+
peerReview = peerReview,
337+
openAccess = openAccess
338+
))
339+
340+
args <- rgbif_compact(
341+
c(args,
342+
convmany(relevance),
343+
convmany(countriesOfResearcher),
344+
convmany(countriesOfCoverage),
345+
convmany(literatureType),
346+
convmany(topics),
347+
convmany_rename(datasetKey,"gbifDatasetKey"),
348+
convmany_rename(publishingOrg,"publishingOrganizationKey"),
349+
convmany_rename(downloadKey,"gbifDownloadKey"),
350+
convmany(doi),
351+
convmany_rename(journalSource,"source"),
352+
convmany_rename(journalPublisher,"publisher")
353+
))
354+
355+
url_query <- paste0(names(args),"=",args,collapse="&")
356+
url_query <- utils::URLencode(url_query)
357+
url <- paste0(gbif_base(),"/literature/export?",url_query)
358+
temp_file <- tempfile()
359+
utils::download.file(url,destfile=temp_file,quiet=TRUE)
360+
out <- tibble::as_tibble(data.table::fread(temp_file, showProgress=FALSE))
361+
if(!abstract) out$abstract <- NULL
362+
colnames(out) <- to_camel(colnames(out))
363+
out[] <- lapply(out, as.character)
364+
out$peerReview <- as.logical(out$peerReview)
365+
out$openAccess <- as.logical(out$openAccess)
366+
out[out == ""] <- NA
367+
out
368+
}
369+
370+
371+
process_lit_async_results <- function(ll,flatten=TRUE,abstract=FALSE) {
286372
data_list <- lapply(ll,function(x) x$results)
287373
# handle complex identifiers
288374
data_list <- lapply(data_list,function(x) tibble::tibble(x,x$identifiers))
289375
for(i in 1:length(data_list)) data_list[[i]]$identifiers <- NULL
290-
for(i in 1:length(data_list)) data_list[[i]]$abstract <- NULL
376+
if(!abstract) for(i in 1:length(data_list)) data_list[[i]]$abstract <- NULL
291377
data <- bind_rows(data_list)
292378
# data
293379
if(flatten) {

man/lit_search.Rd

Lines changed: 27 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/organizations.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-lit_search.R

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,4 +139,39 @@ test_that("lit_count works as expected", {
139139

140140
})
141141

142+
test_that("lit_export works as expected", {
143+
skip_on_ci()
144+
skip_on_cran()
145+
146+
# export filtered by a single datasetKey
147+
ee <- lit_export(datasetKey="50c9509d-22c7-4a22-a47d-8c48425ef4a7")
148+
expect_is(ee,"tbl_df")
149+
expect_true(nrow(ee) > 6000) # don't expect citations to go down
150+
expect_true(ncol(ee) > 15) # don't expect columns to go down
151+
expect_true(all(c("title","id","gbifDownloadKey") %in% colnames(ee)))
152+
153+
aa <- lit_export(year=2003,abstract=TRUE)
154+
expect_is(aa,"tbl_df")
155+
expect_true(nrow(aa) > 5) # don't expect citations to go down
156+
expect_true(ncol(aa) > 15) # don't expect columns to go down
157+
expect_true(all(c("title","id","gbifDownloadKey","abstract") %in% colnames(aa)))
158+
159+
yy <- lit_export(year="2011,2015")
160+
expect_is(yy,"tbl_df")
161+
expect_true(nrow(yy) > 3000) # don't expect citations to go down
162+
expect_true(nrow(yy) < 57000) # shouldn't return a really large number
163+
expect_true(ncol(yy) > 15) # don't expect columns to go down
164+
expect_true(all(c("title","id","gbifDownloadKey") %in% colnames(yy)))
165+
166+
# complex example using many arguments
167+
cc <- lit_export(year="2000,2020",countriesOfResearcher="US",
168+
topics="BIODIVERSITY_SCIENCE",
169+
relevance="GBIF_USED;GBIF_CITED")
170+
expect_is(cc,"tbl_df")
171+
expect_true(ncol(cc) > 15) # don't expect columns to go down
172+
expect_true(all(c("title","id","gbifDownloadKey") %in% colnames(cc)))
173+
expect_true(all(grepl("UNITED_STATES",cc$countriesOfResearcher)))
174+
175+
})
176+
142177

0 commit comments

Comments
 (0)