Skip to content

Commit 9a37a85

Browse files
committed
Add days to last follow up from the new GDC API model #639
1 parent 4075235 commit 9a37a85

File tree

2 files changed

+21
-3
lines changed

2 files changed

+21
-3
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: TCGAbiolinks
22
Type: Package
33
Title: TCGAbiolinks: An R/Bioconductor package for integrative analysis with GDC data
4-
Version: 2.35.3
4+
Version: 2.35.4
55
Date: 2024-01-01
66
Author: Antonio Colaprico,
77
Tiago Chedraoui Silva,

R/clinical.R

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ GDCquery_clinic <- function(
229229
options.pretty <- "pretty=true"
230230

231231
if (grepl("clinical",type,ignore.case = TRUE)) {
232-
options.expand <- "expand=diagnoses,diagnoses.treatments,annotations,family_histories,demographic,exposures"
232+
options.expand <- "expand=diagnoses,follow_ups,diagnoses.treatments,annotations,family_histories,demographic,exposures"
233233
option.size <- paste0("size=",getNbCases(project,"Clinical"))
234234
files.data_category <- "Clinical"
235235
} else {
@@ -266,7 +266,7 @@ GDCquery_clinic <- function(
266266

267267
#message(paste0(baseURL,paste(options.pretty,options.expand, option.size, options.filter, sep = "&")))
268268
results <- json$data$hits
269-
269+
saveRDS(results,"tcgabiolinks_debug.rda")
270270
if (grepl("clinical",type,ignore.case = TRUE)) {
271271
if (grepl("TCGA",project)) {
272272
df <- data.frame("submitter_id" = results$submitter_id)
@@ -276,6 +276,7 @@ GDCquery_clinic <- function(
276276

277277
# we are getting more results than we should
278278
diagnoses <- diagnoses[diagnoses$submitter_id %in% df$submitter_id,]
279+
diagnoses$days_to_last_follow_up <- NULL
279280
df <- merge(df,diagnoses, by="submitter_id", all = TRUE, sort = FALSE)
280281
}
281282

@@ -300,6 +301,23 @@ GDCquery_clinic <- function(
300301
)
301302
}
302303

304+
if ("follow_ups" %in% colnames(results)){
305+
follow_ups <- rbindlist(lapply(results$follow_ups, function(x) if(is.null(x)) data.frame(NA) else x),fill = T)
306+
follow_ups$submitter_id <- gsub("_follow_up*","", follow_ups$submitter_id)
307+
308+
# we are getting more results than we should
309+
follow_ups <- follow_ups[follow_ups$submitter_id %in% df$submitter_id,]
310+
311+
# For each submitter_id, take the maximum non-missing days_to_follow_up as days_to_last_follow_up (NA if all are missing)
312+
follow_ups_last <- follow_ups %>%
313+
dplyr::group_by(submitter_id) %>%
314+
dplyr::summarise(
315+
days_to_last_follow_up = ifelse(any(!is.na(days_to_follow_up)),max(days_to_follow_up,na.rm = TRUE),NA)
316+
)
317+
318+
df <- dplyr::full_join(df,follow_ups_last, by = "submitter_id")
319+
}
320+
303321
if( "treatments" %in% colnames(df)) {
304322

305323
treatments <- purrr::map_dfr(

0 commit comments

Comments
 (0)