Skip to content

Commit 56a304b

Browse files
committed
some improvements for #639
1 parent 9a37a85 commit 56a304b

File tree

2 files changed

+38
-2
lines changed

2 files changed

+38
-2
lines changed

R/clinical.R

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ GDCquery_clinic <- function(
303303

304304
if ("follow_ups" %in% colnames(results)){
305305
follow_ups <- rbindlist(lapply(results$follow_ups, function(x) if(is.null(x)) data.frame(NA) else x),fill = T)
306-
follow_ups$submitter_id <- gsub("_follow_up*","", follow_ups$submitter_id)
306+
follow_ups$submitter_id <- gsub("_follow_up.*","", follow_ups$submitter_id)
307307

308308
# we are getting more results than we should, so keep only follow-ups whose submitter_id is in df
309309
follow_ups <- follow_ups[follow_ups$submitter_id %in% df$submitter_id,]
@@ -315,7 +315,42 @@ GDCquery_clinic <- function(
315315
days_to_last_follow_up = ifelse(any(!is.na(days_to_follow_up)),max(days_to_follow_up,na.rm = TRUE),NA)
316316
)
317317

318-
df <- dplyr::full_join(df,follow_ups_last, by = "submitter_id")
318+
follow_ups_last <- follow_ups %>%
319+
dplyr::select(
320+
c(
321+
submitter_id,
322+
days_to_follow_up,
323+
#days_to_recurrence,
324+
#progression_or_recurrence_type,
325+
disease_response
326+
)
327+
) %>%
328+
dplyr::filter(!is.na(submitter_id), !is.na(days_to_follow_up)) %>%
329+
dplyr::group_by(submitter_id) %>%
330+
dplyr::filter(dplyr::row_number() == which.max(days_to_follow_up)) %>%
331+
dplyr::ungroup() %>%
332+
dplyr::rename_at(disease_response,.funs = function(x) paste0("follow_ups_",x))
333+
334+
df <- dplyr::full_join(df, follow_ups_last, by = "submitter_id")
335+
336+
# Keep all follow ups
337+
# follow_ups <- follow_ups %>%
338+
# dplyr::select(
339+
# c(
340+
# submitter_id,
341+
# days_to_follow_up,
342+
# days_to_recurrence,
343+
# progression_or_recurrence_type,
344+
# disease_response
345+
# )
346+
# ) %>%
347+
# dplyr::group_by(submitter_id) %>%
348+
# dplyr::summarise(
349+
# across(everything(),~ paste(., collapse = ";"))
350+
# ) %>% dplyr::rename_at(-1,.funs = function(x) paste0("follow_ups_",x))
351+
#
352+
# df <- dplyr::full_join(df, follow_ups, by = "submitter_id")
353+
319354
}
320355

321356
if( "treatments" %in% colnames(df)) {

TCGAbiolinks.Rproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
Version: 1.0
2+
ProjectId: 08f2ee77-2304-4374-a044-3036962139b4
23

34
RestoreWorkspace: Default
45
SaveWorkspace: Default

0 commit comments

Comments
 (0)