@@ -300,7 +300,8 @@ GDCquery_clinic <- function(
300300 )
301301 }
302302
303- if (" treatments" %in% colnames(df )){
303+ if ( " treatments" %in% colnames(df )) {
304+
304305 treatments <- purrr :: map_dfr(
305306 df $ treatments ,
306307 .f = function (row ) {
@@ -312,40 +313,45 @@ GDCquery_clinic <- function(
312313 })
313314
314315 df $ treatments <- NULL
315- treatments $ submitter_id <- gsub(" _treatment(_[0-9])?|_treatment([0-9])?" ," " , treatments $ submitter_id )
316- treatments <- treatments %> % dplyr :: select(
317- ! c(" updated_datetime" , " state" , " created_datetime" )
318- )
319- treatments <- treatments [treatments $ submitter_id %in% df $ submitter_id ,]
320316
317+ # skip treatment processing when all treatments are NULL (no rows returned)
318+ if (! nrow(treatments ) == 0 ) {
321319
322- # we have now two types of treatment
323- treatments.pharmaceutical <- treatments [grep(" Pharmaceutical" ,treatments $ treatment_type ,ignore.case = TRUE ),]
324- treatments.radiation <- treatments [grep(" radiation" ,treatments $ treatment_type ,ignore.case = TRUE ),]
320+ treatments $ submitter_id <- gsub(" _treatment(_[0-9])?|_treatment([0-9])?" ," " , treatments $ submitter_id )
321+ treatments <- treatments %> % dplyr :: select(
322+ ! c(" updated_datetime" , " state" , " created_datetime" )
323+ )
324+ treatments <- treatments [treatments $ submitter_id %in% df $ submitter_id ,]
325325
326- # Adding a prefix
327- colnames(treatments.pharmaceutical ) <- paste0(" treatments_pharmaceutical_" ,colnames(treatments.pharmaceutical ))
328- colnames(treatments.radiation ) <- paste0(" treatments_radiation_" ,colnames(treatments.radiation ))
329- colnames(treatments.radiation )[grep(" submitter" ,colnames(treatments.radiation ))] <- " submitter_id"
330- colnames(treatments.pharmaceutical )[grep(" submitter" ,colnames(treatments.pharmaceutical ))] <- " submitter_id"
331326
332- # If there are two rows for the same submitter_id
333- # we will collapse them into one single row
334- # concatenating all columns using ;
335- aux <- treatments.pharmaceutical %> % dplyr :: group_by(submitter_id ) %> %
336- dplyr :: summarise(
337- across(everything(),~ paste(unique(. ), collapse = " ;" ))
338- )
339- df <- merge(df , as.data.table(aux ), by = " submitter_id" , all = TRUE , sort = FALSE )
340-
341- # If there are two rows for the same submitter_id
342- # we will collapse them into one single row
343- # concatenating all columns using ;
344- aux <- treatments.radiation %> % dplyr :: group_by(submitter_id ) %> %
345- dplyr :: summarise(
346- across(everything(),~ paste(unique(. ), collapse = " ;" ))
347- )
348- df <- merge(df , as.data.table(aux ), by = " submitter_id" , all = TRUE , sort = FALSE )
327+ # we now have two types of treatment
328+ treatments.pharmaceutical <- treatments [grep(" Pharmaceutical" ,treatments $ treatment_type ,ignore.case = TRUE ),]
329+ treatments.radiation <- treatments [grep(" radiation" ,treatments $ treatment_type ,ignore.case = TRUE ),]
330+
331+ # Adding a prefix
332+ colnames(treatments.pharmaceutical ) <- paste0(" treatments_pharmaceutical_" ,colnames(treatments.pharmaceutical ))
333+ colnames(treatments.radiation ) <- paste0(" treatments_radiation_" ,colnames(treatments.radiation ))
334+ colnames(treatments.radiation )[grep(" submitter" ,colnames(treatments.radiation ))] <- " submitter_id"
335+ colnames(treatments.pharmaceutical )[grep(" submitter" ,colnames(treatments.pharmaceutical ))] <- " submitter_id"
336+
337+ # If there are two rows for the same submitter_id
338+ # we will collapse them into one single row
339+ # concatenating all columns using ;
340+ aux <- treatments.pharmaceutical %> % dplyr :: group_by(submitter_id ) %> %
341+ dplyr :: summarise(
342+ across(everything(),~ paste(unique(. ), collapse = " ;" ))
343+ )
344+ df <- merge(df , as.data.table(aux ), by = " submitter_id" , all = TRUE , sort = FALSE )
345+
346+ # If there are two rows for the same submitter_id
347+ # we will collapse them into one single row
348+ # concatenating all columns using ;
349+ aux <- treatments.radiation %> % dplyr :: group_by(submitter_id ) %> %
350+ dplyr :: summarise(
351+ across(everything(),~ paste(unique(. ), collapse = " ;" ))
352+ )
353+ df <- merge(df , as.data.table(aux ), by = " submitter_id" , all = TRUE , sort = FALSE )
354+ }
349355 }
350356
351357 df $ bcr_patient_barcode <- df $ submitter_id
@@ -370,7 +376,7 @@ GDCquery_clinic <- function(
370376 df <- cbind(df ,primary_site )
371377 }
372378
373- if (" diagnoses" %in% colnames(results )){
379+ if (" diagnoses" %in% colnames(results )) {
374380 diagnoses <- rbindlist(
375381 lapply(
376382 results $ diagnoses ,
@@ -379,7 +385,7 @@ GDCquery_clinic <- function(
379385 data.frame (NA )
380386 } else {
381387 # HTMCP-03-06-02061 has two diagnoses
382- x $ submitter_id <- gsub(" _diagnosis.*" ," " ,x $ submitter_id )
388+ x $ submitter_id <- gsub(" _diagnosis.*|-diagnosis.* " ," " ,x $ submitter_id )
383389 # If there are two rows for the same submitter_id
384390 # we will collapse them into one single row
385391 # concatenating all columns using ;
0 commit comments