@@ -300,7 +300,8 @@ GDCquery_clinic <- function(
300
300
)
301
301
}
302
302
303
- if (" treatments" %in% colnames(df )){
303
+ if ( " treatments" %in% colnames(df )) {
304
+
304
305
treatments <- purrr :: map_dfr(
305
306
df $ treatments ,
306
307
.f = function (row ) {
@@ -312,40 +313,45 @@ GDCquery_clinic <- function(
312
313
})
313
314
314
315
df $ treatments <- NULL
315
- treatments $ submitter_id <- gsub(" _treatment(_[0-9])?|_treatment([0-9])?" ," " , treatments $ submitter_id )
316
- treatments <- treatments %> % dplyr :: select(
317
- ! c(" updated_datetime" , " state" , " created_datetime" )
318
- )
319
- treatments <- treatments [treatments $ submitter_id %in% df $ submitter_id ,]
320
316
317
+ # Handle the case where all treatments are NULL: only process when rows exist
318
+ if (! nrow(treatments ) == 0 ) {
321
319
322
- # we have now two types of treatment
323
- treatments.pharmaceutical <- treatments [grep(" Pharmaceutical" ,treatments $ treatment_type ,ignore.case = TRUE ),]
324
- treatments.radiation <- treatments [grep(" radiation" ,treatments $ treatment_type ,ignore.case = TRUE ),]
320
+ treatments $ submitter_id <- gsub(" _treatment(_[0-9])?|_treatment([0-9])?" ," " , treatments $ submitter_id )
321
+ treatments <- treatments %> % dplyr :: select(
322
+ ! c(" updated_datetime" , " state" , " created_datetime" )
323
+ )
324
+ treatments <- treatments [treatments $ submitter_id %in% df $ submitter_id ,]
325
325
326
- # Adding a prefix
327
- colnames(treatments.pharmaceutical ) <- paste0(" treatments_pharmaceutical_" ,colnames(treatments.pharmaceutical ))
328
- colnames(treatments.radiation ) <- paste0(" treatments_radiation_" ,colnames(treatments.radiation ))
329
- colnames(treatments.radiation )[grep(" submitter" ,colnames(treatments.radiation ))] <- " submitter_id"
330
- colnames(treatments.pharmaceutical )[grep(" submitter" ,colnames(treatments.pharmaceutical ))] <- " submitter_id"
331
326
332
- # If there are two rows for the same submitter_id
333
- # we will collapse them into one single row
334
- # concatenating all columns using ;
335
- aux <- treatments.pharmaceutical %> % dplyr :: group_by(submitter_id ) %> %
336
- dplyr :: summarise(
337
- across(everything(),~ paste(unique(. ), collapse = " ;" ))
338
- )
339
- df <- merge(df , as.data.table(aux ), by = " submitter_id" , all = TRUE , sort = FALSE )
340
-
341
- # If there are two rows for the same submitter_id
342
- # we will collapse them into one single row
343
- # concatenating all columns using ;
344
- aux <- treatments.radiation %> % dplyr :: group_by(submitter_id ) %> %
345
- dplyr :: summarise(
346
- across(everything(),~ paste(unique(. ), collapse = " ;" ))
347
- )
348
- df <- merge(df , as.data.table(aux ), by = " submitter_id" , all = TRUE , sort = FALSE )
327
+ # we now have two types of treatment: pharmaceutical and radiation
328
+ treatments.pharmaceutical <- treatments [grep(" Pharmaceutical" ,treatments $ treatment_type ,ignore.case = TRUE ),]
329
+ treatments.radiation <- treatments [grep(" radiation" ,treatments $ treatment_type ,ignore.case = TRUE ),]
330
+
331
+ # Adding a prefix
332
+ colnames(treatments.pharmaceutical ) <- paste0(" treatments_pharmaceutical_" ,colnames(treatments.pharmaceutical ))
333
+ colnames(treatments.radiation ) <- paste0(" treatments_radiation_" ,colnames(treatments.radiation ))
334
+ colnames(treatments.radiation )[grep(" submitter" ,colnames(treatments.radiation ))] <- " submitter_id"
335
+ colnames(treatments.pharmaceutical )[grep(" submitter" ,colnames(treatments.pharmaceutical ))] <- " submitter_id"
336
+
337
+ # If there are two rows for the same submitter_id
338
+ # we will collapse them into one single row
339
+ # concatenating all columns using ;
340
+ aux <- treatments.pharmaceutical %> % dplyr :: group_by(submitter_id ) %> %
341
+ dplyr :: summarise(
342
+ across(everything(),~ paste(unique(. ), collapse = " ;" ))
343
+ )
344
+ df <- merge(df , as.data.table(aux ), by = " submitter_id" , all = TRUE , sort = FALSE )
345
+
346
+ # If there are two rows for the same submitter_id
347
+ # we will collapse them into one single row
348
+ # concatenating all columns using ;
349
+ aux <- treatments.radiation %> % dplyr :: group_by(submitter_id ) %> %
350
+ dplyr :: summarise(
351
+ across(everything(),~ paste(unique(. ), collapse = " ;" ))
352
+ )
353
+ df <- merge(df , as.data.table(aux ), by = " submitter_id" , all = TRUE , sort = FALSE )
354
+ }
349
355
}
350
356
351
357
df $ bcr_patient_barcode <- df $ submitter_id
@@ -370,7 +376,7 @@ GDCquery_clinic <- function(
370
376
df <- cbind(df ,primary_site )
371
377
}
372
378
373
- if (" diagnoses" %in% colnames(results )){
379
+ if (" diagnoses" %in% colnames(results )) {
374
380
diagnoses <- rbindlist(
375
381
lapply(
376
382
results $ diagnoses ,
@@ -379,7 +385,7 @@ GDCquery_clinic <- function(
379
385
data.frame (NA )
380
386
} else {
381
387
# HTMCP-03-06-02061 has two diagnoses
382
- x $ submitter_id <- gsub(" _diagnosis.*" ," " ,x $ submitter_id )
388
+ x $ submitter_id <- gsub(" _diagnosis.*|-diagnosis.* " ," " ,x $ submitter_id )
383
389
# If there are two rows for the same submitter_id
384
390
# we will collapse them into one single row
385
391
# concatenating all columns using ;
0 commit comments