Skip to content

Commit b431dc9

Browse files
committed
Fix for cases with no treatment information #640
1 parent 75cd233 commit b431dc9

File tree

1 file changed

+39
-33
lines changed

1 file changed

+39
-33
lines changed

R/clinical.R

Lines changed: 39 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,8 @@ GDCquery_clinic <- function(
300300
)
301301
}
302302

303-
if("treatments" %in% colnames(df)){
303+
if( "treatments" %in% colnames(df)) {
304+
304305
treatments <- purrr::map_dfr(
305306
df$treatments,
306307
.f = function(row) {
@@ -312,40 +313,45 @@ GDCquery_clinic <- function(
312313
})
313314

314315
df$treatments <- NULL
315-
treatments$submitter_id <- gsub("_treatment(_[0-9])?|_treatment([0-9])?","", treatments$submitter_id)
316-
treatments <- treatments %>% dplyr::select(
317-
!c("updated_datetime", "state", "created_datetime")
318-
)
319-
treatments <- treatments[treatments$submitter_id %in% df$submitter_id,]
320316

317+
# case where all treatments are NULL
318+
if (!nrow(treatments) == 0) {
321319

322-
# we have now two types of treatment
323-
treatments.pharmaceutical <- treatments[grep("Pharmaceutical",treatments$treatment_type,ignore.case = TRUE),]
324-
treatments.radiation <- treatments[grep("radiation",treatments$treatment_type,ignore.case = TRUE),]
320+
treatments$submitter_id <- gsub("_treatment(_[0-9])?|_treatment([0-9])?","", treatments$submitter_id)
321+
treatments <- treatments %>% dplyr::select(
322+
!c("updated_datetime", "state", "created_datetime")
323+
)
324+
treatments <- treatments[treatments$submitter_id %in% df$submitter_id,]
325325

326-
# Adding a prefix
327-
colnames(treatments.pharmaceutical) <- paste0("treatments_pharmaceutical_",colnames(treatments.pharmaceutical))
328-
colnames(treatments.radiation) <- paste0("treatments_radiation_",colnames(treatments.radiation))
329-
colnames(treatments.radiation)[grep("submitter",colnames(treatments.radiation))] <- "submitter_id"
330-
colnames(treatments.pharmaceutical)[grep("submitter",colnames(treatments.pharmaceutical))] <- "submitter_id"
331326

332-
# If there are two rows for the same submitter_id
333-
# we will collapse them into one single row
334-
# concatenating all columns using ;
335-
aux <- treatments.pharmaceutical %>% dplyr::group_by(submitter_id) %>%
336-
dplyr::summarise(
337-
across(everything(),~ paste(unique(.), collapse = ";"))
338-
)
339-
df <- merge(df, as.data.table(aux), by = "submitter_id", all = TRUE, sort = FALSE)
340-
341-
# If there are two rows for the same submitter_id
342-
# we will collapse them into one single row
343-
# concatenating all columns using ;
344-
aux <- treatments.radiation %>% dplyr::group_by(submitter_id) %>%
345-
dplyr::summarise(
346-
across(everything(),~ paste(unique(.), collapse = ";"))
347-
)
348-
df <- merge(df, as.data.table(aux), by = "submitter_id", all = TRUE, sort = FALSE)
327+
# we have now two types of treatment
328+
treatments.pharmaceutical <- treatments[grep("Pharmaceutical",treatments$treatment_type,ignore.case = TRUE),]
329+
treatments.radiation <- treatments[grep("radiation",treatments$treatment_type,ignore.case = TRUE),]
330+
331+
# Adding a prefix
332+
colnames(treatments.pharmaceutical) <- paste0("treatments_pharmaceutical_",colnames(treatments.pharmaceutical))
333+
colnames(treatments.radiation) <- paste0("treatments_radiation_",colnames(treatments.radiation))
334+
colnames(treatments.radiation)[grep("submitter",colnames(treatments.radiation))] <- "submitter_id"
335+
colnames(treatments.pharmaceutical)[grep("submitter",colnames(treatments.pharmaceutical))] <- "submitter_id"
336+
337+
# If there are two rows for the same submitter_id
338+
# we will collapse them into one single row
339+
# concatenating all columns using ;
340+
aux <- treatments.pharmaceutical %>% dplyr::group_by(submitter_id) %>%
341+
dplyr::summarise(
342+
across(everything(),~ paste(unique(.), collapse = ";"))
343+
)
344+
df <- merge(df, as.data.table(aux), by = "submitter_id", all = TRUE, sort = FALSE)
345+
346+
# If there are two rows for the same submitter_id
347+
# we will collapse them into one single row
348+
# concatenating all columns using ;
349+
aux <- treatments.radiation %>% dplyr::group_by(submitter_id) %>%
350+
dplyr::summarise(
351+
across(everything(),~ paste(unique(.), collapse = ";"))
352+
)
353+
df <- merge(df, as.data.table(aux), by = "submitter_id", all = TRUE, sort = FALSE)
354+
}
349355
}
350356

351357
df$bcr_patient_barcode <- df$submitter_id
@@ -370,7 +376,7 @@ GDCquery_clinic <- function(
370376
df <- cbind(df,primary_site)
371377
}
372378

373-
if("diagnoses" %in% colnames(results)){
379+
if ("diagnoses" %in% colnames(results)) {
374380
diagnoses <- rbindlist(
375381
lapply(
376382
results$diagnoses,
@@ -379,7 +385,7 @@ GDCquery_clinic <- function(
379385
data.frame(NA)
380386
} else {
381387
# HTMCP-03-06-02061 has two diagnoses
382-
x$submitter_id <- gsub("_diagnosis.*","",x$submitter_id)
388+
x$submitter_id <- gsub("_diagnosis.*|-diagnosis.*","",x$submitter_id)
383389
# If there are two rows for the same submitter_id
384390
# we will collapse them into one single row
385391
# concatenating all columns using ;

0 commit comments

Comments
 (0)