-
Notifications
You must be signed in to change notification settings - Fork 0
Baptiste Nusaibah validation #11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
63521d9
15f314a
b3c2fb8
2af87f8
6a4b9e7
c39b6fd
7d743c5
e5d6c1a
99e08ff
dcb1467
e5447f8
d8c1d68
cc6e7fa
60b5318
e272d83
3d6074c
09a1057
632809c
eca9df5
4115b8a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
# function that compares the summrised tables in R versus those in SAS | ||
# output: if difference, dataframe with each row details of difference | ||
# if no difference, dataframe displaying table R and table SAS side by side. | ||
# author: Nusaibah | ||
|
||
|
||
compare_grade <- function(tabR,tabSAS){ | ||
|
||
tab=list() # list containing all difference between the 2 tables | ||
tabRraw=tabR | ||
tabSASraw=tabSAS | ||
|
||
#warning("Different number of grade levels")------------ | ||
if (nrow(tabR)!=nrow(tabSAS)){ | ||
|
||
if (nrow(tabR)<nrow(tabSAS)){ | ||
tab$grade=c(tab$grade,tabSAS[which(!(tabSAS$grade%in% tabR$grade)),"grade"]%>%pull) | ||
tab$level=c(tab$level,rep("Mineur",length(tabSAS[which(!(tabSAS$grade%in% tabR$grade)),"grade"]%>%pull))) | ||
tab$table=c(tab$table,rep("R",length(tabSAS[which(!(tabSAS$grade%in% tabR$grade)),"grade"]%>%pull))) | ||
tab$main=c(tab$main,rep("Missing grade level",length(tabSAS[which(!(tabSAS$grade%in% tabR$grade)),"grade"]%>%pull))) | ||
tab$valueR=c(tab$valueR,rep(NA,length(tabSAS[which(!(tabSAS$grade%in% tabR$grade)),"grade"]%>%pull))) | ||
tab$valueSAS=c(tab$valueSAS,rep("Filled",length(tabSAS[which(!(tabSAS$grade%in% tabR$grade)),"grade"]%>%pull))) | ||
tab$arm=c(tab$arm,rep("All",length(tabSAS[which(!(tabSAS$grade%in% tabR$grade)),"grade"]%>%pull))) | ||
|
||
}else if (nrow(tabR)>nrow(tabSAS)){ | ||
tab$grade=c(tab$grade,tabR[which(!(tabR$grade%in% tabSAS$grade)),"grade"]%>%pull) | ||
tab$level=c(tab$level,rep("Mineur",length(tabR[which(!(tabR$grade%in% tabSAS$grade)),"grade"]%>%pull))) | ||
tab$table=c(tab$table,rep("SAS",length(tabR[which(!(tabR$grade%in% tabSAS$grade)),"grade"]%>%pull))) | ||
tab$main=c(tab$main,rep("Missing grade level",length(tabR[which(!(tabR$grade%in% tabSAS$grade)),"grade"]%>%pull))) | ||
tab$valueR=c(tab$valueR,rep("Filled",length(tabR[which(!(tabR$grade%in% tabSAS$grade)),"grade"]%>%pull))) | ||
tab$valueSAS=c(tab$valueSAS,rep(NA,length(tabR[which(!(tabR$grade%in% tabSAS$grade)),"grade"]%>%pull))) | ||
tab$arm=c(tab$arm,rep("All",length(tabR[which(!(tabR$grade%in% tabSAS$grade)),"grade"]%>%pull))) | ||
|
||
|
||
|
||
} | ||
} | ||
# warning("Different number of arm")---------- | ||
if (ncol(tabR)!=ncol(tabSAS)){ | ||
|
||
if (ncol(tabR)<ncol(tabSAS)){ | ||
tab$level=c(tab$level,"Mineur") | ||
tab$arm=c(tab$arm,paste(unique(str_extract(colnames(tabSAS)[which(!(colnames(tabSAS)%in% colnames(tabR)))],"[:digit:]")),collapse = " & ")) | ||
tab$table=c(tab$table,"R") | ||
tab$main=c(tab$main,"Missing arm") | ||
tab$valueR=c(tab$valueR,NA) | ||
tab$valueSAS=c(tab$valueSAS,"Filled") | ||
tab$grade=c(tab$grade,"All") | ||
|
||
|
||
}else if (ncol(tabR)>ncol(tabSAS)){ | ||
tab$level=c(tab$level,"Mineur") | ||
tab$arm=c(tab$arm,paste(unique(str_extract(colnames(tabR)[which(!(colnames(tabR)%in% colnames(tabSAS)))],"[:digit:]")),collapse = " & ")) | ||
tab$table=c(tab$table,"SAS") | ||
tab$main=c(tab$main,"Missing arm") | ||
tab$valueR=c(tab$valueR,"Filled") | ||
tab$valueSAS=c(tab$valueSAS,NA) | ||
tab$grade=c(tab$grade,"All") | ||
|
||
}} | ||
|
||
#compare the commun elements--------------- | ||
tabR=tabR[,colnames(tabR) %in% colnames(tabSAS)] | ||
tabSAS=tabSAS[,colnames(tabSAS) %in% colnames(tabR)] | ||
|
||
df=tabR%>%arrange(grade)%>%full_join(tabSAS,by="grade",suffix = c(".r",".sas")) | ||
|
||
|
||
indice=which(df[,paste0(tabR%>%select(-grade)%>%colnames(),paste=".r")]!=df[,paste0(tabSAS%>%select(-grade)%>%colnames(),paste=".sas")], | ||
arr.ind=TRUE) # difference numérique number or percentage | ||
indice[,"col"]=indice[,"col"]+1 # parce qu'on avait retiré le grade | ||
|
||
indice=rbind2(indice, | ||
which(is.na(df),arr.ind = T)%>%as.data.frame()) | ||
# difference NA quand 2 grades ne se correspondent pas dans les 2 tables. je garde NA | ||
|
||
if (nrow(indice)!=0){ | ||
for (i in 1: nrow(indice)){ | ||
|
||
if (is.na(df[indice[i,"row"],indice[i,"col"]]) & grepl(".r",colnames(df)[indice[i,"col"]])){ | ||
|
||
tab$level=c(tab$level,"Mineur") | ||
tab$grade=c(tab$grade,df[indice[i,"row"],"grade"]%>%pull) | ||
tab$table=c(tab$table,"R") | ||
tab$main=c(tab$main,"Missing grade level") | ||
tab$valueR=c(tab$valueR,NA) | ||
if (indice[i,"col"]%%2==0){ | ||
tab$valueSAS=c(tab$valueSAS, | ||
df[indice[i,"row"],indice[i,"col"]+2] %>%pull ) | ||
}else{ | ||
tab$valueSAS=c(tab$valueSAS, | ||
paste0(df[indice[i,"row"],indice[i,"col"]+2] %>%pull ,"%")) | ||
} | ||
tab$arm = c(tab$arm,str_extract(string=colnames(df)[indice[i,"col"]],pattern = "[:digit:]")) | ||
|
||
|
||
}else if (is.na(df[indice[i,"row"],indice[i,"col"]]) & grepl(".sas",colnames(df)[indice[i,"col"]])){ | ||
|
||
tab$level=c(tab$level,"Mineur") | ||
tab$grade=c(tab$grade,df[indice[i,"row"],"grade"]%>%pull) | ||
tab$table=c(tab$table,"SAS") | ||
tab$main=c(tab$main,"Missing grade level") | ||
if (indice[i,"col"]%%2==0){ #means number | ||
tab$valueR=c(tab$valueR, df[indice[i,"row"],indice[i,"col"]-2] %>% pull) | ||
}else{ #mean percentage | ||
tab$valueR=c(tab$valueR, paste0(df[indice[i,"row"],indice[i,"col"]-2] %>% pull,"%")) | ||
} | ||
tab$valueSAS=c(tab$valueSAS,NA) | ||
tab$arm = c(tab$arm,str_extract(string=colnames(df)[indice[i,"col"]],pattern = "[:digit:]")) | ||
|
||
}#else | ||
|
||
if(indice[i,"col"]%%2==0){ | ||
|
||
tab$level=c(tab$level,"MAJEUR") | ||
tab$grade=c(tab$grade,df[indice[i,"row"],"grade"]%>%pull) | ||
tab$table=c(tab$table,"Both") | ||
tab$main=c(tab$main,"Different number") | ||
tab$arm = c(tab$arm,str_extract(string=colnames(df)[indice[i,"col"]],pattern = "[:digit:]")) | ||
if (grepl(".sas",colnames(df)[indice[i,"col"]])){ | ||
tab$valueR=c(tab$valueR,df[indice[i,"row"],indice[i,"col"]-2]%>%pull) | ||
tab$valueSAS=c(tab$valueSAS,df[indice[i,"row"],indice[i,"col"]]%>%pull) | ||
}else{ | ||
tab$valueR=c(tab$valueR,df[indice[i,"row"],indice[i,"col"]]%>%pull) | ||
tab$valueSAS=c(tab$valueSAS,df[indice[i,"row"],indice[i,"col"]+2]%>%pull) | ||
} | ||
}else if (indice[i,"col"]%%2==1){ | ||
|
||
tab$level=c(tab$level,"MAJEUR") | ||
tab$grade=c(tab$grade,df[indice[i,"row"],"grade"]%>%pull) | ||
tab$table=c(tab$table,"Both") | ||
tab$main=c(tab$main,"Different percentage") | ||
tab$arm = c(tab$arm,str_extract(string=colnames(df)[indice[i,"col"]],pattern = "[:digit:]")) | ||
if (grepl(".sas",colnames(df)[indice[i,"col"]])){ | ||
tab$valueR=c(tab$valueR,df[indice[i,"row"],indice[i,"col"]-2]%>%pull) | ||
tab$valueSAS=c(tab$valueSAS,df[indice[i,"row"],indice[i,"col"]]%>%pull) | ||
}else{ | ||
tab$valueR=c(tab$valueR,df[indice[i,"row"],indice[i,"col"]]%>%pull) | ||
tab$valueSAS=c(tab$valueSAS,df[indice[i,"row"],indice[i,"col"]+2]%>%pull) | ||
} | ||
|
||
} | ||
|
||
|
||
}} | ||
|
||
|
||
|
||
if (nrow(tabRraw)==nrow(tabSASraw) & ncol(tabRraw)==ncol(tabSASraw) & nrow(indice)==0 ){ | ||
# warning("Comparison result: same outputs") | ||
|
||
tablo = tabRraw%>% | ||
full_join(tabSASraw, | ||
by="grade", | ||
suffix = c("_r","_sas"))%>% | ||
flextable()%>% | ||
add_footer_lines( "Comparison result: same outputs", top=TRUE) %>% | ||
autofit() %>% | ||
add_header_row( values = c("grade","R table", "SAS table"), colwidths = c(1,ncol(tabRraw)-1, ncol(tabSASraw)-1)) %>% | ||
align(part="header", align="center",i=1)%>% | ||
bold(part="header") %>% | ||
# bold(i=~N1_r!=N1_sas,j=c(2,4))%>% | ||
vline(j=ncol(tabRraw), part = "body") %>% | ||
|
||
set_header_labels(values= c(colnames(tabRraw),colnames(tabSASraw)[-1])) %>% | ||
merge_at(i=c(1,2),j=1, part = "header")%>% | ||
align(part="body", align="center",j=1) | ||
}else{ | ||
tablo=as.data.frame(tab)%>% | ||
#distinct(level,grade,table,main,.keep_all = TRUE) %>% | ||
flextable()%>% | ||
bold(part="header") %>% | ||
align(part="body", align="right",i=~str_detect(valueR,pattern="[:digit:]"),j="valueR")%>% | ||
align(part="body", align="right",i=~str_detect(valueSAS,pattern="[:digit:]"),j="valueSAS")%>% | ||
autofit() %>% | ||
bg(i=~level=="MAJEUR",bg="#f06d4d") | ||
} | ||
|
||
|
||
return(tablo) | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,194 @@ | ||
# function that compares the summurised tables in R versus those in SAS | ||
# output: if difference, dataframe with each row details of difference | ||
# if no difference, dataframe displaying table R and table SAS side by side. | ||
# author: Nusaibah | ||
|
||
compare_soc <- function(tabR,tabSAS){ | ||
|
||
# ae_table_soc() affiche toutes les colonnes grade 1à 5 NA et total(option) (pour chaque bras si le bras est indiqué) | ||
# supprimée les colonnes et lignes vides pour optimiser l'algorithme | ||
tabR = tabR[,!apply(is.na(tabR), 2, all)] | ||
tabR = tabR[!apply(is.na(tabR%>%select(-soc)), 1, all),] | ||
|
||
tabRraw=tabR | ||
tabSASraw=tabSAS | ||
tab=list() | ||
|
||
if ("term_"%in% c(colnames(tabR),colnames(tabSAS))){ | ||
tabR = tabR %>% arrange(soc,term_) | ||
}else{ | ||
tabR = tabR %>% arrange(soc) | ||
} | ||
|
||
## warning("Different number of SOC")---------------- | ||
if (nrow(tabR)!=nrow(tabSAS)){ | ||
|
||
socdiff=table(tabR$soc) %>% | ||
as.data.frame() %>% | ||
full_join(table(tabSAS$soc) %>% | ||
as.data.frame(), | ||
by="Var1",suffix = c(".r",".sas")) %>% | ||
filter(Freq.r !=Freq.sas | (is.na(Freq.r) |is.na(Freq.sas))) | ||
|
||
if (nrow(tabR)<nrow(tabSAS)){ | ||
|
||
tab$level=c(tab$level,"Mineur") | ||
tab$soc=c(tab$soc,paste(socdiff%>%filter(Freq.r<Freq.sas| is.na(Freq.r))%>%select(Var1)%>%pull,collapse = " & ")) | ||
tab$table=c(tab$table,"R") | ||
if(any(grepl("level",colnames(tabR)))){ | ||
tab$arm=c(tab$arm,NA)} | ||
if ("term_"%in% c(colnames(tabR),colnames(tabSAS))){ | ||
tab$term_=c(tab$term_,paste(tabSAS[which(!(tabSAS$term_%in% tabR$term_)),"term_"]%>%pull,collapse = " & ")) | ||
tab$main=c(tab$main,"Missing PT item") | ||
}else{ | ||
tab$term_=c(tab$term_,NA) | ||
tab$main=c(tab$main,"Missing SOC item") | ||
} | ||
tab$valueR=c(tab$valueR,NA) | ||
tab$valueSAS=c(tab$valueSAS,"Filled") | ||
|
||
}else if (nrow(tabR)>nrow(tabSAS)){ | ||
|
||
|
||
tab$level=c(tab$level,"Mineur") | ||
tab$soc=c(tab$soc,paste(socdiff%>%filter(Freq.r>Freq.sas | is.na(Freq.sas))%>%select(Var1)%>%pull,collapse = " & ")) | ||
tab$table=c(tab$table,"SAS") | ||
tab$grade=c(tab$grade,NA) | ||
if(any(grepl("bras",colnames(tabR)))){ | ||
tab$arm=c(tab$arm,NA)} | ||
if ("term_"%in% c(colnames(tabR),colnames(tabSAS))){ | ||
tab$term_=c(tab$term_,paste(tabR[which(!(tabR$term_%in% tabSAS$term_)),"term_"]%>%pull,collapse = " & ")) | ||
tab$main=c(tab$main,"Missing PT item") | ||
}else{ | ||
tab$term_=c(tab$term_,NA) | ||
tab$main=c(tab$main,"Missing SOC item") | ||
} | ||
tab$valueR=c(tab$valueR,"Filled") | ||
tab$valueSAS=c(tab$valueSAS,NA) | ||
} | ||
} | ||
## #warning("Different number of column (arm or grade)")-------------- | ||
if (ncol(tabR)!=ncol(tabSAS)){ | ||
if (ncol(tabR)<ncol(tabSAS)){ | ||
|
||
tab$level=c(tab$level,"Mineur") | ||
tab$grade=c(tab$grade,paste(colnames(tabSAS)[-(which(colnames(tabSAS) %in% colnames(tabR)))],collapse=" & ")) | ||
tab$table=c(tab$table,"R") | ||
tab$soc=c(tab$soc,NA) | ||
tab$main=c(tab$main,"Missing grade") | ||
tab$valueR=c(tab$valueR,NA) | ||
tab$valueSAS=c(tab$valueSAS,"Filled") | ||
tab$term_=c(tab$term_,NA) | ||
|
||
if(any(grepl("bras",colnames(tabR)))){ | ||
tab$arm=c(tab$arm,paste(unique(str_extract(colnames(tabSAS)[which(!(colnames(tabSAS)%in% colnames(tabR)))],"bras[:alnum:]+")),collapse = " & ")) | ||
} | ||
}else if (ncol(tabR)>ncol(tabSAS)){ | ||
|
||
tab$level=c(tab$level,"Mineur") | ||
tab$soc=c(tab$soc,NA) | ||
tab$grade=c(tab$grade,paste(colnames(tabR)[-(which(colnames(tabR) %in% colnames(tabSAS)))],collapse = " & ")) | ||
if(any(grepl("bras",colnames(tabR)))){ | ||
tab$arm=c(tab$arm,paste(unique(str_extract(colnames(tabR)[which(!(colnames(tabR)%in% colnames(tabSAS)))],"bras[:alnum:]+")),collapse = " & ")) | ||
} | ||
tab$table=c(tab$table,"SAS") | ||
tab$main=c(tab$main,"Missing grade") | ||
tab$valueR=c(tab$valueR,"Filled") | ||
tab$valueSAS=c(tab$valueSAS,NA) | ||
tab$term_=c(tab$term_,NA) | ||
|
||
} | ||
} | ||
## commun element validation ------------ | ||
if ("term_"%in% c(colnames(tabR),colnames(tabSAS))){ | ||
dfsas=tabSAS%>% | ||
pivot_longer(-c("soc","term_"),names_to = "grade",values_to = "count")%>% | ||
mutate(table="SAS") | ||
|
||
|
||
df=tabR%>%arrange(soc,term_)%>% | ||
pivot_longer(-c("soc","term_"),names_to = "grade",values_to = "count")%>% | ||
mutate(table="R")%>% | ||
full_join(dfsas, #instead of full_join | ||
by=c("soc","term_","grade"), | ||
suffix = c(".r",".sas")) | ||
|
||
|
||
if (any(grepl("bras",df$grade))){ | ||
|
||
df=df%>%separate(col="grade",into=c("arm","grade"),sep="_")} #instead of _ | ||
|
||
indice= df%>%filter(count.r!=count.sas | is.na(count.r)!=is.na(count.sas)) | ||
|
||
|
||
}else{ | ||
df=tabR%>%arrange(soc)%>% | ||
pivot_longer(-c("soc"),names_to = "grade",values_to = "count")%>% | ||
mutate(table="R")%>% | ||
full_join(tabSAS%>% #instead of full | ||
pivot_longer(-c("soc"),names_to = "grade",values_to = "count")%>% | ||
mutate(table="SAS"), | ||
by=c("soc","grade"), | ||
suffix = c(".r",".sas")) | ||
if (any(grepl("bras",df$grade))){ df=df%>%mutate("arm"=str_extract(grade,"bras[:alnum:]+"))} | ||
indice=df[which(df$count.r!=df$count.sas | (is.na(df$count.r) & !is.na(df$count.sas)) | | ||
(!is.na(df$count.r) & is.na(df$count.sas)) | ||
,arr.ind = T),] | ||
} | ||
|
||
if (nrow(indice)!=0){ | ||
|
||
tab =rbind.fill(as.data.frame(tab),indice%>% | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. La fonction |
||
mutate(level="MAJEUR", | ||
table="Both", | ||
main="Different value")%>% | ||
dplyr::rename(valueR="count.r", | ||
valueSAS="count.sas")%>% | ||
select(-c("table.r","table.sas"))) | ||
} | ||
|
||
|
||
if (nrow(tabRraw)==nrow(tabSASraw) & ncol(tabRraw)==ncol(tabSASraw) & nrow(indice)==0 ){ | ||
# warning("Comparison result: same outputs") | ||
|
||
if ("term_" %in% colnames(tabRraw)){ jointure=c("soc","term_") | ||
colsize=c(1,1,ncol(tabRraw)-2,ncol(tabSASraw)-2)}else{jointure="soc" | ||
colsize=c(1,ncol(tabRraw)-1,ncol(tabSASraw)-1) } | ||
tab = tabRraw%>% | ||
full_join(tabSASraw, | ||
by=jointure, | ||
suffix = c("_r","_sas"))%>% | ||
flextable()%>% | ||
add_footer_lines( "Comparison result: same outputs", top=TRUE) %>% | ||
autofit() %>% | ||
add_header_row( values = c(jointure,"R table", "SAS table"), colwidths = colsize) %>% | ||
align(part="header", align="center",i=1)%>% | ||
bold(part="header") %>% | ||
# bold(i=~N1_r!=N1_sas,j=c(2,4))%>% | ||
vline(j=ncol(tabRraw), part = "body") %>% | ||
|
||
set_header_labels(values= c(colnames(tabRraw),colnames(tabSASraw)[-c(1:length(jointure))])) %>% | ||
merge_at(i=c(1,2),j=1, part = "header")%>% | ||
align(part="body", align="left",j=1) | ||
|
||
if ("term_" %in% colnames(tabRraw)){ | ||
tab = tab %>% | ||
merge_at(i=c(1,2), j=2, part = "header")%>% | ||
align(part="body", align="left",j=2)} | ||
}else{ | ||
tab=as.data.frame(tab)%>% | ||
# distinct(level,grade,table,main,soc,.keep_all = TRUE)%>% | ||
flextable()%>% | ||
bold(part="header") %>% | ||
align(part="body", align="right",i=~str_detect(valueR,pattern="[:digit:]"),j="valueR")%>% | ||
align(part="body", align="right",i=~str_detect(valueSAS,pattern="[:digit:]"),j="valueSAS")%>% | ||
autofit() %>% | ||
bg(i=~level=="MAJEUR",bg="#f06d4d") | ||
} | ||
|
||
|
||
|
||
return(tab) | ||
|
||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ce full join émet un warning :
Il est émit quand
term
est renseigné, et je crois qu'il fautdrait écrirepivot_longer(-any_of(c("soc", "term"), ...)
, ou quelque chose du genre (et dansby
aussi).J'ai l'impression que le problème d'output vient de là.