@@ -32,102 +32,102 @@ compare_means <- function(dataset, var1, var2,
32
32
test = " t" ,
33
33
data_filter = " " ) {
34
34
35
- vars <- c(var1 , var2 )
36
- dat <- getdata(dataset , vars , filt = data_filter )
35
+ vars <- c(var1 , var2 )
36
+ dat <- getdata(dataset , vars , filt = data_filter )
37
37
if (! is_string(dataset )) dataset <- deparse(substitute(dataset )) %> % set_attr(" df" , TRUE )
38
38
39
- # # in case : was used for var2
40
- vars <- colnames(dat )
39
+ # # in case : was used for var2
40
+ vars <- colnames(dat )
41
41
42
42
if (is.numeric(dat [[var1 ]])) {
43
- dat %<> % gather_(" variable" , " values" , vars )
44
- dat [[" variable" ]] %<> % factor (levels = vars )
45
- cname <- " "
46
- } else {
43
+ dat %<> % gather_(" variable" , " values" , vars )
44
+ dat [[" variable" ]] %<> % factor (levels = vars )
45
+ cname <- " "
46
+ } else {
47
47
if (is.character(dat [[var1 ]])) dat [[var1 ]] <- as.factor(dat [[var1 ]])
48
- colnames(dat ) <- c(" variable" ," values" )
49
- cname <- var1
48
+ colnames(dat ) <- c(" variable" ," values" )
49
+ cname <- var1
50
50
}
51
51
52
52
# # needed with new tidyr
53
53
dat $ variable %<> % as.factor
54
54
55
- # # check there is variation in the data
55
+ # # check there is variation in the data
56
56
if (any(summarise_all(dat , funs(does_vary )) == FALSE ))
57
- return (" Test could not be calculated (no variation). Please select another variable." %> %
58
- add_class(" compare_means" ))
57
+ return (" Test could not be calculated (no variation). Please select another variable." %> %
58
+ add_class(" compare_means" ))
59
59
60
- # # resetting option to independent if the number of observations is unequal
60
+ # # resetting option to independent if the number of observations is unequal
61
61
# # summary on factor gives counts
62
62
if (samples == " paired" ) {
63
63
if (summary(dat [[" variable" ]]) %> % {max(. ) != min(. )})
64
64
samples <- " independent (obs. per level unequal)"
65
65
}
66
66
67
- levs <- levels(dat [[" variable" ]])
67
+ levs <- levels(dat [[" variable" ]])
68
68
69
69
cmb <- combn(levs , 2 ) %> % t %> % as.data.frame
70
70
rownames(cmb ) <- cmb %> % apply(1 , paste , collapse = " :" )
71
71
colnames(cmb ) <- c(" group1" ," group2" )
72
72
73
- if (! is_empty(comb )) {
74
- if (all(comb %in% rownames(cmb ))) {
75
- cmb <- cmb [comb , ]
76
- } else {
77
- cmb <- cmb [1 ,]
78
- }
79
- }
73
+ if (! is_empty(comb )) {
74
+ if (all(comb %in% rownames(cmb ))) {
75
+ cmb <- cmb [comb , ]
76
+ } else {
77
+ cmb <- cmb [1 ,]
78
+ }
79
+ }
80
80
81
81
res <- cmb
82
82
res [ ,c(" t.value" ," p.value" , " df" , " ci_low" , " ci_high" , " cis_low" , " cis_high" )] <- 0
83
83
84
84
for (i in 1 : nrow(cmb )) {
85
- sel <- cmb [i ,]
85
+ sel <- cmb [i ,]
86
86
87
- x <- filter_(dat , paste0(" variable == '" , sel [[1 ]], " '" )) %> % . [[" values" ]]
88
- y <- filter_(dat , paste0(" variable == '" , sel [[2 ]], " '" )) %> % . [[" values" ]]
87
+ x <- filter_(dat , paste0(" variable == '" , sel [[1 ]], " '" )) %> % . [[" values" ]]
88
+ y <- filter_(dat , paste0(" variable == '" , sel [[2 ]], " '" )) %> % . [[" values" ]]
89
89
90
- res [i ,c(" t.value" ," p.value" , " df" , " ci_low" , " ci_high" )] <-
91
- t.test(x , y , paired = samples == " paired" , alternative = alternative , conf.level = conf_lev ) %> %
92
- tidy %> % . [1 , c(" statistic" , " p.value" ," parameter" , " conf.low" , " conf.high" )]
90
+ res [i ,c(" t.value" ," p.value" , " df" , " ci_low" , " ci_high" )] <-
91
+ t.test(x , y , paired = samples == " paired" , alternative = alternative , conf.level = conf_lev ) %> %
92
+ tidy %> % . [1 , c(" statistic" , " p.value" ," parameter" , " conf.low" , " conf.high" )]
93
93
94
- if (test != " t" ) {
95
- res [i ," p.value" ] <-
96
- wilcox.test(x , y , paired = samples == " paired" , alternative = alternative ,
97
- conf.int = FALSE , conf.level = conf_lev ) %> %
98
- tidy %> % . [1 ," p.value" ]
99
- }
94
+ if (test != " t" ) {
95
+ res [i ," p.value" ] <-
96
+ wilcox.test(x , y , paired = samples == " paired" , alternative = alternative ,
97
+ conf.int = FALSE , conf.level = conf_lev ) %> %
98
+ tidy %> % . [1 ," p.value" ]
99
+ }
100
100
101
- # # bootstrap confidence intervals
102
- # # seem almost identical, even with highly skewed data
103
- # nr_x <- length(x)
104
- # nr_y <- length(y)
101
+ # # bootstrap confidence intervals
102
+ # # seem almost identical, even with highly skewed data
103
+ # nr_x <- length(x)
104
+ # nr_y <- length(y)
105
105
106
- # sim_ci <-
107
- # replicate(1000, mean(sample(x, nr_x, replace = TRUE)) -
108
- # mean(sample(y, nr_y, replace = TRUE))) %>%
109
- # quantile(probs = {(1-conf_lev)/2} %>% c(., 1 - .))
106
+ # sim_ci <-
107
+ # replicate(1000, mean(sample(x, nr_x, replace = TRUE)) -
108
+ # mean(sample(y, nr_y, replace = TRUE))) %>%
109
+ # quantile(probs = {(1-conf_lev)/2} %>% c(., 1 - .))
110
110
111
- # res[i, c("cis_low", "cis_high")] <- sim_ci
111
+ # res[i, c("cis_low", "cis_high")] <- sim_ci
112
112
113
113
}
114
114
rm(x ,y ,sel )
115
115
116
- if (adjust != " none" )
117
- res $ p.value %<> % p.adjust(method = adjust )
116
+ if (adjust != " none" )
117
+ res $ p.value %<> % p.adjust(method = adjust )
118
118
119
- # # from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
120
- ci_calc <- function (se , n , conf.lev = .95 )
121
- se * qt(conf.lev / 2 + .5 , n - 1 )
119
+ # # from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
120
+ ci_calc <- function (se , n , conf.lev = .95 )
121
+ se * qt(conf.lev / 2 + .5 , n - 1 )
122
122
123
- dat_summary <-
124
- dat %> %
125
- group_by_(" variable" ) %> %
123
+ dat_summary <-
124
+ dat %> %
125
+ group_by_(" variable" ) %> %
126
126
summarise_all(funs(mean = mean , n = length(. ), sd , se = sd / sqrt(n ),
127
- ci = ci_calc(se , n , conf_lev ))) %> %
127
+ ci = ci_calc(se , n , conf_lev ))) %> %
128
128
rename_(.dots = setNames(" variable" , cname ))
129
129
130
- vars <- paste0(vars , collapse = " , " )
130
+ vars <- paste0(vars , collapse = " , " )
131
131
as.list(environment()) %> % add_class(" compare_means" )
132
132
}
133
133
@@ -154,20 +154,20 @@ compare_means <- function(dataset, var1, var2,
154
154
# ' @export
155
155
summary.compare_means <- function (object , show = FALSE , dec = 3 , ... ) {
156
156
157
- if (is.character(object )) return (object )
157
+ if (is.character(object )) return (object )
158
158
159
159
cat(paste0(" Pairwise mean comparisons (" , object $ test , " -test)\n " ))
160
- cat(" Data :" , object $ dataset , " \n " )
161
- if (object $ data_filter %> % gsub(" \\ s" ," " ,. ) != " " )
162
- cat(" Filter :" , gsub(" \\ n" ," " , object $ data_filter ), " \n " )
163
- cat(" Variables :" , object $ vars , " \n " )
164
- cat(" Samples :" , object $ samples , " \n " )
165
- cat(" Confidence:" , object $ conf_lev , " \n " )
166
- cat(" Adjustment:" , if (object $ adjust == " bonf" ) " Bonferroni" else " None" , " \n\n " )
160
+ cat(" Data :" , object $ dataset , " \n " )
161
+ if (object $ data_filter %> % gsub(" \\ s" ," " ,. ) != " " )
162
+ cat(" Filter :" , gsub(" \\ n" ," " , object $ data_filter ), " \n " )
163
+ cat(" Variables :" , object $ vars , " \n " )
164
+ cat(" Samples :" , object $ samples , " \n " )
165
+ cat(" Confidence:" , object $ conf_lev , " \n " )
166
+ cat(" Adjustment:" , if (object $ adjust == " bonf" ) " Bonferroni" else " None" , " \n\n " )
167
167
168
168
object $ dat_summary [,- 1 ] %<> % round(dec )
169
169
print(object $ dat_summary %> % as.data.frame , row.names = FALSE )
170
- cat(" \n " )
170
+ cat(" \n " )
171
171
172
172
hyp_symbol <- c(" two.sided" = " not equal to" ,
173
173
" less" = " <" ,
@@ -176,31 +176,31 @@ summary.compare_means <- function(object, show = FALSE, dec = 3, ...) {
176
176
means <- object $ dat_summary $ mean
177
177
names(means ) <- object $ dat_summary [[1 ]] %> % as.character
178
178
179
- # # determine lower and upper % for ci
180
- ci_perc <- ci_label(object $ alternative , object $ conf_lev )
181
-
182
- mod <- object $ res
183
- mod $ `Alt. hyp.` <- paste(mod $ group1 ,hyp_symbol ,mod $ group2 ," " )
184
- mod $ `Null hyp.` <- paste(mod $ group1 ," =" ,mod $ group2 , " " )
185
- mod $ diff <- { means [mod $ group1 %> % as.character ] - means [mod $ group2 %> % as.character ] } %> % round(dec )
186
-
187
- if (show ) {
188
- # mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "p.value")]
189
- mod $ se <- (mod $ diff / mod $ t.value ) %> % round(dec )
190
- mod <- mod [,c(" Null hyp." , " Alt. hyp." , " diff" , " p.value" , " se" , " t.value" , " df" , " ci_low" , " ci_high" )]
191
- # mod <- mod[,c("Alt. hyp.", "Null hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "cis_low", "cis_high", "p.value")]
192
- if (! is.integer(mod [[" df" ]])) mod [[" df" ]] %<> % round(dec )
193
- mod [,c(" t.value" , " ci_low" ," ci_high" )] %<> % round(dec )
194
- mod <- rename_(mod , .dots = setNames(c(" ci_low" ," ci_high" ), ci_perc ))
195
- } else {
196
- mod <- mod [,c(" Null hyp." , " Alt. hyp." , " diff" , " p.value" )]
197
- }
198
-
199
- mod $ ` ` <- sig_stars(mod $ p.value )
200
- mod $ p.value <- round(mod $ p.value , dec )
201
- mod $ p.value [ mod $ p.value < .001 ] <- " < .001"
202
- print(mod , row.names = FALSE , right = FALSE )
203
- cat(" \n Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n " )
179
+ # # determine lower and upper % for ci
180
+ ci_perc <- ci_label(object $ alternative , object $ conf_lev )
181
+
182
+ mod <- object $ res
183
+ mod $ `Alt. hyp.` <- paste(mod $ group1 ,hyp_symbol ,mod $ group2 ," " )
184
+ mod $ `Null hyp.` <- paste(mod $ group1 ," =" ,mod $ group2 , " " )
185
+ mod $ diff <- { means [mod $ group1 %> % as.character ] - means [mod $ group2 %> % as.character ] } %> % round(dec )
186
+
187
+ if (show ) {
188
+ # mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "p.value")]
189
+ mod $ se <- (mod $ diff / mod $ t.value ) %> % round(dec )
190
+ mod <- mod [,c(" Null hyp." , " Alt. hyp." , " diff" , " p.value" , " se" , " t.value" , " df" , " ci_low" , " ci_high" )]
191
+ # mod <- mod[,c("Alt. hyp.", "Null hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "cis_low", "cis_high", "p.value")]
192
+ if (! is.integer(mod [[" df" ]])) mod [[" df" ]] %<> % round(dec )
193
+ mod [,c(" t.value" , " ci_low" ," ci_high" )] %<> % round(dec )
194
+ mod <- rename_(mod , .dots = setNames(c(" ci_low" ," ci_high" ), ci_perc ))
195
+ } else {
196
+ mod <- mod [,c(" Null hyp." , " Alt. hyp." , " diff" , " p.value" )]
197
+ }
198
+
199
+ mod $ ` ` <- sig_stars(mod $ p.value )
200
+ mod $ p.value <- round(mod $ p.value , dec )
201
+ mod $ p.value [ mod $ p.value < .001 ] <- " < .001"
202
+ print(mod , row.names = FALSE , right = FALSE )
203
+ cat(" \n Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n " )
204
204
}
205
205
206
206
# ' Plot method for the compare_means function
@@ -223,59 +223,59 @@ summary.compare_means <- function(object, show = FALSE, dec = 3, ...) {
223
223
# ' @export
224
224
plot.compare_means <- function (x , plots = " scatter" , shiny = FALSE , custom = FALSE , ... ) {
225
225
226
- if (is.character(x )) return (x )
227
- object <- x ; rm(x )
228
-
229
- dat <- object $ dat
230
- v1 <- colnames(dat )[1 ]
231
- v2 <- colnames(dat )[- 1 ]
232
-
233
- # # cname is equal to " " when the xvar is numeric
234
- if (object $ cname == " " ) {
235
- var1 <- v1
236
- var2 <- v2
237
- } else {
238
- var1 <- object $ var1
239
- var2 <- object $ var2
240
- }
241
-
242
- # # from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
243
- plot_list <- list ()
244
- if (" bar" %in% plots ) {
245
- colnames(object $ dat_summary )[1 ] <- " variable"
246
- # # use of `which` allows the user to change the order of the plots shown
247
- plot_list [[which(" bar" == plots )]] <-
248
- ggplot(object $ dat_summary ,
249
- aes_string(x = " variable" , y = " mean" , fill = " variable" )) +
250
- geom_bar(stat = " identity" ) +
251
- geom_errorbar(width = .1 , aes(ymin = mean - ci , ymax = mean + ci )) +
252
- geom_errorbar(width = .05 , aes(ymin = mean - se , ymax = mean + se ), colour = " blue" ) +
253
- theme(legend.position = " none" ) +
254
- xlab(var1 ) + ylab(paste0(var2 , " (mean)" ))
255
- }
256
-
257
- # # graphs on full data
258
- if (" box" %in% plots ) {
259
- plot_list [[which(" box" == plots )]] <-
260
- visualize(dat , xvar = v1 , yvar = v2 , type = " box" , custom = TRUE ) +
261
- theme(legend.position = " none" ) + xlab(var1 ) + ylab(var2 )
262
- }
263
-
264
- if (" density" %in% plots ) {
265
- plot_list [[which(" density" == plots )]] <-
266
- visualize(dat , xvar = v2 , type = " density" , fill = v1 , custom = TRUE ) +
267
- xlab(var2 ) + guides(fill = guide_legend(title = var1 ))
268
- }
269
-
270
- if (" scatter" %in% plots ) {
271
- plot_list [[which(" scatter" == plots )]] <-
272
- visualize(dat , xvar = v1 , yvar = v2 , type = " scatter" , check = " jitter" , alpha = .3 , custom = TRUE ) +
273
- xlab(var1 ) + ylab(paste0(var2 , " (mean)" ))
226
+ if (is.character(x )) return (x )
227
+ object <- x ; rm(x )
228
+
229
+ dat <- object $ dat
230
+ v1 <- colnames(dat )[1 ]
231
+ v2 <- colnames(dat )[- 1 ]
232
+
233
+ # # cname is equal to " " when the xvar is numeric
234
+ if (object $ cname == " " ) {
235
+ var1 <- v1
236
+ var2 <- v2
237
+ } else {
238
+ var1 <- object $ var1
239
+ var2 <- object $ var2
240
+ }
241
+
242
+ # # from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
243
+ plot_list <- list ()
244
+ if (" bar" %in% plots ) {
245
+ colnames(object $ dat_summary )[1 ] <- " variable"
246
+ # # use of `which` allows the user to change the order of the plots shown
247
+ plot_list [[which(" bar" == plots )]] <-
248
+ ggplot(object $ dat_summary ,
249
+ aes_string(x = " variable" , y = " mean" , fill = " variable" )) +
250
+ geom_bar(stat = " identity" ) +
251
+ geom_errorbar(width = .1 , aes(ymin = mean - ci , ymax = mean + ci )) +
252
+ geom_errorbar(width = .05 , aes(ymin = mean - se , ymax = mean + se ), colour = " blue" ) +
253
+ theme(legend.position = " none" ) +
254
+ xlab(var1 ) + ylab(paste0(var2 , " (mean)" ))
255
+ }
256
+
257
+ # # graphs on full data
258
+ if (" box" %in% plots ) {
259
+ plot_list [[which(" box" == plots )]] <-
260
+ visualize(dat , xvar = v1 , yvar = v2 , type = " box" , custom = TRUE ) +
261
+ theme(legend.position = " none" ) + xlab(var1 ) + ylab(var2 )
262
+ }
263
+
264
+ if (" density" %in% plots ) {
265
+ plot_list [[which(" density" == plots )]] <-
266
+ visualize(dat , xvar = v2 , type = " density" , fill = v1 , custom = TRUE ) +
267
+ xlab(var2 ) + guides(fill = guide_legend(title = var1 ))
268
+ }
269
+
270
+ if (" scatter" %in% plots ) {
271
+ plot_list [[which(" scatter" == plots )]] <-
272
+ visualize(dat , xvar = v1 , yvar = v2 , type = " scatter" , check = " jitter" , alpha = .3 , custom = TRUE ) +
273
+ xlab(var1 ) + ylab(paste0(var2 , " (mean)" ))
274
274
}
275
275
276
276
if (custom )
277
277
if (length(plot_list ) == 1 ) return (plot_list [[1 ]]) else return (plot_list )
278
278
279
- sshhr(gridExtra :: grid.arrange(grobs = plot_list , ncol = 1 )) %> %
280
- { if (shiny ) . else print(. ) }
279
+ sshhr(gridExtra :: grid.arrange(grobs = plot_list , ncol = 1 )) %> %
280
+ { if (shiny ) . else print(. ) }
281
281
}
0 commit comments