Skip to content

Commit 6abea7b

Browse files
committed
cleanup
1 parent 1440bf9 commit 6abea7b

22 files changed

+3110
-2746
lines changed

DESCRIPTION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ Package: radiant.basics
22
Type: Package
33
Title: Basics Menu for Radiant: Business Analytics using R and Shiny
44
Version: 0.8.3
5-
Date: 2017-6-29
5+
Date: 2017-7-5
66
Authors@R: person("Vincent", "Nijs", , "radiant@rady.ucsd.edu", c("aut", "cre"))
77
Description: The Radiant Basics menu includes interfaces for probability calculation, central limit theorem simulation, comparing means and proportions, goodness-of-fit testing, cross-tabs, and correlation. The application extends the functionality in radiant.data.
88
Depends:
@@ -13,9 +13,9 @@ Imports:
1313
gridExtra (>= 2.0.0),
1414
scales (>= 0.4.0),
1515
dplyr (>= 0.5),
16-
tidyr (>= 0.4.1),
16+
tidyr (>= 0.6),
1717
magrittr (>= 1.5),
18-
shiny (>= 1.0.0),
18+
shiny (>= 1.0.3),
1919
psych (>= 1.6.6),
2020
import (>= 1.1.0),
2121
methods

NEWS.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
* Renamed methods `summary.correlation_` and `plot.correlation_` to `summary.correlation` and `plot.correlation`
55
* Added `tab` argument to `goodness` and `cross_tabs` so a table object can be passed directly
66
* Documentation updates
7-
* Code clean-up and various minor fixes and improvements
87
* Scatter plots in _Correlation > Plot_ are now based on 1,000 data points by default. Use _R > Report_ to adjust (e.g., `plot(result, n = -1)`)
8+
* Fix for level ordering in goodness-of-fit expected-values plot
9+
* Code clean-up and various minor fixes and improvements
910

1011
# CHANGES IN radiant.basics 0.8.0
1112

R/compare_means.R

Lines changed: 137 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -32,102 +32,102 @@ compare_means <- function(dataset, var1, var2,
3232
test = "t",
3333
data_filter = "") {
3434

35-
vars <- c(var1, var2)
36-
dat <- getdata(dataset, vars, filt = data_filter)
35+
vars <- c(var1, var2)
36+
dat <- getdata(dataset, vars, filt = data_filter)
3737
if (!is_string(dataset)) dataset <- deparse(substitute(dataset)) %>% set_attr("df", TRUE)
3838

39-
## in case : was used for var2
40-
vars <- colnames(dat)
39+
## in case : was used for var2
40+
vars <- colnames(dat)
4141

4242
if (is.numeric(dat[[var1]])) {
43-
dat %<>% gather_("variable", "values", vars)
44-
dat[["variable"]] %<>% factor(levels = vars)
45-
cname <- " "
46-
} else {
43+
dat %<>% gather_("variable", "values", vars)
44+
dat[["variable"]] %<>% factor(levels = vars)
45+
cname <- " "
46+
} else {
4747
if (is.character(dat[[var1]])) dat[[var1]] <- as.factor(dat[[var1]])
48-
colnames(dat) <- c("variable","values")
49-
cname <- var1
48+
colnames(dat) <- c("variable","values")
49+
cname <- var1
5050
}
5151

5252
## needed with new tidyr
5353
dat$variable %<>% as.factor
5454

55-
## check there is variation in the data
55+
## check there is variation in the data
5656
if (any(summarise_all(dat, funs(does_vary)) == FALSE))
57-
return("Test could not be calculated (no variation). Please select another variable." %>%
58-
add_class("compare_means"))
57+
return("Test could not be calculated (no variation). Please select another variable." %>%
58+
add_class("compare_means"))
5959

60-
## resetting option to independent if the number of observations is unequal
60+
## resetting option to independent if the number of observations is unequal
6161
## summary on factor gives counts
6262
if (samples == "paired") {
6363
if (summary(dat[["variable"]]) %>% {max(.) != min(.)})
6464
samples <- "independent (obs. per level unequal)"
6565
}
6666

67-
levs <- levels(dat[["variable"]])
67+
levs <- levels(dat[["variable"]])
6868

6969
cmb <- combn(levs, 2) %>% t %>% as.data.frame
7070
rownames(cmb) <- cmb %>% apply(1, paste, collapse = ":")
7171
colnames(cmb) <- c("group1","group2")
7272

73-
if (!is_empty(comb)) {
74-
if (all(comb %in% rownames(cmb))) {
75-
cmb <- cmb[comb, ]
76-
} else {
77-
cmb <- cmb[1,]
78-
}
79-
}
73+
if (!is_empty(comb)) {
74+
if (all(comb %in% rownames(cmb))) {
75+
cmb <- cmb[comb, ]
76+
} else {
77+
cmb <- cmb[1,]
78+
}
79+
}
8080

8181
res <- cmb
8282
res[ ,c("t.value","p.value", "df", "ci_low", "ci_high", "cis_low", "cis_high")] <- 0
8383

8484
for (i in 1:nrow(cmb)) {
85-
sel <- cmb[i,]
85+
sel <- cmb[i,]
8686

87-
x <- filter_(dat, paste0("variable == '", sel[[1]], "'")) %>% .[["values"]]
88-
y <- filter_(dat, paste0("variable == '", sel[[2]], "'")) %>% .[["values"]]
87+
x <- filter_(dat, paste0("variable == '", sel[[1]], "'")) %>% .[["values"]]
88+
y <- filter_(dat, paste0("variable == '", sel[[2]], "'")) %>% .[["values"]]
8989

90-
res[i,c("t.value","p.value", "df", "ci_low", "ci_high")] <-
91-
t.test(x, y, paired = samples == "paired", alternative = alternative, conf.level = conf_lev) %>%
92-
tidy %>% .[1, c("statistic", "p.value","parameter", "conf.low", "conf.high")]
90+
res[i,c("t.value","p.value", "df", "ci_low", "ci_high")] <-
91+
t.test(x, y, paired = samples == "paired", alternative = alternative, conf.level = conf_lev) %>%
92+
tidy %>% .[1, c("statistic", "p.value","parameter", "conf.low", "conf.high")]
9393

94-
if (test != "t") {
95-
res[i,"p.value"] <-
96-
wilcox.test(x, y, paired = samples == "paired", alternative = alternative,
97-
conf.int = FALSE, conf.level = conf_lev) %>%
98-
tidy %>% .[1,"p.value"]
99-
}
94+
if (test != "t") {
95+
res[i,"p.value"] <-
96+
wilcox.test(x, y, paired = samples == "paired", alternative = alternative,
97+
conf.int = FALSE, conf.level = conf_lev) %>%
98+
tidy %>% .[1,"p.value"]
99+
}
100100

101-
## bootstrap confidence intervals
102-
## seem almost identical, even with highly skewed data
103-
# nr_x <- length(x)
104-
# nr_y <- length(y)
101+
## bootstrap confidence intervals
102+
## seem almost identical, even with highly skewed data
103+
# nr_x <- length(x)
104+
# nr_y <- length(y)
105105

106-
# sim_ci <-
107-
# replicate(1000, mean(sample(x, nr_x, replace = TRUE)) -
108-
# mean(sample(y, nr_y, replace = TRUE))) %>%
109-
# quantile(probs = {(1-conf_lev)/2} %>% c(., 1 - .))
106+
# sim_ci <-
107+
# replicate(1000, mean(sample(x, nr_x, replace = TRUE)) -
108+
# mean(sample(y, nr_y, replace = TRUE))) %>%
109+
# quantile(probs = {(1-conf_lev)/2} %>% c(., 1 - .))
110110

111-
# res[i, c("cis_low", "cis_high")] <- sim_ci
111+
# res[i, c("cis_low", "cis_high")] <- sim_ci
112112

113113
}
114114
rm(x,y,sel)
115115

116-
if (adjust != "none")
117-
res$p.value %<>% p.adjust(method = adjust)
116+
if (adjust != "none")
117+
res$p.value %<>% p.adjust(method = adjust)
118118

119-
## from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
120-
ci_calc <- function(se, n, conf.lev = .95)
121-
se * qt(conf.lev/2 + .5, n - 1)
119+
## from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
120+
ci_calc <- function(se, n, conf.lev = .95)
121+
se * qt(conf.lev/2 + .5, n - 1)
122122

123-
dat_summary <-
124-
dat %>%
125-
group_by_("variable") %>%
123+
dat_summary <-
124+
dat %>%
125+
group_by_("variable") %>%
126126
summarise_all(funs(mean = mean, n = length(.), sd, se = sd/sqrt(n),
127-
ci = ci_calc(se, n, conf_lev))) %>%
127+
ci = ci_calc(se, n, conf_lev))) %>%
128128
rename_(.dots = setNames("variable", cname))
129129

130-
vars <- paste0(vars, collapse = ", ")
130+
vars <- paste0(vars, collapse = ", ")
131131
as.list(environment()) %>% add_class("compare_means")
132132
}
133133

@@ -154,20 +154,20 @@ compare_means <- function(dataset, var1, var2,
154154
#' @export
155155
summary.compare_means <- function(object, show = FALSE, dec = 3, ...) {
156156

157-
if (is.character(object)) return(object)
157+
if (is.character(object)) return(object)
158158

159159
cat(paste0("Pairwise mean comparisons (", object$test, "-test)\n"))
160-
cat("Data :", object$dataset, "\n")
161-
if (object$data_filter %>% gsub("\\s","",.) != "")
162-
cat("Filter :", gsub("\\n","", object$data_filter), "\n")
163-
cat("Variables :", object$vars, "\n")
164-
cat("Samples :", object$samples, "\n")
165-
cat("Confidence:", object$conf_lev, "\n")
166-
cat("Adjustment:", if (object$adjust == "bonf") "Bonferroni" else "None", "\n\n")
160+
cat("Data :", object$dataset, "\n")
161+
if (object$data_filter %>% gsub("\\s","",.) != "")
162+
cat("Filter :", gsub("\\n","", object$data_filter), "\n")
163+
cat("Variables :", object$vars, "\n")
164+
cat("Samples :", object$samples, "\n")
165+
cat("Confidence:", object$conf_lev, "\n")
166+
cat("Adjustment:", if (object$adjust == "bonf") "Bonferroni" else "None", "\n\n")
167167

168168
object$dat_summary[,-1] %<>% round(dec)
169169
print(object$dat_summary %>% as.data.frame, row.names = FALSE)
170-
cat("\n")
170+
cat("\n")
171171

172172
hyp_symbol <- c("two.sided" = "not equal to",
173173
"less" = "<",
@@ -176,31 +176,31 @@ summary.compare_means <- function(object, show = FALSE, dec = 3, ...) {
176176
means <- object$dat_summary$mean
177177
names(means) <- object$dat_summary[[1]] %>% as.character
178178

179-
## determine lower and upper % for ci
180-
ci_perc <- ci_label(object$alternative, object$conf_lev)
181-
182-
mod <- object$res
183-
mod$`Alt. hyp.` <- paste(mod$group1,hyp_symbol,mod$group2," ")
184-
mod$`Null hyp.` <- paste(mod$group1,"=",mod$group2, " ")
185-
mod$diff <- { means[mod$group1 %>% as.character] - means[mod$group2 %>% as.character] } %>% round(dec)
186-
187-
if (show) {
188-
# mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "p.value")]
189-
mod$se <- (mod$diff / mod$t.value) %>% round(dec)
190-
mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "p.value", "se", "t.value", "df", "ci_low", "ci_high")]
191-
# mod <- mod[,c("Alt. hyp.", "Null hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "cis_low", "cis_high", "p.value")]
192-
if (!is.integer(mod[["df"]])) mod[["df"]] %<>% round(dec)
193-
mod[,c("t.value", "ci_low","ci_high")] %<>% round(dec)
194-
mod <- rename_(mod, .dots = setNames(c("ci_low","ci_high"), ci_perc))
195-
} else {
196-
mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "p.value")]
197-
}
198-
199-
mod$` ` <- sig_stars(mod$p.value)
200-
mod$p.value <- round(mod$p.value, dec)
201-
mod$p.value[ mod$p.value < .001 ] <- "< .001"
202-
print(mod, row.names = FALSE, right = FALSE)
203-
cat("\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n")
179+
## determine lower and upper % for ci
180+
ci_perc <- ci_label(object$alternative, object$conf_lev)
181+
182+
mod <- object$res
183+
mod$`Alt. hyp.` <- paste(mod$group1,hyp_symbol,mod$group2," ")
184+
mod$`Null hyp.` <- paste(mod$group1,"=",mod$group2, " ")
185+
mod$diff <- { means[mod$group1 %>% as.character] - means[mod$group2 %>% as.character] } %>% round(dec)
186+
187+
if (show) {
188+
# mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "p.value")]
189+
mod$se <- (mod$diff / mod$t.value) %>% round(dec)
190+
mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "p.value", "se", "t.value", "df", "ci_low", "ci_high")]
191+
# mod <- mod[,c("Alt. hyp.", "Null hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "cis_low", "cis_high", "p.value")]
192+
if (!is.integer(mod[["df"]])) mod[["df"]] %<>% round(dec)
193+
mod[,c("t.value", "ci_low","ci_high")] %<>% round(dec)
194+
mod <- rename_(mod, .dots = setNames(c("ci_low","ci_high"), ci_perc))
195+
} else {
196+
mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "p.value")]
197+
}
198+
199+
mod$` ` <- sig_stars(mod$p.value)
200+
mod$p.value <- round(mod$p.value, dec)
201+
mod$p.value[ mod$p.value < .001 ] <- "< .001"
202+
print(mod, row.names = FALSE, right = FALSE)
203+
cat("\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n")
204204
}
205205

206206
#' Plot method for the compare_means function
@@ -223,59 +223,59 @@ summary.compare_means <- function(object, show = FALSE, dec = 3, ...) {
223223
#' @export
224224
plot.compare_means <- function(x, plots = "scatter", shiny = FALSE, custom = FALSE, ...) {
225225

226-
if (is.character(x)) return(x)
227-
object <- x; rm(x)
228-
229-
dat <- object$dat
230-
v1 <- colnames(dat)[1]
231-
v2 <- colnames(dat)[-1]
232-
233-
## cname is equal to " " when the xvar is numeric
234-
if (object$cname == " ") {
235-
var1 <- v1
236-
var2 <- v2
237-
} else {
238-
var1 <- object$var1
239-
var2 <- object$var2
240-
}
241-
242-
## from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
243-
plot_list <- list()
244-
if ("bar" %in% plots) {
245-
colnames(object$dat_summary)[1] <- "variable"
246-
## use of `which` allows the user to change the order of the plots shown
247-
plot_list[[which("bar" == plots)]] <-
248-
ggplot(object$dat_summary,
249-
aes_string(x = "variable", y = "mean", fill = "variable")) +
250-
geom_bar(stat = "identity") +
251-
geom_errorbar(width = .1, aes(ymin = mean - ci, ymax = mean + ci)) +
252-
geom_errorbar(width = .05, aes(ymin = mean - se, ymax = mean + se), colour = "blue") +
253-
theme(legend.position = "none") +
254-
xlab(var1) + ylab(paste0(var2, " (mean)"))
255-
}
256-
257-
## graphs on full data
258-
if ("box" %in% plots) {
259-
plot_list[[which("box" == plots)]] <-
260-
visualize(dat, xvar = v1, yvar = v2, type = "box", custom = TRUE) +
261-
theme(legend.position = "none") + xlab(var1) + ylab(var2)
262-
}
263-
264-
if ("density" %in% plots) {
265-
plot_list[[which("density" == plots)]] <-
266-
visualize(dat, xvar = v2, type = "density", fill = v1, custom = TRUE) +
267-
xlab(var2) + guides(fill = guide_legend(title = var1))
268-
}
269-
270-
if ("scatter" %in% plots) {
271-
plot_list[[which("scatter" == plots)]] <-
272-
visualize(dat, xvar = v1, yvar = v2, type = "scatter", check = "jitter", alpha = .3, custom = TRUE) +
273-
xlab(var1) + ylab(paste0(var2, " (mean)"))
226+
if (is.character(x)) return(x)
227+
object <- x; rm(x)
228+
229+
dat <- object$dat
230+
v1 <- colnames(dat)[1]
231+
v2 <- colnames(dat)[-1]
232+
233+
## cname is equal to " " when the xvar is numeric
234+
if (object$cname == " ") {
235+
var1 <- v1
236+
var2 <- v2
237+
} else {
238+
var1 <- object$var1
239+
var2 <- object$var2
240+
}
241+
242+
## from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
243+
plot_list <- list()
244+
if ("bar" %in% plots) {
245+
colnames(object$dat_summary)[1] <- "variable"
246+
## use of `which` allows the user to change the order of the plots shown
247+
plot_list[[which("bar" == plots)]] <-
248+
ggplot(object$dat_summary,
249+
aes_string(x = "variable", y = "mean", fill = "variable")) +
250+
geom_bar(stat = "identity") +
251+
geom_errorbar(width = .1, aes(ymin = mean - ci, ymax = mean + ci)) +
252+
geom_errorbar(width = .05, aes(ymin = mean - se, ymax = mean + se), colour = "blue") +
253+
theme(legend.position = "none") +
254+
xlab(var1) + ylab(paste0(var2, " (mean)"))
255+
}
256+
257+
## graphs on full data
258+
if ("box" %in% plots) {
259+
plot_list[[which("box" == plots)]] <-
260+
visualize(dat, xvar = v1, yvar = v2, type = "box", custom = TRUE) +
261+
theme(legend.position = "none") + xlab(var1) + ylab(var2)
262+
}
263+
264+
if ("density" %in% plots) {
265+
plot_list[[which("density" == plots)]] <-
266+
visualize(dat, xvar = v2, type = "density", fill = v1, custom = TRUE) +
267+
xlab(var2) + guides(fill = guide_legend(title = var1))
268+
}
269+
270+
if ("scatter" %in% plots) {
271+
plot_list[[which("scatter" == plots)]] <-
272+
visualize(dat, xvar = v1, yvar = v2, type = "scatter", check = "jitter", alpha = .3, custom = TRUE) +
273+
xlab(var1) + ylab(paste0(var2, " (mean)"))
274274
}
275275

276276
if (custom)
277277
if (length(plot_list) == 1) return(plot_list[[1]]) else return(plot_list)
278278

279-
sshhr(gridExtra::grid.arrange(grobs = plot_list, ncol = 1)) %>%
280-
{ if (shiny) . else print(.) }
279+
sshhr(gridExtra::grid.arrange(grobs = plot_list, ncol = 1)) %>%
280+
{ if (shiny) . else print(.) }
281281
}

0 commit comments

Comments
 (0)