Skip to content

Commit 4a4f478

Browse files
committed
resolve conflict
2 parents 286479c + 0a55c0f commit 4a4f478

File tree

3 files changed

+50
-0
lines changed

3 files changed

+50
-0
lines changed

.DS_Store

-6 KB
Binary file not shown.

exercises/ThildeDataScience4R.R

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Attach the packages whose functions this script calls, so it runs in a
# fresh R session instead of relying on whatever happens to be loaded.
library(readxl)     # read_xlsx()
library(tidyverse)  # read_csv(), %>%, dplyr verbs, tibble()
library(ggfortify)  # autoplot() method for prcomp objects

# Read the three input tables; paths are relative to the working directory.
diab <- read_xlsx("../data/diabetes_clinical_toy_messy.xlsx")
diabMeta <- read_csv("../data/diabetes_meta_toy_messy.csv")
glu <- read_xlsx("../data/df_glucose.xlsx")
4+
5+
6+
# Clean the clinical table:
#   1. keep only rows with no missing value in any column,
#   2. drop rows where Age, BloodPressure or BMI is recorded as 0,
#   3. store Diabetes and Age as integers and map the "male" / "FEMALE"
#      spellings of Sex to "Male" / "Female" (other values pass through).
diab_clean <- diab %>%
  filter(if_all(everything(), ~ !is.na(.x))) %>%
  filter(Age != 0, BloodPressure != 0, BMI != 0) %>%
  mutate(
    Diabetes = as.integer(Diabetes),
    Age = as.integer(Age),
    Sex = case_when(
      Sex == "male" ~ "Male",
      Sex == "FEMALE" ~ "Female",
      TRUE ~ as.character(Sex)
    )
  )
10+
11+
12+
# Attach the metadata, then the glucose table, to the cleaned clinical rows.
# No `by` is supplied, so each join matches on every column name the two
# tables have in common (dplyr reports the chosen columns in a message).
diab_clean_meta <- diab_clean %>%
  left_join(diabMeta)

diab_glu <- diab_clean_meta %>%
  left_join(glu)
15+
16+
# Numeric columns of the joined table, without the ID column; this is the
# input matrix for the PCA below.
# `select(where(...))` replaces the superseded `select_if()`.
diabnum <- diab_glu %>%
  select(-ID) %>%
  select(where(is.numeric))
19+
20+
21+
22+
# PCA on the numeric columns, each scaled to unit variance.
pca_res <- prcomp(diabnum, scale. = TRUE)

# Plot the scores coloured by the Diabetes column. `data` must have one row
# per PCA observation, so pass diab_glu (the table diabnum was derived from)
# rather than diab_clean_meta, whose row count can differ if the glucose
# join adds rows.
# NOTE(review): Diabetes is an integer column, so the colour scale is
# continuous; convert it to a factor if discrete colours are wanted.
autoplot(pca_res, data = diab_glu, colour = "Diabetes")
24+
25+
26+
# Percentage of total variance explained by each principal component.
# The variance of a component is its standard deviation squared; the whole
# table is computed in one vectorised step instead of filling a list
# element by element and row-binding it afterwards.
pc_variance <- pca_res$sdev^2

PCdf <- tibble(
  PC = paste0("PC", seq_along(pc_variance)),
  Variance = pc_variance / sum(pc_variance) * 100
)
37+
38+
39+
40+
41+
42+
# Test for outliers and normality:
43+
44+
# Cook's distance
# Regress Diabetes on every other column and draw diagnostic plot 4
# (Cook's distance per observation). ID is a row identifier, not a
# predictor, so it is dropped before fitting, as it is for the PCA.
model1 <- lm(Diabetes ~ ., data = select(diab_glu, -ID))
plot(model1, 4)
47+
48+
# Z-score screen on PhysicalActivity: standardise the column, then print the
# positions whose standardised value lies more than 3.29 away from zero.
z_scores <- diab_glu$PhysicalActivity %>%
  scale()
which(z_scores > 3.29 | z_scores < -3.29)

slides/R4datascience_slides.key

4.95 MB
Binary file not shown.

0 commit comments

Comments
 (0)