Skip to content

Commit 4340c27

Browse files
committed
Next version
- 0.2.0.99 - All local builds pass - Rhub - Finalize for CRAN
1 parent 6475cca commit 4340c27

32 files changed

+771
-239
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ Type: Package
22
Package: tidycells
33
Title: Read Tabular Data from Diverse Sources and Easily Make
44
Them Tidy
5-
Version: 0.2.0.9000
5+
Version: 0.2.0.99
66
Authors@R:
77
person(given = "Indranil",
88
family = "Gayen",

NEWS.md

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,25 @@
11

22
# *News*
33

4+
# tidycells 0.2.1
5+
6+
## New features
7+
8+
* Enhancement in the heuristic-based algorithm
9+
10+
## Other changes
11+
12+
* Now if `read_cells` fails at an intermediate stage, it returns the output of the last successful stage
13+
414
# tidycells 0.2.0 _(2019-08-20)_
515

616
## First CRAN Release
717

8-
* Next release will adopt to [tidyr 1.0.0](https://github.com/tidyverse/tidyr/issues/710)
9-
* Next release will fix CRAN build issue in Fedora [#1](https://github.com/r-rudra/tidycells/issues/1) ([![See DevNotes](https://img.shields.io/badge/See-DevNotes-blue.svg)](https://github.com/r-rudra/tidycells/blob/master/dev-notes.md#regarding-cran-results))
18+
* Initial CRAN Release
1019

1120
# tidycells 0.1.9 _(2019-08-12)_
1221

13-
## New Features
22+
## New Features
1423

1524
* Added `collate_columns` to collate attribute-columns having similar content.
1625

@@ -19,5 +28,5 @@
1928
## Initial Public Release
2029

2130
* Initial Release to GitHub
22-
* Prior to this it was private package
31+
* Prior to this it was a private package
2332

R/ai_attach_direction.R

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,19 @@
11

22

33
ai_attach_direction <- function(d_att_dat_map_raw) {
4-
d_att_dat_map_raw %>%
4+
5+
# asp: attr split
6+
d_att_dat_map_raw_asp <- d_att_dat_map_raw %>%
57
# kept for tracking
68
mutate(direction_basic = direction) %>%
79
mutate(attr_gid_split = ifelse(direction_group == "NS" & attr_group == "major", row_a,
810
ifelse(direction_group == "WE" & attr_group == "major", col_a, 0)
9-
)) %>%
11+
))
12+
13+
d_att_dat_map_raw_asp <- d_att_dat_map_raw_asp %>%
14+
mutate(attr_gid_split = ifelse(direction_group == "corner", col_a, attr_gid_split))
15+
16+
d_att_dat_map_raw_asp %>%
1017
group_by(data_gid, attr_gid, direction, attr_gid_split) %>%
1118
group_split() %>%
1219
map_df(~ .x %>% mutate(direction = get_direction(.x)))

R/ai_data_gid_join.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
ai_data_gid_join <- function(d_dat, data_attr_map, full_data) {
22
repeat({
3+
if (length(unique(d_dat$group_id_map$gid)) < 2) break()
4+
35
data_gid_comb <- d_dat$group_id_map$gid %>%
46
unique() %>%
57
utils::combn(2) %>%

R/ai_get_data_attr_map_main.R

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
2+
ai_get_data_attr_map_main <- function(d_dat, d_att, crude_join = TRUE) {
3+
4+
# start with simple attr data map
5+
admap0 <- ai_get_data_attr_map(
6+
dat_boundary = d_dat$group_id_boundary,
7+
att_gid_map = d_att$group_id_map
8+
)
9+
10+
11+
if (crude_join) {
12+
# crude joins
13+
# absolutely sure joins
14+
crude_djoins <- ai_crude_data_block_joins(basic_admap = admap0$map, d_dat = d_dat)
15+
if (crude_djoins$done) {
16+
d_dat <- crude_djoins$d_dat
17+
admap0 <- ai_get_data_attr_map(
18+
dat_boundary = d_dat$group_id_boundary,
19+
att_gid_map = d_att$group_id_map
20+
)
21+
}
22+
}
23+
24+
25+
# split attr gid relative to data_gid
26+
rel_chk <- ai_relative_data_split_attr(basic_map = admap0, d_att = d_att, d_dat = d_dat)
27+
if (rel_chk$done) {
28+
d_att <- rel_chk$d_att %>% map(unique)
29+
admap0 <- rel_chk$admap
30+
}
31+
32+
33+
admap1_major_minor <- admap0$all_map %>%
34+
rename(attr_gid = gid, dist = md) %>%
35+
filter(direction_group != "corner") %>%
36+
ai_get_data_attr_map_details(d_dat, d_att)
37+
38+
admap1_major_minor_compact <- admap1_major_minor$map %>%
39+
filter(attr_group == "major") %>%
40+
rename(md = dist) %>%
41+
group_by(data_gid, direction_group) %>%
42+
mutate(m_dist = min(md)) %>%
43+
ungroup() %>%
44+
filter(md == m_dist) %>%
45+
select(-md) %>%
46+
rename(dist = m_dist)
47+
48+
admap0_pass0 <- admap1_major_minor_compact %>%
49+
filter(direction_group != "corner") %>%
50+
unique() %>%
51+
select(-attr_group)
52+
53+
# dimension analysis done here (major minor classification)
54+
admap1 <- admap0_pass0 %>%
55+
ai_get_data_attr_map_details(d_dat, d_att)
56+
57+
list(admap = admap1, d_dat = d_dat, d_att = d_att)
58+
}

R/ai_relative_data_join_attr.R

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
ai_relative_data_join_attr <- function(admap_main, d_att) {
44
chk <- admap_main$raw_map %>%
5-
distinct(attr_gid, data_gid, direction) %>%
6-
group_by(data_gid, direction) %>%
5+
distinct(attr_gid, data_gid, direction, attr_group) %>%
6+
group_by(data_gid, direction, attr_group) %>%
77
mutate(n_att = n_distinct(attr_gid)) %>%
88
ungroup() %>%
99
filter(n_att > 1)
@@ -16,13 +16,14 @@ ai_relative_data_join_attr <- function(admap_main, d_att) {
1616

1717
rel_gids <- chk %>%
1818
select(-n_att) %>%
19-
inner_join(admap_main$raw_map, by = c("attr_gid", "data_gid", "direction"))
19+
inner_join(admap_main$raw_map, by = c("attr_gid", "data_gid", "direction", "attr_group"))
2020

21-
d_att_dat_map_raw_rest <- admap_main$raw_map %>% anti_join(chk, by = c("attr_gid", "data_gid", "direction"))
21+
d_att_dat_map_raw_rest <- admap_main$raw_map %>%
22+
anti_join(chk, by = c("attr_gid", "data_gid", "direction", "attr_group"))
2223

2324
rel_gids_att <- rel_gids %>%
24-
distinct(attr_gid, data_gid, direction, row = row_a, col = col_a) %>%
25-
group_by(data_gid, direction) %>%
25+
distinct(attr_gid, data_gid, direction, attr_group, row = row_a, col = col_a) %>%
26+
group_by(data_gid, direction, attr_group) %>%
2627
mutate(new_attr_gid = paste(min(attr_gid), data_gid, direction, sep = "_")) %>%
2728
ungroup()
2829

@@ -31,6 +32,7 @@ ai_relative_data_join_attr <- function(admap_main, d_att) {
3132
rel_gids <- rel_gids %>%
3233
group_by(new_attr_gid, data_gid) %>%
3334
mutate(
35+
# this is possibly not required anymore as attr_group is in grouping vars
3436
new_attr_group = ifelse(any(attr_group == "major"), "major", "minor"),
3537
new_dist = min(dist)
3638
) %>%

R/analyze_cells.R

Lines changed: 60 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -90,37 +90,17 @@ analyze_cells_raw <- function(d, silent = TRUE) {
9090
abort("No `attribute` cells found")
9191
}
9292

93-
d_dat <- get_group_id(data_cells)
94-
d_att <- get_group_id(attr_cells)
93+
d_dat_orig <- get_group_id(data_cells)
94+
d_att_orig <- get_group_id(attr_cells)
9595

96-
# start with simple attr data map
97-
admap0 <- ai_get_data_attr_map(
98-
dat_boundary = d_dat$group_id_boundary,
99-
att_gid_map = d_att$group_id_map
100-
)
101-
102-
# crude joins
103-
# absolutely sure joins
104-
crude_djoins <- ai_crude_data_block_joins(basic_admap = admap0$map, d_dat = d_dat)
105-
if (crude_djoins$done) {
106-
d_dat <- crude_djoins$d_dat
107-
admap0 <- ai_get_data_attr_map(
108-
dat_boundary = d_dat$group_id_boundary,
109-
att_gid_map = d_att$group_id_map
110-
)
111-
}
96+
d_dat <- d_dat_orig
97+
d_att <- d_att_orig
11298

113-
# split attr gid relative to data_gid
114-
rel_chk <- ai_relative_data_split_attr(basic_map = admap0, d_att = d_att, d_dat = d_dat)
115-
if (rel_chk$done) {
116-
d_att <- rel_chk$d_att
117-
admap0 <- rel_chk$admap
118-
}
99+
setp1 <- ai_get_data_attr_map_main(d_dat, d_att)
119100

120-
# dimension analysis done here (major minor classification)
121-
admap1 <- admap0$map %>%
122-
filter(direction_group != "corner") %>%
123-
ai_get_data_attr_map_details(d_dat, d_att)
101+
d_dat <- setp1$d_dat
102+
d_att <- setp1$d_att
103+
admap1 <- setp1$admap
124104

125105
# data_gid join (if possible)
126106
if (nrow(d_dat$group_id_boundary) > 1) {
@@ -132,22 +112,21 @@ analyze_cells_raw <- function(d, silent = TRUE) {
132112
# this means results has been invalidated
133113
d_dat <- d_dat0
134114

135-
admap0 <- ai_get_data_attr_map(
136-
dat_boundary = d_dat$group_id_boundary,
137-
att_gid_map = d_att$group_id_map
138-
)
115+
# revert back to original form
116+
d_att <- d_att_orig
139117

140-
# all (NS and WE) attr are attached
141-
admap1 <- admap0$map %>%
142-
filter(direction_group != "corner") %>%
143-
ai_get_data_attr_map_details(d_dat, d_att)
118+
setp2 <- ai_get_data_attr_map_main(d_dat, d_att, crude_join = FALSE)
119+
120+
d_dat <- setp2$d_dat
121+
d_att <- setp2$d_att
122+
admap1 <- setp2$admap
144123
}
145124
}
146125

147126
# join attr based on block merges possible
148127
rel_chk <- ai_relative_data_join_attr(admap_main = admap1, d_att = d_att)
149128
if (rel_chk$done) {
150-
d_att <- rel_chk$d_att
129+
d_att <- rel_chk$d_att %>% map(unique)
151130
admap1 <- rel_chk$admap
152131
}
153132

@@ -193,10 +172,52 @@ analyze_cells_raw <- function(d, silent = TRUE) {
193172
}
194173

195174

196-
d_dat$group_id_whole_boundary <- extend_data_block(d_dat$group_id_extended_boundary, admap_fc1$map, d_att)
175+
# last stage of analysis
176+
# this is not required
177+
# d_dat$group_id_whole_boundary <- extend_data_block(d_dat$group_id_extended_boundary, admap_fc1$map, d_att)
178+
179+
admap2 <- merge_admaps(admap1, admap_fc1)
180+
181+
# join attr based on block merges possible (one more time)
182+
rel_chk <- ai_relative_data_join_attr(admap_main = admap2, d_att = d_att)
183+
if (rel_chk$done) {
184+
d_att <- rel_chk$d_att %>% map(unique)
185+
admap2 <- rel_chk$admap
186+
}
187+
188+
cmp <- compact_gid_maps(d_att, admap2)
189+
d_att <- cmp$gid_map
190+
admap2 <- cmp$admap
191+
192+
admap3 <- admap2$map %>%
193+
select(-attr_group) %>%
194+
ai_get_data_attr_map_details(d_dat, d_att)
195+
196+
if (!identical(admap3$map, admap2$map)) {
197+
# I think this can be iterated
198+
# KFL
199+
admap3_pass <- admap3$map %>%
200+
rename(md = dist) %>%
201+
group_by(data_gid, direction_group, attr_group) %>%
202+
mutate(m_dist = min(md)) %>%
203+
ungroup() %>%
204+
filter(md == m_dist) %>%
205+
select(-md) %>%
206+
rename(dist = m_dist)
207+
208+
admap <- admap3_pass %>%
209+
select(-attr_group) %>%
210+
ai_get_data_attr_map_details(d_dat, d_att)
211+
} else {
212+
admap <- admap3
213+
}
214+
215+
# once admap is done
216+
d_dat$group_id_extended_boundary <- NULL
217+
d_dat$group_id_whole_boundary <- extend_data_block(d_dat$group_id_boundary, admap$map, d_att)
197218

198-
admap <- merge_admaps(admap1, admap_fc1)
199219

220+
# str-detection done
200221
this_cells <- get_cells_from_admap(admap, d_dat, d_att)
201222

202223
# natural gid for easier understanding

R/attach_trace_info.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@ attach_trace_info <- function(ca, dc) {
44
if (missing(ca)) {
55
abort("either 'ca' or 'dc' required")
66
}
7-
dc_t <- compose_cells_raw(ca, trace_it_back = TRUE)
7+
dc_t <- compose_cells_raw(ca, trace_it_back = TRUE, silent = TRUE)
88
dc <- dc_t
99
} else {
1010
if (!any(stringr::str_detect(colnames(dc), "cellAddress_"))) {
1111
if (!missing(ca)) {
12-
dc_t <- compose_cells_raw(ca, trace_it_back = TRUE)
12+
dc_t <- compose_cells_raw(ca, trace_it_back = TRUE, silent = TRUE)
1313
dc_t <- dc_t[c("row", "col", colnames(dc_t)[stringr::str_detect(colnames(dc_t), "cellAddress_")])]
1414
dc_t$chk_this <- "ok"
1515
dc <- dc %>% left_join(dc_t, by = c("row", "col"))

R/compact_gid_maps.R

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
2+
# different splits resulted in the same attr_gid
3+
# these will be compacted into a single group
4+
compact_gid_maps <- function(gid_map, admap_main) {
5+
gid_map_raw <- gid_map$group_id_map
6+
7+
foot_prints <- gid_map_raw %>%
8+
group_by(gid) %>%
9+
group_split() %>%
10+
map_df(~ .x %>%
11+
distinct(row, col, gid) %>%
12+
arrange(row, col) %>%
13+
summarise(gid = gid[1], fp = paste0(row, ",", col, collapse = ";")))
14+
15+
ngmap <- foot_prints %>%
16+
group_by(fp) %>%
17+
mutate(new_gid = min(gid)) %>%
18+
ungroup() %>%
19+
distinct(gid, new_gid)
20+
21+
gid_map_raw_new <- gid_map_raw %>% left_join(ngmap, by = "gid")
22+
gid_map_raw_new <- gid_map_raw_new %>%
23+
mutate(new_gid = ifelse(is.na(new_gid), gid, new_gid)) %>%
24+
select(-gid) %>%
25+
rename(gid = new_gid) %>%
26+
distinct()
27+
28+
gid_map$group_id_map <- gid_map_raw_new
29+
gid_map$group_id_boundary <- get_group_id_boundary(gid_map_raw_new)
30+
31+
admap_main_raw_map_new <- admap_main$raw_map %>%
32+
left_join(ngmap %>% rename(attr_gid = gid, new_attr_gid = new_gid), by = "attr_gid")
33+
admap_main_raw_map_new <- admap_main_raw_map_new %>%
34+
mutate(new_attr_gid = ifelse(is.na(new_attr_gid), attr_gid, new_attr_gid)) %>%
35+
select(-attr_gid) %>%
36+
rename(attr_gid = new_attr_gid) %>%
37+
distinct()
38+
39+
admap_main$raw_map <- admap_main_raw_map_new
40+
41+
admap_main$map <- admap_main$raw_map %>%
42+
distinct(attr_gid, data_gid, direction, direction_group, dist, attr_group)
43+
44+
# dimension analysis is not kept
45+
46+
list(gid_map = gid_map, admap = admap_main)
47+
}

0 commit comments

Comments
 (0)