Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 45 additions & 1 deletion R/flatten.R
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,51 @@ dm_flatten_to_tbl_impl <- function(dm, start, list_of_pts, join, join_name, squa
by <- map2(order_df$pred, order_df$name, ~ get_by(prep_dm, .x, .y))

# perform the joins according to the list, starting with table `initial_LHS`
reduce2(ordered_table_list, by, ~ join(..1, ..2, by = ..3), .init = tbl_impl(prep_dm, start))
if (squash && length(order_df$name) > 0) {
# For recursive flattening, use an iterative approach to handle
# cases where columns serve as both PK and FK
result <- tbl_impl(prep_dm, start)

for (i in seq_along(order_df$name)) {
table_to_join <- ordered_table_list[[i]]
join_by <- by[[i]]

# Additional safety check: verify join columns exist
result_cols <- colnames(result)
left_cols <- names(join_by)
right_cols <- colnames(table_to_join)
join_right_cols <- unname(join_by)

# Check if all required columns exist
missing_left <- setdiff(left_cols, result_cols)
missing_right <- setdiff(join_right_cols, right_cols)

if (length(missing_left) > 0) {
# This is where the error likely occurs
# For recursive joins, we might need to handle column name mapping
# Look for disambiguated versions of missing columns
mapped_join_by <- join_by
for (missing_col in missing_left) {
# Look for pattern like "original_name.table_suffix"
pattern_matches <- result_cols[grepl(paste0("^", missing_col, "\\."), result_cols)]
if (length(pattern_matches) > 0) {
# Replace the missing column name with the disambiguated version
# Use the first match if multiple exist
names(mapped_join_by)[names(mapped_join_by) == missing_col] <- pattern_matches[1]
}
# If no pattern matches found, keep the original name and let the join fail with a clear error
}
join_by <- mapped_join_by
}

result <- join(result, table_to_join, by = join_by)
}

result
} else {
# For non-recursive flattening, use the original approach
reduce2(ordered_table_list, by, ~ join(..1, ..2, by = ..3), .init = tbl_impl(prep_dm, start))
}
}

#' Join two tables
Expand Down
32 changes: 32 additions & 0 deletions tests/testthat/test-flatten.R
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,38 @@ test_that("`dm_flatten_to_tbl(.recursive = TRUE)` does the right things", {
)
})

test_that("`dm_flatten_to_tbl(.recursive = TRUE)` works with combined primary and foreign key", {
# Test for issue where a column serves as both PK and FK
skip_if_src_not("df") # Only test on data frame source for now

# Create test data as described in the issue
x <- tibble(a = 1L, b = 2L)
y <- tibble(c = 2L, d = 3L)
z <- tibble(e = 2L, f = 4L)

# Create dm with the problematic structure:
# - y.c is both a primary key and a foreign key pointing to z.e
mydm <- dm(x, y, z) %>%
dm_add_pk(x, a) %>%
dm_add_pk(y, c) %>%
dm_add_pk(z, e) %>%
dm_add_fk(x, b, y) %>%
dm_add_fk(y, c, z) # This makes y.c both PK and FK

# This should work without throwing "Join columns in `x` must be present in the data. Problem with `c`."
result <- mydm %>% dm_flatten_to_tbl(x, .recursive = TRUE)

# Expected result should have all columns from x, y, and z
expected <- tibble(
a = 1L,
b = 2L,
d = 3L,
f = 4L
)

expect_equivalent_tbl(result, expected)
})

test_that("prepare_dm_for_flatten() works", {
# with rename
out <- expect_message_obj(prepare_dm_for_flatten(
Expand Down