diff --git a/R/flatten.R b/R/flatten.R index b5f8a1114..fdd854503 100644 --- a/R/flatten.R +++ b/R/flatten.R @@ -139,7 +139,51 @@ dm_flatten_to_tbl_impl <- function(dm, start, list_of_pts, join, join_name, squa by <- map2(order_df$pred, order_df$name, ~ get_by(prep_dm, .x, .y)) # perform the joins according to the list, starting with table `initial_LHS` - reduce2(ordered_table_list, by, ~ join(..1, ..2, by = ..3), .init = tbl_impl(prep_dm, start)) + if (squash && length(order_df$name) > 0) { + # For recursive flattening, use an iterative approach to handle + # cases where columns serve as both PK and FK + result <- tbl_impl(prep_dm, start) + + for (i in seq_along(order_df$name)) { + table_to_join <- ordered_table_list[[i]] + join_by <- by[[i]] + + # Additional safety check: verify join columns exist + result_cols <- colnames(result) + left_cols <- names(join_by) + right_cols <- colnames(table_to_join) + join_right_cols <- unname(join_by) + + # Check if all required columns exist + missing_left <- setdiff(left_cols, result_cols) + missing_right <- setdiff(join_right_cols, right_cols) + + if (length(missing_left) > 0) { + # This is where the error likely occurs + # For recursive joins, we might need to handle column name mapping + # Look for disambiguated versions of missing columns + mapped_join_by <- join_by + for (missing_col in missing_left) { + # Look for pattern like "original_name.table_suffix" + pattern_matches <- result_cols[grepl(paste0("^", missing_col, "\\."), result_cols)] + if (length(pattern_matches) > 0) { + # Replace the missing column name with the disambiguated version + # Use the first match if multiple exist + names(mapped_join_by)[names(mapped_join_by) == missing_col] <- pattern_matches[1] + } + # If no pattern matches found, keep the original name and let the join fail with a clear error + } + join_by <- mapped_join_by + } + + result <- join(result, table_to_join, by = join_by) + } + + result + } else { + # For non-recursive flattening, use the original approach + reduce2(ordered_table_list, by, ~ join(..1, ..2, by = ..3), .init = tbl_impl(prep_dm, start)) + } } #' Join two tables diff --git a/tests/testthat/test-flatten.R b/tests/testthat/test-flatten.R index c182e624e..8f2c99c61 100644 --- a/tests/testthat/test-flatten.R +++ b/tests/testthat/test-flatten.R @@ -242,6 +242,38 @@ test_that("`dm_flatten_to_tbl(.recursive = TRUE)` does the right things", { ) }) +test_that("`dm_flatten_to_tbl(.recursive = TRUE)` works with combined primary and foreign key", { + # Test for issue where a column serves as both PK and FK + skip_if_src_not("df") # Only test on data frame source for now + + # Create test data as described in the issue + x <- tibble(a = 1L, b = 2L) + y <- tibble(c = 2L, d = 3L) + z <- tibble(e = 2L, f = 4L) + + # Create dm with the problematic structure: + # - y.c is both a primary key and a foreign key pointing to z.e + mydm <- dm(x, y, z) %>% + dm_add_pk(x, a) %>% + dm_add_pk(y, c) %>% + dm_add_pk(z, e) %>% + dm_add_fk(x, b, y) %>% + dm_add_fk(y, c, z) # This makes y.c both PK and FK + + # This should work without throwing "Join columns in `x` must be present in the data. Problem with `c`." + result <- mydm %>% dm_flatten_to_tbl(x, .recursive = TRUE) + + # Expected result should have all columns from x, y, and z + expected <- tibble( + a = 1L, + b = 2L, + d = 3L, + f = 4L + ) + + expect_equivalent_tbl(result, expected) +}) + test_that("prepare_dm_for_flatten() works", { # with rename out <- expect_message_obj(prepare_dm_for_flatten(