Skip to content

Commit a1e2ca3

Browse files
committed
docs: enhance comments for intersect and except functions to clarify join types
1 parent 9fbc211 commit a1e2ca3

File tree

2 files changed: 7 additions and 3 deletions

datafusion/expr/src/logical_plan/builder.rs

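Lines changed: 1 addition & 1 deletion (the removed and added lines read identically in this view, so the change appears to be whitespace-only)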
Original file line numberDiff line numberDiff line change
@@ -1748,7 +1748,7 @@ pub fn requalify_sides_if_needed(
1748 1748
// Implementation note: This uses a simple O(n*m) nested loop rather than
1749 1749
// a HashMap-based O(n+m) approach. The nested loop is preferred because:
1750 1750
// - Schemas are typically small (per TPCH benchmark, max is 16 columns),
1751 -
// so n*m is negligible
1751 +
// so n*m is negligible
1752 1752
// - Early return on first conflict makes common case very fast
1753 1753
// - Code is simpler and easier to reason about
1754 1754
// - Called only during plan construction, not in execution hot path

datafusion/substrait/tests/cases/roundtrip_logical_plan.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,7 +1107,8 @@ async fn self_referential_intersect() -> Result<()> {
1107 1107
// After roundtrip through Substrait, SubqueryAlias is lost and requalification
1108 1108
// produces "left" and "right" aliases
1109 1109
// Note: INTERSECT (without ALL) includes DISTINCT, but the outer Aggregate
1110 -
// is optimized away, resulting in just the LeftSemi join
1110 +
// is optimized away, resulting in just the `LeftSemi` join
1111 +
// (`LeftSemi` returns rows from left that exist in right)
1111 1112
assert_expected_plan(
1112 1113
"SELECT a FROM data WHERE a > 0 INTERSECT SELECT a FROM data WHERE a < 5",
1113 1114
"LeftSemi Join: left.a = right.a\
@@ -1131,7 +1132,8 @@ async fn self_referential_except() -> Result<()> {
1131 1132
// After roundtrip through Substrait, SubqueryAlias is lost and requalification
1132 1133
// produces "left" and "right" aliases
1133 1134
// Note: EXCEPT (without ALL) includes DISTINCT, but the outer Aggregate
1134 -
// is optimized away, resulting in just the LeftAnti join
1135 +
// is optimized away, resulting in just the `LeftAnti` join
1136 +
// (`LeftAnti` returns rows from left that don't exist in right)
1135 1137
assert_expected_plan(
1136 1138
"SELECT a FROM data WHERE a > 0 EXCEPT SELECT a FROM data WHERE a < 5",
1137 1139
"LeftAnti Join: left.a = right.a\
@@ -1151,6 +1153,7 @@ async fn self_referential_except() -> Result<()> {
1151 1153
async fn self_referential_intersect_all() -> Result<()> {
1152 1154
// Test INTERSECT ALL with the same table on both sides
1153 1155
// INTERSECT ALL preserves duplicates and does not include DISTINCT
1156 +
// Uses `LeftSemi` join (returns rows from left that exist in right)
1154 1157
// The requalification ensures no duplicate field name errors
1155 1158
assert_expected_plan(
1156 1159
"SELECT a FROM data WHERE a > 0 INTERSECT ALL SELECT a FROM data WHERE a < 5",
@@ -1170,6 +1173,7 @@ async fn self_referential_intersect_all() -> Result<()> {
1170 1173
async fn self_referential_except_all() -> Result<()> {
1171 1174
// Test EXCEPT ALL with the same table on both sides
1172 1175
// EXCEPT ALL preserves duplicates and does not include DISTINCT
1176 +
// Uses `LeftAnti` join (returns rows from left that don't exist in right)
1173 1177
// The requalification ensures no duplicate field name errors
1174 1178
assert_expected_plan(
1175 1179
"SELECT a FROM data WHERE a > 0 EXCEPT ALL SELECT a FROM data WHERE a < 5",

0 commit comments

Comments
 (0)