-
Notifications
You must be signed in to change notification settings - Fork 178
Mvexpand feature #4675
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Mvexpand feature #4675
Changes from all commits
445799f
427af0f
83d7786
7f6e127
dcbf56b
a3c1384
148ccc5
f5a9e82
2cc60ad
18cbba4
60fa2ad
d248cb0
a28894a
825c52e
dc76a55
8319583
6d87133
b83ab21
c84703d
de82b65
6c6e0ec
069d52e
2f3aeb6
e584368
a41b081
bf018b7
c215243
a6ccb5a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.ast.tree; | ||
|
|
||
| import com.google.common.collect.ImmutableList; | ||
| import java.util.List; | ||
| import javax.annotation.Nullable; | ||
| import lombok.EqualsAndHashCode; | ||
| import lombok.Getter; | ||
| import lombok.ToString; | ||
| import org.opensearch.sql.ast.AbstractNodeVisitor; | ||
| import org.opensearch.sql.ast.expression.Field; | ||
|
|
||
| /** AST node representing an {@code mvexpand <field> [limit N]} operation. */ | ||
| @ToString | ||
| @EqualsAndHashCode(callSuper = false) | ||
| public class MvExpand extends UnresolvedPlan { | ||
|
|
||
| private UnresolvedPlan child; | ||
| @Getter private final Field field; | ||
| @Getter @Nullable private final Integer limit; | ||
|
|
||
| public MvExpand(Field field, @Nullable Integer limit) { | ||
| this.field = field; | ||
| this.limit = limit; | ||
| } | ||
|
|
||
| @Override | ||
| public MvExpand attach(UnresolvedPlan child) { | ||
| this.child = child; | ||
| return this; | ||
| } | ||
|
|
||
| @Override | ||
| public List<UnresolvedPlan> getChild() { | ||
| return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); | ||
| } | ||
|
|
||
| @Override | ||
| public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) { | ||
| return nodeVisitor.visitMvExpand(this, context); | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -122,6 +122,7 @@ | |
| import org.opensearch.sql.ast.tree.Lookup.OutputStrategy; | ||
| import org.opensearch.sql.ast.tree.ML; | ||
| import org.opensearch.sql.ast.tree.Multisearch; | ||
| import org.opensearch.sql.ast.tree.MvExpand; | ||
| import org.opensearch.sql.ast.tree.Paginate; | ||
| import org.opensearch.sql.ast.tree.Parse; | ||
| import org.opensearch.sql.ast.tree.Patterns; | ||
|
|
@@ -839,7 +840,12 @@ public RelNode visitPatterns(Patterns node, CalcitePlanContext context) { | |
| .toList(); | ||
| context.relBuilder.aggregate(context.relBuilder.groupKey(groupByList), aggCall); | ||
| buildExpandRelNode( | ||
| context.relBuilder.field(node.getAlias()), node.getAlias(), node.getAlias(), context); | ||
| context.relBuilder.field(node.getAlias()), | ||
| node.getAlias(), | ||
| node.getAlias(), | ||
| null, | ||
| context); | ||
|
|
||
| flattenParsedPattern( | ||
| node.getAlias(), | ||
| context.relBuilder.field(node.getAlias()), | ||
|
|
@@ -1611,6 +1617,20 @@ private static void buildDedupNotNull( | |
| context.relBuilder.projectExcept(_row_number_dedup_); | ||
| } | ||
|
|
||
| @Override | ||
| public RelNode visitMvExpand(MvExpand node, CalcitePlanContext context) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Let's move this method next to
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
| visitChildren(node, context); | ||
| Field arrayField = node.getField(); | ||
| RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); | ||
|
|
||
| // pass the per-document limit into the builder so it can be applied inside the UNNEST inner | ||
| // query | ||
| buildMvExpandRelNode( | ||
| arrayFieldRex, arrayField.getField().toString(), null, node.getLimit(), context); | ||
|
|
||
| return context.relBuilder.peek(); | ||
| } | ||
|
|
||
| @Override | ||
| public RelNode visitWindow(Window node, CalcitePlanContext context) { | ||
| visitChildren(node, context); | ||
|
|
@@ -2807,7 +2827,7 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { | |
| RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); | ||
| String alias = expand.getAlias(); | ||
|
|
||
| buildExpandRelNode(arrayFieldRex, arrayField.getField().toString(), alias, context); | ||
| buildExpandRelNode(arrayFieldRex, arrayField.getField().toString(), alias, null, context); | ||
|
|
||
| return context.relBuilder.peek(); | ||
| } | ||
|
|
@@ -3055,46 +3075,51 @@ private void flattenParsedPattern( | |
| projectPlusOverriding(fattenedNodes, projectNames, context); | ||
| } | ||
|
|
||
| private void buildExpandRelNode( | ||
| RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext context) { | ||
| // 3. Capture the outer row in a CorrelationId | ||
| Holder<RexCorrelVariable> correlVariable = Holder.empty(); | ||
| context.relBuilder.variable(correlVariable::set); | ||
| // New generic helper: builds Uncollect + Correlate using a provided left node (so caller | ||
| // can ensure left rowType is fixed). | ||
| private void buildUnnestForLeft( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we no longer need this method as the logic is unified. We might want to extract some portion of
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed: removed buildUnnestForLeft and inlined its logic into buildExpandRelNode. buildMvExpandRelNode now delegates to buildExpandRelNode and passes the per-document limit through. Also added JavaDoc for visitMvExpand to match visitExpand. Functionality is unchanged |
||
| RelNode leftBuilt, | ||
| RelDataType leftRowType, | ||
| int arrayFieldIndex, | ||
| String arrayFieldName, | ||
| String alias, | ||
| Holder<RexCorrelVariable> correlVariable, | ||
| RexNode correlArrayFieldAccess, | ||
| Integer mvExpandLimit, | ||
| CalcitePlanContext context) { | ||
|
|
||
| // 4. Create RexFieldAccess to access left node's array field with correlationId and build join | ||
| // left node | ||
| RexNode correlArrayFieldAccess = | ||
| context.relBuilder.field( | ||
| context.rexBuilder.makeCorrel( | ||
| context.relBuilder.peek().getRowType(), correlVariable.get().id), | ||
| arrayFieldRex.getIndex()); | ||
| RelNode leftNode = context.relBuilder.build(); | ||
| RelBuilder rb = context.relBuilder; | ||
| rb.push(LogicalValues.createOneRow(rb.getCluster())) | ||
| .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)); | ||
| // apply per-document limit into the inner SELECT if provided | ||
| if (mvExpandLimit != null && mvExpandLimit > 0) { | ||
| rb.limit(0, mvExpandLimit); | ||
| } | ||
| RelNode rightNode = rb.uncollect(List.of(), false).build(); | ||
|
|
||
| // 5. Build join right node and expand the array field using uncollect | ||
| RelNode rightNode = | ||
| context | ||
| .relBuilder | ||
| // fake input, see convertUnnest and convertExpression in Calcite SqlToRelConverter | ||
| .push(LogicalValues.createOneRow(context.relBuilder.getCluster())) | ||
| .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)) | ||
| .uncollect(List.of(), false) | ||
| .build(); | ||
|
|
||
| // 6. Perform a nested-loop join (correlate) between the original table and the expanded | ||
| // array field. | ||
| // The last parameter has to refer to the array to be expanded on the left side. It will | ||
| // be used by the right side to correlate with the left side. | ||
| // Compute required column ref against leftBuilt's row type (robust) | ||
| RexNode requiredColumnRef = | ||
| context.rexBuilder.makeInputRef(leftBuilt.getRowType(), arrayFieldIndex); | ||
|
|
||
| // Correlate leftBuilt and rightNode using the proper required column ref | ||
| context | ||
| .relBuilder | ||
| .push(leftNode) | ||
| .push(leftBuilt) | ||
| .push(rightNode) | ||
| .correlate(JoinRelType.INNER, correlVariable.get().id, List.of(arrayFieldRex)) | ||
| // 7. Remove the original array field from the output. | ||
| // TODO: RFC: should we keep the original array field when alias is present? | ||
| .projectExcept(arrayFieldRex); | ||
| .correlate(JoinRelType.INNER, correlVariable.get().id, List.of(requiredColumnRef)); | ||
|
|
||
| // Remove the original array field from the output by name if possible | ||
| RexNode toRemove; | ||
| try { | ||
| toRemove = context.relBuilder.field(arrayFieldName); | ||
| } catch (Exception e) { | ||
| // Fallback in case name lookup fails | ||
| toRemove = requiredColumnRef; | ||
| } | ||
| context.relBuilder.projectExcept(toRemove); | ||
|
|
||
| // Optional rename into alias (preserve the original logic) | ||
| if (alias != null) { | ||
| // Sub-nested fields cannot be removed after renaming the nested field. | ||
| tryToRemoveNestedFields(context); | ||
| RexInputRef expandedField = context.relBuilder.field(arrayFieldName); | ||
| List<String> names = new ArrayList<>(context.relBuilder.peek().getRowType().getFieldNames()); | ||
|
|
@@ -3103,6 +3128,74 @@ private void buildExpandRelNode( | |
| } | ||
| } | ||
|
|
||
| private void buildExpandRelNode( | ||
| RexNode arrayFieldRexNode, | ||
| String arrayFieldName, | ||
| String alias, | ||
| Integer mvExpandLimit, | ||
| CalcitePlanContext context) { | ||
|
|
||
| // Convert incoming RexNode to RexInputRef when possible; otherwise resolve by field name. | ||
| RexInputRef arrayFieldRex; | ||
| if (arrayFieldRexNode instanceof RexInputRef) { | ||
| arrayFieldRex = (RexInputRef) arrayFieldRexNode; | ||
| } else { | ||
| RelDataType currentRowType = context.relBuilder.peek().getRowType(); | ||
| RelDataTypeField fld = currentRowType.getField(arrayFieldName, false, false); | ||
|
Comment on lines
+3142
to
+3144
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What kind of case falls into this branch? Which test case associated with this branch? |
||
| if (fld != null) { | ||
| arrayFieldRex = context.rexBuilder.makeInputRef(currentRowType, fld.getIndex()); | ||
| } else { | ||
| throw new IllegalArgumentException( | ||
| "buildExpandRelNode: expected RexInputRef or resolvable field name: " + arrayFieldName); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this expected to be visible to the user? In that case, error message should be meaningful for the user.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch — I updated the method to throw SemanticCheckException with clear, user-facing messages ("field not found" / "expected ARRAY but found X") and to validate type both at resolve-time and after materialization (to catch rename/alias cases) |
||
| } | ||
| } | ||
|
|
||
| // Capture left node and its schema BEFORE calling build() | ||
| RelNode leftNode = context.relBuilder.peek(); | ||
| RelDataType leftRowType = leftNode.getRowType(); | ||
|
|
||
| // Resolve the array field index in left schema by name (robust); fallback to original index | ||
| RelDataTypeField leftField = leftRowType.getField(arrayFieldName, false, false); | ||
| int arrayFieldIndexInLeft = | ||
| (leftField != null) ? leftField.getIndex() : arrayFieldRex.getIndex(); | ||
|
|
||
| // Create correlation variable while left is still on the builder stack | ||
| Holder<RexCorrelVariable> correlVariable = Holder.empty(); | ||
| context.relBuilder.variable(correlVariable::set); | ||
|
|
||
| // Create correlated field access while left is still on the builder stack | ||
| RexNode correlArrayFieldAccess = | ||
| context.relBuilder.field( | ||
| context.rexBuilder.makeCorrel(leftRowType, correlVariable.get().id), | ||
| arrayFieldIndexInLeft); | ||
|
|
||
| // Materialize leftBuilt | ||
| RelNode leftBuilt = context.relBuilder.build(); | ||
|
|
||
| // Use unified helper to build right/uncollect + correlate + cleanup | ||
| buildUnnestForLeft( | ||
| leftBuilt, | ||
| leftRowType, | ||
| arrayFieldIndexInLeft, | ||
| arrayFieldName, | ||
| alias, | ||
| correlVariable, | ||
| correlArrayFieldAccess, | ||
| mvExpandLimit, | ||
| context); | ||
| } | ||
|
|
||
| private void buildMvExpandRelNode( | ||
| RexInputRef arrayFieldRex, | ||
| String arrayFieldName, | ||
| String alias, | ||
| Integer mvExpandLimit, | ||
| CalcitePlanContext context) { | ||
|
|
||
| // Delegate to the canonical expand implementation (pass the per-document limit through). | ||
| buildExpandRelNode(arrayFieldRex, arrayFieldName, alias, mvExpandLimit, context); | ||
| } | ||
|
|
||
| /** Creates an optimized sed call using native Calcite functions */ | ||
| private RexNode createOptimizedSedCall( | ||
| RexNode fieldRex, String sedExpression, CalcitePlanContext context) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.