
Commit c81bfa7

Improved the inference pre-optimization.
1 parent 91fc05e commit c81bfa7

File tree

9 files changed: +678 additions, -324 deletions


x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/InferenceFunction.java

Lines changed: 16 additions & 0 deletions
@@ -11,6 +11,7 @@
 import org.elasticsearch.xpack.esql.core.expression.Expression;
 import org.elasticsearch.xpack.esql.core.expression.function.Function;
 import org.elasticsearch.xpack.esql.core.tree.Source;
+import org.elasticsearch.xpack.esql.inference.InferenceFunctionEvaluator;
 
 import java.util.List;
 
@@ -35,5 +36,20 @@ protected InferenceFunction(Source source, List<Expression> children) {
      */
     public abstract TaskType taskType();
 
+    /**
+     * Returns a new instance of the function with the specified inference resolution error.
+     */
     public abstract PlanType withInferenceResolutionError(String inferenceId, String error);
+
+    /**
+     * Returns the inference function evaluator factory.
+     */
+    public abstract InferenceFunctionEvaluator.Factory inferenceEvaluatorFactory();
+
+    /**
+     * Returns true if the function has a nested inference function.
+     */
+    public boolean hasNestedInferenceFunction() {
+        return anyMatch(e -> e instanceof InferenceFunction && e != this);
+    }
 }
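
Taken together, the two new members let the pre-optimizer decide whether a given inference call can be evaluated ahead of time and, if so, obtain an evaluator for it. A minimal caller-side sketch, assuming fn, inferenceRunner, foldContext and listener are in scope (the real logic lives in the InferenceFunctionConstantFolding rule added later in this commit):

// Sketch only (not part of the diff): fold a single inference function into a constant.
// A function qualifies when all of its arguments are constants (foldable) and it does not
// wrap another inference function that still has to be folded first.
if (fn.foldable() && fn.hasNestedInferenceFunction() == false) {
    fn.inferenceEvaluatorFactory()
        .get(inferenceRunner)         // evaluator bound to this specific function
        .eval(foldContext, listener); // the listener receives the folded Expression
}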

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java

Lines changed: 7 additions & 0 deletions
@@ -20,6 +20,8 @@
 import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle;
 import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
 import org.elasticsearch.xpack.esql.expression.function.Param;
+import org.elasticsearch.xpack.esql.inference.InferenceFunctionEvaluator;
+import org.elasticsearch.xpack.esql.inference.textembedding.TextEmbeddingFunctionEvaluator;
 import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
 
 import java.io.IOException;
@@ -129,6 +131,11 @@ public TaskType taskType() {
         return TaskType.TEXT_EMBEDDING;
     }
 
+    @Override
+    public InferenceFunctionEvaluator.Factory inferenceEvaluatorFactory() {
+        return inferenceRunner -> new TextEmbeddingFunctionEvaluator(this, inferenceRunner);
+    }
+
     @Override
     public TextEmbedding withInferenceResolutionError(String inferenceId, String error) {
         return new TextEmbedding(source(), inputText, new UnresolvedAttribute(inferenceId().source(), inferenceId, error));
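
Because InferenceFunctionEvaluator.Factory (introduced below) has a single abstract method, the lambda above is all that is needed; written out as an anonymous class it would read roughly like this (equivalent form, shown for illustration only):

// Equivalent, more verbose form of the factory returned above (illustration only).
return new InferenceFunctionEvaluator.Factory() {
    @Override
    public InferenceFunctionEvaluator get(InferenceRunner inferenceRunner) {
        return new TextEmbeddingFunctionEvaluator(TextEmbedding.this, inferenceRunner);
    }
};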

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceFunctionEvaluator.java

Lines changed: 2 additions & 8 deletions
@@ -10,18 +10,12 @@
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.xpack.esql.core.expression.Expression;
 import org.elasticsearch.xpack.esql.core.expression.FoldContext;
-import org.elasticsearch.xpack.esql.expression.function.inference.InferenceFunction;
-import org.elasticsearch.xpack.esql.expression.function.inference.TextEmbedding;
-import org.elasticsearch.xpack.esql.inference.textembedding.TextEmbeddingFunctionEvaluator;
 
 public interface InferenceFunctionEvaluator {
 
     void eval(FoldContext foldContext, ActionListener<Expression> listener);
 
-    static InferenceFunctionEvaluator get(InferenceFunction<?> inferenceFunction, InferenceRunner inferenceRunner) {
-        return switch (inferenceFunction) {
-            case TextEmbedding textEmbedding -> new TextEmbeddingFunctionEvaluator(textEmbedding, inferenceRunner);
-            default -> throw new IllegalArgumentException("Unsupported inference function: " + inferenceFunction.getClass());
-        };
+    interface Factory {
+        InferenceFunctionEvaluator get(InferenceRunner inferenceRunner);
     }
 }
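
The calling convention changes accordingly: evaluator construction is no longer dispatched through a central switch, each inference function supplies its own factory instead. Both forms below appear verbatim in this commit's diffs (the first in the code removed from LogicalPlanPreOptimizer, the second in the new InferenceFunctionConstantFolding rule):

// Before: central static dispatch over the concrete function type.
InferenceFunctionEvaluator.get(inferenceFunction, inferenceRunner).eval(foldContext, listener);

// After: each function supplies its own evaluator factory.
inferenceFunction.inferenceEvaluatorFactory().get(inferenceRunner).eval(foldContext, listener);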

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanPreOptimizer.java

Lines changed: 15 additions & 56 deletions
@@ -8,19 +8,13 @@
 package org.elasticsearch.xpack.esql.optimizer;
 
 import org.elasticsearch.action.ActionListener;
-import org.elasticsearch.action.support.CountDownActionListener;
-import org.elasticsearch.xpack.esql.core.expression.Expression;
-import org.elasticsearch.xpack.esql.core.expression.FoldContext;
-import org.elasticsearch.xpack.esql.expression.function.inference.InferenceFunction;
-import org.elasticsearch.xpack.esql.inference.InferenceFunctionEvaluator;
-import org.elasticsearch.xpack.esql.inference.InferenceRunner;
+import org.elasticsearch.action.support.SubscribableListener;
+import org.elasticsearch.xpack.esql.optimizer.rules.logical.preoptimizer.InferenceFunctionConstantFolding;
+import org.elasticsearch.xpack.esql.optimizer.rules.logical.preoptimizer.PreOptimizerRule;
 import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
 import org.elasticsearch.xpack.esql.plugin.TransportActionServices;
 
-import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
 /**
  * The class is responsible for invoking any steps that need to be applied to the logical plan,
@@ -31,10 +25,10 @@
  */
 public class LogicalPlanPreOptimizer {
 
-    private final InferenceFunctionFolding inferenceFunctionFolding;
+    private final List<PreOptimizerRule> rules;
 
     public LogicalPlanPreOptimizer(TransportActionServices services, LogicalPreOptimizerContext preOptimizerContext) {
-        this.inferenceFunctionFolding = new InferenceFunctionFolding(services.inferenceRunner(), preOptimizerContext.foldCtx());
+        rules = List.of(new InferenceFunctionConstantFolding(services.inferenceRunner(), preOptimizerContext.foldCtx()));
     }
 
     /**
@@ -55,54 +49,19 @@ public void preOptimize(LogicalPlan plan, ActionListener<LogicalPlan> listener)
         }));
     }
 
+    /**
+     * Loop over the rules and apply them to the logical plan.
+     *
+     * @param plan the analyzed logical plan to pre-optimize
+     * @param listener the listener returning the pre-optimized plan when pre-optimization is complete
+     */
     private void doPreOptimize(LogicalPlan plan, ActionListener<LogicalPlan> listener) {
-        inferenceFunctionFolding.foldInferenceFunctions(plan, listener);
-    }
-
-    private static class InferenceFunctionFolding {
-        private final InferenceRunner inferenceRunner;
-        private final FoldContext foldContext;
+        SubscribableListener<LogicalPlan> rulesListener = SubscribableListener.newSucceeded(plan);
 
-        private InferenceFunctionFolding(InferenceRunner inferenceRunner, FoldContext foldContext) {
-            this.inferenceRunner = inferenceRunner;
-            this.foldContext = foldContext;
+        for (PreOptimizerRule rule : rules) {
+            rulesListener = rulesListener.andThen((l, p) -> rule.apply(p, l));
         }
 
-        private void foldInferenceFunctions(LogicalPlan plan, ActionListener<LogicalPlan> listener) {
-            // First let's collect all the inference functions
-            List<InferenceFunction<?>> inferenceFunctions = new ArrayList<>();
-            plan.forEachExpressionUp(InferenceFunction.class, inferenceFunctions::add);
-
-            if (inferenceFunctions.isEmpty()) {
-                // No inference functions found. Return the original plan.
-                listener.onResponse(plan);
-                return;
-            }
-
-            // This is a map of inference functions to their results.
-            // We will use this map to replace the inference functions in the plan.
-            Map<InferenceFunction<?>, Expression> inferenceFunctionsToResults = new HashMap<>();
-
-            // Prepare a listener that will be called when all inference functions are done.
-            // This listener will replace the inference functions in the plan with their results.
-            CountDownActionListener completionListener = new CountDownActionListener(
-                inferenceFunctions.size(),
-                listener.map(
-                    ignored -> plan.transformExpressionsUp(InferenceFunction.class, f -> inferenceFunctionsToResults.getOrDefault(f, f))
-                )
-            );
-
-            // Try to compute the result for each inference function.
-            for (InferenceFunction<?> inferenceFunction : inferenceFunctions) {
-                foldInferenceFunction(inferenceFunction, completionListener.map(e -> {
-                    inferenceFunctionsToResults.put(inferenceFunction, e);
-                    return null;
-                }));
-            }
-        }
-
-        private void foldInferenceFunction(InferenceFunction<?> inferenceFunction, ActionListener<Expression> listener) {
-            InferenceFunctionEvaluator.get(inferenceFunction, inferenceRunner).eval(foldContext, listener);
-        }
+        rulesListener.addListener(listener);
     }
 }
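
The SubscribableListener chain makes the rules run strictly one after another while keeping the whole pipeline asynchronous: each andThen step is invoked only once the previous step has delivered its plan. A small standalone sketch of the same pattern using plain strings instead of plans (illustrative only; ActionListener and SubscribableListener are the same classes imported in the diff above):

// Each andThen step runs only after the previous one has completed successfully;
// a failure anywhere in the chain skips the remaining steps and reaches the final listener.
SubscribableListener<String> chain = SubscribableListener.newSucceeded("plan");
chain = chain.andThen((l, value) -> l.onResponse(value + " -> rule1"));
chain = chain.andThen((l, value) -> l.onResponse(value + " -> rule2"));
chain.addListener(ActionListener.wrap(
    result -> System.out.println(result),   // prints "plan -> rule1 -> rule2"
    e -> { throw new AssertionError(e); }
));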
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/preoptimizer/InferenceFunctionConstantFolding.java

Lines changed: 144 additions & 0 deletions

@@ -0,0 +1,144 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.optimizer.rules.logical.preoptimizer;
+
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.action.support.CountDownActionListener;
+import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.FoldContext;
+import org.elasticsearch.xpack.esql.expression.function.inference.InferenceFunction;
+import org.elasticsearch.xpack.esql.inference.InferenceRunner;
+import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Pre-optimizer rule that evaluates inference functions (like TEXT_EMBEDDING) into constant values.
+ * <p>
+ * This rule identifies foldable inference functions in the logical plan, executes them using the
+ * inference runner, and replaces them with their computed results. This enables downstream
+ * optimizations to work with the actual embedding values rather than the function calls.
+ * <p>
+ * The rule processes inference functions recursively, handling newly revealed functions that might
+ * appear after the first round of folding.
+ */
+public class InferenceFunctionConstantFolding implements PreOptimizerRule {
+    private final InferenceRunner inferenceRunner;
+    private final FoldContext foldContext;
+
+    /**
+     * Creates a new instance of the InferenceFunctionConstantFolding rule.
+     *
+     * @param inferenceRunner the inference runner to use for evaluating inference functions
+     * @param foldContext the fold context to use for evaluating inference functions
+     */
+    public InferenceFunctionConstantFolding(InferenceRunner inferenceRunner, FoldContext foldContext) {
+        this.inferenceRunner = inferenceRunner;
+        this.foldContext = foldContext;
+    }
+
+    /**
+     * Applies the InferenceFunctionConstantFolding rule to the given logical plan.
+     *
+     * @param plan the logical plan to apply the rule to
+     * @param listener the listener to notify when the rule has been applied
+     */
+    @Override
+    public void apply(LogicalPlan plan, ActionListener<LogicalPlan> listener) {
+        foldInferenceFunctions(plan, listener);
+    }
+
+    /**
+     * Recursively folds inference functions in the logical plan.
+     * <p>
+     * This method collects all foldable inference functions, evaluates them in parallel,
+     * and then replaces them with their results. If new inference functions are revealed
+     * after the first round of folding, it recursively processes them as well.
+     *
+     * @param plan the logical plan to fold inference functions in
+     * @param listener the listener to notify when the folding is complete
+     */
+    private void foldInferenceFunctions(LogicalPlan plan, ActionListener<LogicalPlan> listener) {
+        // First let's collect all the foldable inference functions
+        List<InferenceFunction<?>> inferenceFunctions = collectFoldableInferenceFunctions(plan);
+
+        if (inferenceFunctions.isEmpty()) {
+            // No inference functions that can be evaluated at this time were found. Return the original plan.
+            listener.onResponse(plan);
+            return;
+        }
+
+        // This is a map of inference functions to their results.
+        // We will use this map to replace the inference functions in the plan.
+        Map<InferenceFunction<?>, Expression> inferenceFunctionsToResults = new HashMap<>();
+
+        // Prepare a listener that will be called when all inference functions are done.
+        // This listener will replace the inference functions in the plan with their results and then recursively fold the remaining
+        // inference functions.
+        CountDownActionListener completionListener = new CountDownActionListener(
+            inferenceFunctions.size(),
+            listener.delegateFailureIgnoreResponseAndWrap(l -> {
+                // Replace the inference functions in the plan with their results
+                LogicalPlan next = plan.transformExpressionsUp(
+                    InferenceFunction.class,
+                    f -> inferenceFunctionsToResults.getOrDefault(f, f)
+                );
+
+                // Recursively fold the remaining inference functions
+                foldInferenceFunctions(next, l);
+            })
+        );
+
+        // Try to compute the result for each inference function.
+        for (InferenceFunction<?> inferenceFunction : inferenceFunctions) {
+            foldInferenceFunction(inferenceFunction, completionListener.map(e -> {
+                inferenceFunctionsToResults.put(inferenceFunction, e);
+                return null;
+            }));
+        }
+    }
+
+    /**
+     * Collects all foldable inference functions from the logical plan.
+     * <p>
+     * A function is considered foldable if:
+     * 1. It's an instance of InferenceFunction
+     * 2. It's marked as foldable (all parameters are constants)
+     * 3. It doesn't contain nested inference functions
+     *
+     * @param plan the logical plan to collect inference functions from
+     * @return a list of foldable inference functions
+     */
+    private List<InferenceFunction<?>> collectFoldableInferenceFunctions(LogicalPlan plan) {
+        List<InferenceFunction<?>> inferenceFunctions = new ArrayList<>();
+
+        plan.forEachExpressionUp(InferenceFunction.class, f -> {
+            if (f.foldable() && f.hasNestedInferenceFunction() == false) {
+                inferenceFunctions.add(f);
+            }
+        });
+
+        return inferenceFunctions;
+    }
+
+    /**
+     * Evaluates a single inference function asynchronously.
+     * <p>
+     * Uses the inference function's evaluator factory to create an evaluator
+     * that can process the function with the given inference runner.
+     *
+     * @param inferenceFunction the inference function to evaluate
+     * @param listener the listener to notify when the evaluation is complete
+     */
+    private void foldInferenceFunction(InferenceFunction<?> inferenceFunction, ActionListener<Expression> listener) {
+        inferenceFunction.inferenceEvaluatorFactory().get(inferenceRunner).eval(foldContext, listener);
+    }
+}
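
Inside foldInferenceFunctions, the CountDownActionListener gives a fan-out/fan-in: every collected function is evaluated in parallel, and the completion listener fires exactly once, after the last evaluation has reported back, to rewrite the plan and recurse. A stripped-down sketch of that pattern with hypothetical names (Task, Result, tasks, evaluateAsync, combine and handleFailure are placeholders, not ESQL or Elasticsearch APIs):

// Hypothetical sketch of the fan-out/fan-in pattern used by the rule.
Map<Task, Result> results = new ConcurrentHashMap<>(); // callbacks may arrive on different threads
CountDownActionListener allDone = new CountDownActionListener(
    tasks.size(),
    ActionListener.wrap(
        ignored -> combine(results),   // runs once, after every task has completed
        e -> handleFailure(e)          // any single failure fails the whole group
    )
);
for (Task task : tasks) {
    // map(...) stores the per-task result, then counts down the group listener.
    evaluateAsync(task, allDone.map(result -> {
        results.put(task, result);
        return null;
    }));
}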
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/preoptimizer/PreOptimizerRule.java

Lines changed: 25 additions & 0 deletions

@@ -0,0 +1,25 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.optimizer.rules.logical.preoptimizer;
+
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
+
+/**
+ * A rule that can be applied to a logical plan before it is optimized.
+ */
+public interface PreOptimizerRule {
+
+    /**
+     * Apply the rule to the logical plan.
+     *
+     * @param plan the analyzed logical plan to pre-optimize
+     * @param listener the listener returning the pre-optimized plan when pre-optimization is complete
+     */
+    void apply(LogicalPlan plan, ActionListener<LogicalPlan> listener);
+}
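
For illustration, a minimal, hypothetical rule that satisfies this contract (not part of the commit); a real rule calls listener.onResponse only once its possibly asynchronous work is finished, as InferenceFunctionConstantFolding does above:

package org.elasticsearch.xpack.esql.optimizer.rules.logical.preoptimizer;

import org.elasticsearch.action.ActionListener;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;

// Hypothetical example: a rule that completes immediately and leaves the plan unchanged.
public class NoOpPreOptimizerRule implements PreOptimizerRule {
    @Override
    public void apply(LogicalPlan plan, ActionListener<LogicalPlan> listener) {
        // Nothing to rewrite: hand the plan back to the caller as-is.
        listener.onResponse(plan);
    }
}

Wiring such a rule in would just mean adding it to the rules list built in the LogicalPlanPreOptimizer constructor; the SubscribableListener chain in doPreOptimize then runs it in sequence with the existing folding rule.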
