5454import com .google .common .collect .HashMultimap ;
5555import com .google .common .collect .ImmutableList ;
5656import com .google .common .collect .Multimap ;
57+ import com .google .common .collect .Sets ;
5758
5859import org .checkerframework .checker .nullness .qual .Nullable ;
5960
6061import java .util .ArrayDeque ;
6162import java .util .ArrayList ;
6263import java .util .Collection ;
64+ import java .util .Deque ;
6365import java .util .HashMap ;
6466import java .util .HashSet ;
67+ import java .util .IdentityHashMap ;
6568import java .util .Iterator ;
6669import java .util .LinkedHashSet ;
6770import java .util .List ;
@@ -104,6 +107,8 @@ public class HepPlanner extends AbstractRelOptPlanner {
104107
105108 private final boolean noDag ;
106109
110+ private boolean largePlanMode = false ;
111+
107112 /**
108113 * Query graph, with edges directed from parent to child. This is a
109114 * single-rooted DAG, possibly with additional roots corresponding to
@@ -183,10 +188,47 @@ public HepPlanner(
183188 this .noDag = noDag ;
184189 }
185190
191+ /**
192+ * Create a new {@code HepPlanner} capable of execute multiple HepPrograms
193+ * with (noDag = false, isLargePlanMode = true, enableFiredRulesCache = true).
194+ *
195+ * <p>Unlike planners that require setRoot for every optimization pass,
196+ * this planner preserves the internal graph structure and optimized plan across
197+ * successive executions. This allows for multi-phrase optimization where the
198+ * output of one {@link HepProgram} serves as the immediate starting point for the next.
199+ *
200+ * <p><b>Usage Example:</b>
201+ * <pre>{@code
202+ * HepPlanner planner = new HepPlanner();
203+ * planner.setRoot(initPlanRoot);
204+ * planner.executeProgram(phrase1Program);
205+ * planner.dumpRuleAttemptsInfo(); // optional
206+ * planner.clear(); // clear the rules and rule match caches, the graph is reused
207+ * // other logics ...
208+ * planner.executeProgram(phrase2Program);
209+ * planner.clear();
210+ * ...
211+ * RelNode optimized = planner.buildFinalPlan();
212+ * }</pre>
213+ *
214+ * @see #setRoot(RelNode)
215+ * @see #executeProgram(HepProgram)
216+ * @see #dumpRuleAttemptsInfo()
217+ * @see #buildFinalPlan()
218+ */
219+ public HepPlanner () {
220+ this (HepProgram .builder ().build (), null , false , null , RelOptCostImpl .FACTORY );
221+ this .setLargePlanMode (true );
222+ this .setEnableFiredRulesCache (true );
223+ }
224+
186225 //~ Methods ----------------------------------------------------------------
187226
188227 @ Override public void setRoot (RelNode rel ) {
189- root = addRelToGraph (rel );
228+ // initRelToVertexCache is used to quickly skip common nodes before traversing its inputs
229+ IdentityHashMap <RelNode , HepRelVertex > initRelToVertexCache = (isLargePlanMode () && !noDag )
230+ ? new IdentityHashMap <>() : null ;
231+ root = addRelToGraph (rel , initRelToVertexCache );
190232 dumpGraph ();
191233 }
192234
@@ -204,6 +246,14 @@ public HepPlanner(
204246 this .firedRulesCacheIndex .clear ();
205247 }
206248
249+ public boolean isLargePlanMode () {
250+ return largePlanMode ;
251+ }
252+
253+ public void setLargePlanMode (final boolean largePlanMode ) {
254+ this .largePlanMode = largePlanMode ;
255+ }
256+
207257 @ Override public RelNode changeTraits (RelNode rel , RelTraitSet toTraits ) {
208258 // Ignore traits, except for the root, where we remember
209259 // what the final conversion should be.
@@ -214,6 +264,11 @@ public HepPlanner(
214264 }
215265
216266 @ Override public RelNode findBestExp () {
267+ if (isLargePlanMode ()) {
268+ throw new UnsupportedOperationException ("findBestExp is not supported in large plan mode"
269+ + ", please use buildFinalPlan() to get the final plan." );
270+ }
271+
217272 requireNonNull (root , "'root' must not be null" );
218273
219274 executeProgram (mainProgram );
@@ -224,6 +279,10 @@ public HepPlanner(
224279 return buildFinalPlan (requireNonNull (root , "'root' must not be null" ));
225280 }
226281
282+ public RelNode buildFinalPlan () {
283+ return buildFinalPlan (requireNonNull (root , "'root' must not be null" ));
284+ }
285+
227286 /**
228287 * Enables or disables the fire-rule cache.
229288 *
@@ -237,7 +296,7 @@ public void setEnableFiredRulesCache(boolean enable) {
237296
238297 /** Top-level entry point for a program. Initializes state and then invokes
239298 * the program. */
240- private void executeProgram (HepProgram program ) {
299+ public void executeProgram (HepProgram program ) {
241300 final HepInstruction .PrepareContext px =
242301 HepInstruction .PrepareContext .create (this );
243302 final HepState state = program .prepare (px );
@@ -249,7 +308,7 @@ void executeProgram(HepProgram instruction, HepProgram.State state) {
249308 state .instructionStates .forEach (instructionState -> {
250309 instructionState .execute ();
251310 int delta = nTransformations - nTransformationsLastGC ;
252- if (delta > graphSizeLastGC ) {
311+ if (! isLargePlanMode () && delta > graphSizeLastGC ) {
253312 // The number of transformations performed since the last
254313 // garbage collection is greater than the number of vertices in
255314 // the graph at that time. That means there should be a
@@ -492,12 +551,23 @@ private Iterator<HepRelVertex> getGraphIterator(
492551 HepProgram .State programState , HepRelVertex start ) {
493552 switch (requireNonNull (programState .matchOrder , "programState.matchOrder" )) {
494553 case ARBITRARY :
554+ if (isLargePlanMode ()) {
555+ return BreadthFirstIterator .of (graph , start ).iterator ();
556+ }
557+ return DepthFirstIterator .of (graph , start ).iterator ();
495558 case DEPTH_FIRST :
559+ if (isLargePlanMode ()) {
560+ throw new UnsupportedOperationException ("DepthFirstIterator is too slow for large plan mode"
561+ + ", please setLargePlanMode(false) if you don't want to use this mode." );
562+ }
496563 return DepthFirstIterator .of (graph , start ).iterator ();
497564 case TOP_DOWN :
498565 case BOTTOM_UP :
499566 assert start == root ;
500- collectGarbage ();
567+ if (!isLargePlanMode ()) {
568+ // NOTE: Planner already run GC for every transformation removed subtree
569+ collectGarbage ();
570+ }
501571 return TopologicalOrderIterator .of (graph , programState .matchOrder ).iterator ();
502572 default :
503573 throw new
@@ -774,7 +844,8 @@ private HepRelVertex applyTransformationResults(
774844 parents .add (parent );
775845 }
776846
777- HepRelVertex newVertex = addRelToGraph (bestRel );
847+ HepRelVertex newVertex = addRelToGraph (bestRel , null );
848+ Set <HepRelVertex > garbageVertexSet = new LinkedHashSet <>();
778849
779850 // There's a chance that newVertex is the same as one
780851 // of the parents due to common subexpression recognition
@@ -785,10 +856,12 @@ private HepRelVertex applyTransformationResults(
785856 if (iParentMatch != -1 ) {
786857 newVertex = parents .get (iParentMatch );
787858 } else {
788- contractVertices (newVertex , vertex , parents );
859+ contractVertices (newVertex , vertex , parents , garbageVertexSet );
789860 }
790861
791- if (getListener () != null ) {
862+ if (isLargePlanMode ()) {
863+ collectGarbage (garbageVertexSet );
864+ } else if (getListener () != null ) {
792865 // Assume listener doesn't want to see garbage.
793866 collectGarbage ();
794867 }
@@ -824,19 +897,26 @@ private HepRelVertex applyTransformationResults(
824897 }
825898
826899 private HepRelVertex addRelToGraph (
827- RelNode rel ) {
900+ RelNode rel , @ Nullable IdentityHashMap < RelNode , HepRelVertex > initRelToVertexCache ) {
828901 // Check if a transformation already produced a reference
829902 // to an existing vertex.
830903 if (graph .vertexSet ().contains (rel )) {
831904 return (HepRelVertex ) rel ;
832905 }
833906
907+ // Fast equiv vertex for set root, before add children.
908+ if (initRelToVertexCache != null && initRelToVertexCache .containsKey (rel )) {
909+ HepRelVertex vertex = initRelToVertexCache .get (rel );
910+ assert vertex != null ;
911+ return vertex ;
912+ }
913+
834914 // Recursively add children, replacing this rel's inputs
835915 // with corresponding child vertices.
836916 final List <RelNode > inputs = rel .getInputs ();
837917 final List <RelNode > newInputs = new ArrayList <>();
838918 for (RelNode input1 : inputs ) {
839- HepRelVertex childVertex = addRelToGraph (input1 );
919+ HepRelVertex childVertex = addRelToGraph (input1 , initRelToVertexCache );
840920 newInputs .add (childVertex );
841921 }
842922
@@ -868,14 +948,19 @@ private HepRelVertex addRelToGraph(
868948 graph .addEdge (newVertex , (HepRelVertex ) input );
869949 }
870950
951+ if (initRelToVertexCache != null ) {
952+ initRelToVertexCache .put (rel , newVertex );
953+ }
954+
871955 nTransformations ++;
872956 return newVertex ;
873957 }
874958
875959 private void contractVertices (
876960 HepRelVertex preservedVertex ,
877961 HepRelVertex discardedVertex ,
878- List <HepRelVertex > parents ) {
962+ List <HepRelVertex > parents ,
963+ Set <HepRelVertex > garbageVertexSet ) {
879964 if (preservedVertex == discardedVertex ) {
880965 // Nop.
881966 return ;
@@ -897,17 +982,32 @@ private void contractVertices(
897982 }
898983 clearCache (parent );
899984 graph .removeEdge (parent , discardedVertex );
985+
986+ if (!noDag && isLargePlanMode ()) {
987+ // Recursive merge parent path
988+ HepRelVertex addedVertex = mapDigestToVertex .get (parentRel .getRelDigest ());
989+ if (addedVertex != null && addedVertex != parent ) {
990+ List <HepRelVertex > parentCopy = // contractVertices will change predecessorList
991+ new ArrayList <>(Graphs .predecessorListOf (graph , parent ));
992+ contractVertices (addedVertex , parent , parentCopy , garbageVertexSet );
993+ continue ;
994+ }
995+ }
996+
900997 graph .addEdge (parent , preservedVertex );
901998 updateVertex (parent , parentRel );
902999 }
9031000
9041001 // NOTE: we don't actually do graph.removeVertex(discardedVertex),
9051002 // because it might still be reachable from preservedVertex.
9061003 // Leave that job for garbage collection.
1004+ // If isLargePlanMode is true, we will do fine grant GC in tryCleanVertices
1005+ // by tracking discarded vertex subtree's inward references.
9071006
9081007 if (discardedVertex == root ) {
9091008 root = preservedVertex ;
9101009 }
1010+ garbageVertexSet .add (discardedVertex );
9111011 }
9121012
9131013 /**
@@ -992,6 +1092,58 @@ private RelNode buildFinalPlan(HepRelVertex vertex) {
9921092 return rel ;
9931093 }
9941094
  /**
   * Tries to remove a discarded vertex, then recursively any of its children
   * that become unreferenced as a result.
   *
   * <p>A vertex is only removed when it is not the root, is still present in
   * the graph, and has no inward edges (i.e. no parent references it).
   */
  private void tryCleanVertices(HepRelVertex vertex) {
    // Keep the vertex if it is the root, already removed, or still referenced.
    if (vertex == root || !graph.vertexSet().contains(vertex)
        || !graph.getInwardEdges(vertex).isEmpty()) {
      return;
    }

    // rel is the no inward edges subtree root.
    RelNode rel = vertex.getCurrentRel();
    notifyDiscard(rel);

    // Snapshot the children before detaching, preserving encounter order.
    Set<HepRelVertex> outVertices = new LinkedHashSet<>();
    List<DefaultEdge> outEdges = graph.getOutwardEdges(vertex);
    for (DefaultEdge outEdge : outEdges) {
      outVertices.add((HepRelVertex) outEdge.target);
    }

    for (HepRelVertex child : outVertices) {
      graph.removeEdge(vertex, child);
    }
    assert graph.getInwardEdges(vertex).isEmpty();
    assert graph.getOutwardEdges(vertex).isEmpty();
    // NOTE(review): removes via the vertexSet() view rather than graph.removeVertex —
    // assumes the set is a mutable view backing the graph; confirm against the
    // DirectedGraph implementation in use.
    graph.vertexSet().remove(vertex);
    mapDigestToVertex.remove(rel.getRelDigest());

    // Children orphaned by the detach above are cleaned recursively.
    // NOTE(review): recursion depth is bounded by plan height — for very deep
    // "large" plans this could overflow the stack; confirm expected plan shapes.
    for (HepRelVertex child : outVertices) {
      tryCleanVertices(child);
    }
    clearCache(vertex);

    if (enableFiredRulesCache) {
      // Presumably drops fired-rule cache entries recorded for this rel's id —
      // verify against the firedRulesCacheIndex maintenance logic.
      for (List<Integer> relIds : firedRulesCacheIndex.get(rel.getId())) {
        firedRulesCache.removeAll(relIds);
      }
    }
  }
1131+
  /**
   * Fine-grained garbage collection for large-plan mode: attempts to clean only
   * the vertices discarded by the current transformation (and their orphaned
   * descendants), instead of sweeping the whole graph.
   */
  private void collectGarbage(final Set<HepRelVertex> garbageVertexSet) {
    for (HepRelVertex vertex : garbageVertexSet) {
      tryCleanVertices(vertex);
    }

    if (LOGGER.isTraceEnabled()) {
      // Self-check, only when tracing: a full collectGarbage() pass must find
      // nothing more to remove, otherwise the fine-grained cleanup above missed
      // garbage. NOTE(review): the full GC runs only under trace logging, so
      // trace-enabled runs mutate GC bookkeeping that normal runs do not.
      int currentGraphSize = graph.vertexSet().size();
      collectGarbage();
      int currentGraphSize2 = graph.vertexSet().size();
      if (currentGraphSize != currentGraphSize2) {
        throw new AssertionError("Graph size changed after garbage collection");
      }
    }
  }
1146+
9951147 private void collectGarbage () {
9961148 if (nTransformations == nTransformationsLastGC ) {
9971149 // No modifications have taken place since the last gc,
@@ -1061,12 +1213,48 @@ private void assertNoCycles() {
10611213 + cyclicVertices );
10621214 }
10631215
1216+ private void assertGraphConsistent () {
1217+ int liveNum = 0 ;
1218+ for (HepRelVertex vertex : BreadthFirstIterator .of (graph , root )) {
1219+ if (graph .getOutwardEdges (vertex ).size ()
1220+ != Sets .newHashSet (vertex .getCurrentRel ().getInputs ()).size ()) {
1221+ throw new AssertionError ("HepPlanner:outward edge num is different "
1222+ + "with input node num, " + vertex );
1223+ }
1224+ for (DefaultEdge edge : graph .getInwardEdges (vertex )) {
1225+ if (!((HepRelVertex ) edge .source ).getCurrentRel ().getInputs ().contains (vertex )) {
1226+ throw new AssertionError ("HepPlanner:inward edge target is not in input node list, "
1227+ + vertex );
1228+ }
1229+ }
1230+ liveNum ++;
1231+ }
1232+
1233+ Set <RelNode > validSet = new HashSet <>();
1234+ Deque <RelNode > nodes = new ArrayDeque <>();
1235+ nodes .push (root .getCurrentRel ());
1236+ while (!nodes .isEmpty ()) {
1237+ RelNode node = nodes .pop ();
1238+ validSet .add (node );
1239+ for (RelNode input : node .getInputs ()) {
1240+ nodes .push (((HepRelVertex ) input ).getCurrentRel ());
1241+ }
1242+ }
1243+
1244+ if (liveNum == validSet .size ()) {
1245+ return ;
1246+ }
1247+ throw new AssertionError ("HepPlanner:Query graph live node num is different with root"
1248+ + " input valid node num, liveNodeNum: " + liveNum + ", validNodeNum: " + validSet .size ());
1249+ }
1250+
10641251 private void dumpGraph () {
10651252 if (!LOGGER .isTraceEnabled ()) {
10661253 return ;
10671254 }
10681255
10691256 assertNoCycles ();
1257+ assertGraphConsistent ();
10701258
10711259 HepRelVertex root = this .root ;
10721260 if (root == null ) {
0 commit comments