Also subdivide merge join. joinadvice
author Robert Haas <[email protected]>
Fri, 30 Aug 2024 19:57:47 +0000 (15:57 -0400)
committer Robert Haas <[email protected]>
Fri, 30 Aug 2024 19:57:47 +0000 (15:57 -0400)
src/backend/optimizer/path/allpaths.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/joinpath.c
src/include/optimizer/paths.h

index 7ed03b33a7e49a21dbaa1187bfeb5a706230f64d..8fbf110b13c4fb2c236ea45066f0783f79afa93d 100644 (file)
@@ -3374,7 +3374,11 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
                if (enable_hashjoin)
                        jsa_mask |= JSA_HASHJOIN;
                if (enable_mergejoin)
-                       jsa_mask |= JSA_MERGEJOIN;
+               {
+                       jsa_mask |= JSA_MERGEJOIN_PLAIN;
+                       if (enable_material)
+                               jsa_mask |= JSA_MERGEJOIN_MATERIALIZE;
+               }
                if (enable_nestloop)
                {
                        jsa_mask |= JSA_NESTLOOP_PLAIN;
index 38ad8fb50e8f3c6de63733708be289d12ec87988..0700c634bf470f08ddfee59a6a81336c65aec37f 100644 (file)
@@ -3676,7 +3676,12 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
        Assert(outerstartsel <= outerendsel);
        Assert(innerstartsel <= innerendsel);
 
-       disabled_nodes = (extra->jsa_mask & JSA_MERGEJOIN) == 0 ? 1 : 0;
+       /*
+        * Assume for now that this node is not itself disabled. We'll sort out
+        * whether that's really the case in final_cost_mergejoin(); here, we'll
+        * just account for any disabled child nodes.
+        */
+       disabled_nodes = 0;
 
        /* cost of source data */
 
@@ -3814,9 +3819,6 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
                                rescannedtuples;
        double          rescanratio;
 
-       /* Set the number of disabled nodes. */
-       path->jpath.path.disabled_nodes = workspace->disabled_nodes;
-
        /* Protect some assumptions below that rowcounts aren't zero */
        if (inner_path_rows <= 0)
                inner_path_rows = 1;
@@ -3943,16 +3945,20 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
                path->materialize_inner = false;
 
        /*
-        * Prefer materializing if it looks cheaper, unless the user has asked to
-        * suppress materialization.
+        * If merge joins with materialization are enabled, then choose
+        * materialization if either (a) it looks cheaper or (b) merge joins
+        * without materialization are disabled.
         */
-       else if (enable_material && mat_inner_cost < bare_inner_cost)
+       else if ((extra->jsa_mask & JSA_MERGEJOIN_MATERIALIZE) != 0 &&
+                        (mat_inner_cost < bare_inner_cost ||
+                         (extra->jsa_mask & JSA_MERGEJOIN_PLAIN) == 0))
                path->materialize_inner = true;
 
        /*
-        * Even if materializing doesn't look cheaper, we *must* do it if the
-        * inner path is to be used directly (without sorting) and it doesn't
-        * support mark/restore.
+        * Regardless of what plan shapes are enabled and what the costs seem
+        * to be, we *must* materialize it if the inner path is to be used directly
+        * (without sorting) and it doesn't support mark/restore. Planner failure
+        * is not an option!
         *
         * Since the inner side must be ordered, and only Sorts and IndexScans can
         * create order to begin with, and they both support mark/restore, you
@@ -3960,10 +3966,6 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
         * merge joins can *preserve* the order of their inputs, so they can be
         * selected as the input of a mergejoin, and they don't support
         * mark/restore at present.
-        *
-        * We don't test the value of enable_material here, because
-        * materialization is required for correctness in this case, and turning
-        * it off does not entitle us to deliver an invalid plan.
         */
        else if (innersortkeys == NIL &&
                         !ExecSupportsMarkRestore(inner_path))
@@ -3980,7 +3982,8 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
         * rather than necessary for correctness, we skip it if enable_material is
         * off.
         */
-       else if (enable_material && innersortkeys != NIL &&
+       else if ((extra->jsa_mask & JSA_MERGEJOIN_MATERIALIZE) != 0 &&
+                        innersortkeys != NIL &&
                         relation_byte_size(inner_path_rows,
                                                                inner_path->pathtarget->width) >
                         (work_mem * 1024L))
@@ -3988,11 +3991,25 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
        else
                path->materialize_inner = false;
 
-       /* Charge the right incremental cost for the chosen case */
+       /* Get the number of disabled nodes, not yet including this one. */
+       path->jpath.path.disabled_nodes = workspace->disabled_nodes;
+
+       /*
+        * Charge the right incremental cost for the chosen case, and increment
+        * disabled_nodes if appropriate.
+        */
        if (path->materialize_inner)
+       {
                run_cost += mat_inner_cost;
+               if ((extra->jsa_mask & JSA_MERGEJOIN_MATERIALIZE) == 0)
+                       ++path->jpath.path.disabled_nodes;
+       }
        else
+       {
                run_cost += bare_inner_cost;
+               if ((extra->jsa_mask & JSA_MERGEJOIN_PLAIN) == 0)
+                       ++path->jpath.path.disabled_nodes;
+       }
 
        /* CPU costs */
 
index d853e2a64edad905558ef33ddb69f233f12f9834..0ef7e776fe2d805621b84fdde4e1933204ef5add 100644 (file)
@@ -237,7 +237,7 @@ add_paths_to_joinrel(PlannerInfo *root,
         * way of implementing a full outer join, so in that case we don't care
         * whether mergejoins are disabled.
         */
-       if ((extra.jsa_mask & JSA_MERGEJOIN) != 0 || jointype == JOIN_FULL)
+       if ((extra.jsa_mask & JSA_MERGEJOIN_ANY) != 0 || jointype == JOIN_FULL)
                extra.mergeclause_list = select_mergejoin_clauses(root,
                                                                                                                  joinrel,
                                                                                                                  outerrel,
index 94779eff9a3091309c5898e1792c3c0db0c9dc89..f4ff3e98d1b829c63e7b3076bcf5d27d4c2b5451 100644 (file)
  * mask is computed on the basis of the various enable_* GUCs, and can be
  * overridden by hooks.
  *
- * We have five main join strategie: a foreign join (when supported by the
+ * We have five main join strategies: a foreign join (when supported by the
  * relevant FDW), a merge join, a nested loop, a hash join, and a partitionwise
- * join. Nested loops are further subdivided depending on whether the inner
- * side of the join is materialized, memoized, or neither (which we here call
- * a "plain" nested loop).
+ * join. Merge joins are further subdivided based on whether the inner side
+ * is materialized, and nested loops are further subdivided based on whether
+ * the inner side is materialized, memoized, or neither. "Plain" means a
+ * strategy where neither materialization nor memoization is used.
+ *
+ * If you don't care whether materialization or memoization is used, set all
+ * the bits for the relevant major join strategy. If you do care, just set the
+ * subset of bits that correspond to the cases you want to allow.
  */
 #define JSA_FOREIGN                                            0x0001
-#define JSA_MERGEJOIN                                  0x0002
-#define JSA_NESTLOOP_PLAIN                             0x0004
-#define JSA_NESTLOOP_MATERIALIZE               0x0008
-#define JSA_NESTLOOP_MEMOIZE                   0x0010
-#define JSA_HASHJOIN                                   0x0020
-#define JSA_PARTITIONWISE                              0x0040
+#define JSA_MERGEJOIN_PLAIN                            0x0002
+#define JSA_MERGEJOIN_MATERIALIZE              0x0004
+#define JSA_NESTLOOP_PLAIN                             0x0008
+#define JSA_NESTLOOP_MATERIALIZE               0x0010
+#define JSA_NESTLOOP_MEMOIZE                   0x0020
+#define JSA_HASHJOIN                                   0x0040
+#define JSA_PARTITIONWISE                              0x0080
+
+#define JSA_MERGEJOIN_ANY      \
+       (JSA_MERGEJOIN_PLAIN | JSA_MERGEJOIN_MATERIALIZE)
+#define JSA_NESTLOOP_ANY \
+       (JSA_NESTLOOP_PLAIN | JSA_NESTLOOP_MATERIALIZE | JSA_NESTLOOP_MEMOIZE)
 
 /*
  * allpaths.c