Check parallel safety in generate_useful_gather_paths

author Tomas Vondra <[email protected]>
Mon, 21 Dec 2020 17:29:46 +0000 (18:29 +0100)
committer Tomas Vondra <[email protected]>
Mon, 21 Dec 2020 17:29:49 +0000 (18:29 +0100)
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index a1b3e4b82158d83a84ba4f15bcee767b8ad1678d..627d08b78a48f71da789cbe3878ee4b9d4ed7393 100644 (file)
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -2802,6 +2802,9 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
   * This allows us to do incremental sort on top of an index scan under a gather
   * merge node, i.e. parallelized.
   *
+ * If the require_parallel_safe is true, we also require the expressions to
+ * be parallel safe (which allows pushing the sort below Gather Merge).
+ *
   * XXX At the moment this can only ever return a list with a single element,
   * because it looks at query_pathkeys only. So we might return the pathkeys
   * directly, but it seems plausible we'll want to consider other orderings
@@ -2809,7 +2812,8 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
   * merge joins.
   */
  static List *
-get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
+get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel,
+                                bool require_parallel_safe)
  {
     List       *useful_pathkeys_list = NIL;
  
@@ -2839,8 +2843,11 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
              * meet criteria of EC membership in the current relation, we
              * enable not just an incremental sort on the entirety of
              * query_pathkeys but also incremental sort below a JOIN.
+            *
+            * If requested, ensure the expression is parallel safe too.
              */
-           if (!find_em_expr_usable_for_sorting_rel(pathkey_ec, rel))
+           if (!find_em_expr_usable_for_sorting_rel(root, pathkey_ec, rel,
+                                                    require_parallel_safe))
                 break;
  
             npathkeys++;
@@ -2894,7 +2901,7 @@ generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_r
     generate_gather_paths(root, rel, override_rows);
  
     /* consider incremental sort for interesting orderings */
-   useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel);
+   useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel, true);
  
     /* used for explicit (full) sort paths */
     cheapest_partial_path = linitial(rel->partial_pathlist);
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index f98fd7b3eb8b0f44995262f872a029518b3eed1d..b49ee52cbad8254fcc9a6f91b246f9b52db8aa0e 100644 (file)
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -803,7 +803,8 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
   * applied in prepare_sort_from_pathkeys.
   */
  Expr *
-find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel)
+find_em_expr_usable_for_sorting_rel(PlannerInfo *root, EquivalenceClass *ec,
+                                   RelOptInfo *rel, bool require_parallel_safe)
  {
     ListCell   *lc_em;
  
@@ -833,6 +834,12 @@ find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel)
         if (!bms_is_subset(em->em_relids, rel->relids))
             continue;
  
+       /*
+        * If requested, reject expressions that are not parallel-safe.
+        */
+       if (require_parallel_safe && !is_parallel_safe(root, (Node *) em_expr))
+           continue;
+
         /*
          * As long as the expression isn't volatile then
          * prepare_sort_from_pathkeys is able to generate a new target entry,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 8a4c6f8b59ca6f824fbb511a3a0c992f9730c2b2..bf6c16b8040c068edcb9971220a29a7e40b7550e 100644 (file)
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -135,7 +135,10 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
                                                   Relids rel,
                                                   bool create_it);
  extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
-extern Expr *find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel);
+extern Expr *find_em_expr_usable_for_sorting_rel(PlannerInfo *root,
+                                                EquivalenceClass *ec,
+                                                RelOptInfo *rel,
+                                                bool require_parallel_safe);
  extern void generate_base_implied_equalities(PlannerInfo *root);
  extern List *generate_join_implied_equalities(PlannerInfo *root,
                                               Relids join_relids,
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 51471ae92de9d286d56045ae39356eb9988ce0d4..d316a7c4b909f3b75512758023ae942c6163acea 100644 (file)
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1551,6 +1551,46 @@ order by 1, 2;
     ->  Function Scan on generate_series
  (7 rows)
  
+-- Parallel sort but with expression (correlated subquery) that
+-- is prohibited in parallel plans.
+explain (costs off) select distinct
+  unique1,
+  (select t.unique1 from tenk1 where tenk1.unique1 = t.unique1)
+from tenk1 t, generate_series(1, 1000);
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: t.unique1, ((SubPlan 1))
+         ->  Gather
+               Workers Planned: 2
+               ->  Nested Loop
+                     ->  Parallel Index Only Scan using tenk1_unique1 on tenk1 t
+                     ->  Function Scan on generate_series
+               SubPlan 1
+                 ->  Index Only Scan using tenk1_unique1 on tenk1
+                       Index Cond: (unique1 = t.unique1)
+(11 rows)
+
+explain (costs off) select
+  unique1,
+  (select t.unique1 from tenk1 where tenk1.unique1 = t.unique1)
+from tenk1 t, generate_series(1, 1000)
+order by 1, 2;
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Sort
+   Sort Key: t.unique1, ((SubPlan 1))
+   ->  Gather
+         Workers Planned: 2
+         ->  Nested Loop
+               ->  Parallel Index Only Scan using tenk1_unique1 on tenk1 t
+               ->  Function Scan on generate_series
+         SubPlan 1
+           ->  Index Only Scan using tenk1_unique1 on tenk1
+                 Index Cond: (unique1 = t.unique1)
+(10 rows)
+
  -- Parallel sort but with expression not available until the upper rel.
  explain (costs off) select distinct sub.unique1, stringu1 || random()::text
  from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index cb48f200ce555a1249e2cbcf1d0dac0c750a8c8a..ff3af0fd165d87708cc72ed135c00add76b3fd94 100644 (file)
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -250,6 +250,17 @@ from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
  explain (costs off) select sub.unique1, md5(stringu1)
  from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
  order by 1, 2;
+-- Parallel sort but with expression (correlated subquery) that
+-- is prohibited in parallel plans.
+explain (costs off) select distinct
+  unique1,
+  (select t.unique1 from tenk1 where tenk1.unique1 = t.unique1)
+from tenk1 t, generate_series(1, 1000);
+explain (costs off) select
+  unique1,
+  (select t.unique1 from tenk1 where tenk1.unique1 = t.unique1)
+from tenk1 t, generate_series(1, 1000)
+order by 1, 2;
  -- Parallel sort but with expression not available until the upper rel.
  explain (costs off) select distinct sub.unique1, stringu1 || random()::text
  from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
author	Tomas Vondra <[email protected]>
	Mon, 21 Dec 2020 17:29:46 +0000 (18:29 +0100)
committer	Tomas Vondra <[email protected]>
	Mon, 21 Dec 2020 17:29:49 +0000 (18:29 +0100)
src/backend/optimizer/path/allpaths.c		\| blob \| blame \| history
src/backend/optimizer/path/equivclass.c		\| blob \| blame \| history
src/include/optimizer/paths.h		\| blob \| blame \| history
src/test/regress/expected/incremental_sort.out		\| blob \| blame \| history
src/test/regress/sql/incremental_sort.sql		\| blob \| blame \| history