if (opcintype == cur_em->em_datatype &&
equal(expr, cur_em->em_expr))
- return cur_ec; /* Match! */
+ {
+ /*
+ * Match!
+ *
+ * Copy the sortref if it wasn't set yet. That may happen if
+ * the ec was constructed from a WHERE clause, i.e. it doesn't
+ * have a target reference at all.
+ */
+ if (cur_ec->ec_sortref == 0 && sortref > 0)
+ cur_ec->ec_sortref = sortref;
+ return cur_ec;
+ }
}
}
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/plannodes.h"
+#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "partitioning/partbounds.h"
#include "utils/lsyscache.h"
+/* Consider reordering of GROUP BY keys? */
+bool enable_group_by_reordering = true;
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
return false;
}
+/*
+ * group_keys_reorder_by_pathkeys
+ * Reorder GROUP BY keys to match the input pathkeys.
+ *
+ * 'pathkeys' is the ordering we would like the GROUP BY keys to follow.
+ * 'group_pathkeys' and 'group_clauses' are in/out parameters: on entry they
+ * point to the current GROUP BY pathkeys and SortGroupClauses, on exit they
+ * point to the reordered lists. 'num_groupby_pathkeys' limits the scan:
+ * a pathkey whose position in 'pathkeys' is >= this value ends the matching.
+ *
+ * Function returns new lists (pathkeys and clauses), original GROUP BY lists
+ * stay untouched.
+ *
+ * If 'pathkeys' or '*group_pathkeys' is NIL, nothing is done: the output
+ * parameters are left as they were and 0 is returned.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey, i.e. the
+ * length of the prefix of the reordered lists that follows 'pathkeys'.
+ */
+static int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+ List **group_clauses,
+ int num_groupby_pathkeys)
+{
+ List *new_group_pathkeys = NIL,
+ *new_group_clauses = NIL;
+ ListCell *lc;
+ int n;
+
+ if (pathkeys == NIL || *group_pathkeys == NIL)
+ return 0;
+
+ /*
+ * Walk the pathkeys (determining ordering of the input path) and see if
+ * there's a matching GROUP BY key. If we find one, we append it to the
+ * list, and do the same for the clauses.
+ *
+ * Once we find the first pathkey without a matching GROUP BY key, the
+ * rest of the pathkeys are useless and can't be used to evaluate the
+ * grouping, so we abort the loop and ignore the remaining pathkeys.
+ */
+ foreach(lc, pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(lc);
+ SortGroupClause *sgc;
+
+ /*
+ * Pathkeys are built in a way that allows simply comparing pointers.
+ * Give up if we can't find the matching pointer. Also give up if
+ * there is no sortclause reference for some reason, or if we have
+ * already walked past the first num_groupby_pathkeys input pathkeys.
+ */
+ if (foreach_current_index(lc) >= num_groupby_pathkeys ||
+ !list_member_ptr(*group_pathkeys, pathkey) ||
+ pathkey->pk_eclass->ec_sortref == 0)
+ break;
+
+ /*
+ * Since 1349d27 pathkey coming from underlying node can be in the
+ * root->group_pathkeys but not in the processed_groupClause. So, we
+ * should be careful here: use the non-erroring lookup and stop
+ * matching when the clause list has no entry for this sortref.
+ */
+ sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+ *group_clauses);
+ if (!sgc)
+ /* The grouping clause does not cover this pathkey */
+ break;
+
+ /*
+ * Sort group clause should have an ordering operator as long as there
+ * is an associated pathkey.
+ */
+ Assert(OidIsValid(sgc->sortop));
+
+ new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+ new_group_clauses = lappend(new_group_clauses, sgc);
+ }
+
+ /*
+ * remember the number of pathkeys with a matching GROUP BY key; this must
+ * be taken before the unmatched keys are appended below
+ */
+ n = list_length(new_group_pathkeys);
+
+ /*
+ * append the remaining group pathkeys (will be treated as not sorted);
+ * entries already placed in the matched prefix are not added a second
+ * time, and the results are stored through the in/out parameters
+ */
+ *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+ *group_pathkeys);
+ *group_clauses = list_concat_unique_ptr(new_group_clauses,
+ *group_clauses);
+
+ return n;
+}
+
+/*
+ * pathkeys_are_duplicate
+ *		Report whether the given pathkeys are already present in a list of
+ *		PathKeyInfo nodes.
+ *
+ * Each element of 'infos' is examined in turn; the answer is true as soon as
+ * one of them carries pathkeys that compare_pathkeys() reports as
+ * PATHKEYS_EQUAL to 'pathkeys'. If no element matches (which includes the
+ * case of an empty 'infos' list) the answer is false.
+ */
+static bool
+pathkeys_are_duplicate(List *infos, List *pathkeys)
+{
+	bool		found = false;
+	ListCell   *cell;
+
+	foreach(cell, infos)
+	{
+		PathKeyInfo *candidate = lfirst_node(PathKeyInfo, cell);
+
+		/* keep looking until an equal ordering shows up */
+		if (compare_pathkeys(pathkeys, candidate->pathkeys) != PATHKEYS_EQUAL)
+			continue;
+
+		found = true;
+		break;
+	}
+
+	return found;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ * Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns a list of PathKeyInfo items, each representing an interesting
+ * ordering of GROUP BY keys. Each item stores pathkeys and clauses in the
+ * matching order. The list always contains at least one item.
+ *
+ * The function considers (and keeps) multiple GROUP BY orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause,
+ * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible).
+ *
+ * Only the first of these is returned when enable_group_by_reordering is
+ * off or when the query uses grouping sets. An alternative ordering is
+ * added only if its pathkeys are not already present in the result (see
+ * pathkeys_are_duplicate).
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
+{
+ Query *parse = root->parse;
+ List *infos = NIL;
+ PathKeyInfo *info;
+
+ List *pathkeys = root->group_pathkeys;
+ List *clauses = root->processed_groupClause;
+
+ /* always return at least the original pathkeys/clauses */
+ info = makeNode(PathKeyInfo);
+ info->pathkeys = pathkeys;
+ info->clauses = clauses;
+ infos = lappend(infos, info);
+
+ /*
+ * Should we try generating alternative orderings of the group keys? If
+ * not, we produce only the order specified in the query, i.e. the
+ * optimization is effectively disabled.
+ */
+ if (!enable_group_by_reordering)
+ return infos;
+
+ /*
+ * Grouping sets have their own, more complex logic to decide the
+ * ordering, so no alternatives are generated for them here.
+ */
+ if (parse->groupingSets)
+ return infos;
+
+ /*
+ * If the path is sorted in some way, try reordering the group keys to
+ * match as much of the path's ordering as possible. Then thanks to
+ * incremental sort we would get this sort as cheap as possible.
+ *
+ * The reordering is kept only if at least one key matched and, when
+ * incremental sort is disabled, only if the number of matched keys
+ * equals root->num_groupby_pathkeys.
+ */
+ if (path->pathkeys &&
+ !pathkeys_contained_in(path->pathkeys, root->group_pathkeys))
+ {
+ int n;
+
+ n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses,
+ root->num_groupby_pathkeys);
+
+ if (n > 0 &&
+ (enable_incremental_sort || n == root->num_groupby_pathkeys) &&
+ !pathkeys_are_duplicate(infos, pathkeys))
+ {
+ info = makeNode(PathKeyInfo);
+ info->pathkeys = pathkeys;
+ info->clauses = clauses;
+
+ infos = lappend(infos, info);
+ }
+ }
+
+ /*
+ * Try reordering pathkeys to minimize the sort cost (this time consider
+ * the ORDER BY clause).
+ *
+ * With incremental sort disabled the reordering is kept only if every
+ * ORDER BY pathkey found a matching group key.
+ *
+ * NOTE(review): 'pathkeys' and 'clauses' are the same local variables
+ * that were passed by reference to the reordering attempt above, so this
+ * attempt starts from whatever lists that call left in them rather than
+ * from root->group_pathkeys / root->processed_groupClause. Confirm that
+ * this carry-over is intended.
+ */
+ if (root->sort_pathkeys &&
+ !pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
+ {
+ int n;
+
+ n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
+ &clauses,
+ root->num_groupby_pathkeys);
+
+ if (n > 0 &&
+ (enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
+ !pathkeys_are_duplicate(infos, pathkeys))
+ {
+ info = makeNode(PathKeyInfo);
+ info->pathkeys = pathkeys;
+ info->clauses = clauses;
+
+ infos = lappend(infos, info);
+ }
+ }
+
+ return infos;
+}
+
/*
* pathkeys_count_contained_in
* Same as pathkeys_contained_in, but also sets length of longest
return n_common_pathkeys;
}
+/*
+ * pathkeys_useful_for_grouping
+ *		Measure how long a prefix of 'pathkeys' can serve grouping without
+ *		a full explicit sort.
+ *
+ * Since the GROUP BY keys may be reordered, a path ordering is helpful for
+ * grouping as far as its leading pathkeys each correspond to some group key.
+ * The match need not cover all group keys; an incremental sort can supply
+ * the rest. We therefore walk the pathkeys from the front, counting those
+ * that appear among root->group_pathkeys, and stop at the first one that
+ * does not.
+ *
+ * Example: for pathkeys (a,b,c) and group keys (a,b,e) the result is 2,
+ * because (a,b) is usable and an incremental sort could then produce
+ * (a,b,e).
+ *
+ * Without this, paths whose ordering does not match the grouping keys
+ * directly (i.e. only after reordering) would not be retained.
+ *
+ * Returns the length of the matching pathkey prefix; 0 if there are no
+ * group pathkeys or the path is unordered.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	List	   *group_keys = root->group_pathkeys;
+	int			nmatched = 0;
+	ListCell   *cell;
+
+	/* nothing to count without grouping pathkeys or with an unordered path */
+	if (group_keys == NIL || pathkeys == NIL)
+		return 0;
+
+	foreach(cell, pathkeys)
+	{
+		/* group keys are looked up by pointer identity; first miss ends it */
+		if (!list_member_ptr(group_keys, lfirst(cell)))
+			break;
+
+		nmatched++;
+	}
+
+	return nmatched;
+}
+
+
/*
* truncate_useless_pathkeys
* Shorten the given pathkey list to just the useful pathkeys.
nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+ if (nuseful2 > nuseful)
+ nuseful = nuseful2;
+ nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
if (nuseful2 > nuseful)
nuseful = nuseful2;
{
if (rel->joininfo != NIL || rel->has_eclass_joins)
return true; /* might be able to use pathkeys for merging */
+ if (root->group_pathkeys != NIL)
+ return true; /* might be able to use pathkeys for grouping */
if (root->query_pathkeys != NIL)
return true; /* might be able to use them for ordering */
return false; /* definitely useless */
double tuple_fraction,
int64 *offset_est, int64 *count_est);
static void remove_useless_groupby_columns(PlannerInfo *root);
-static List *preprocess_groupclause(PlannerInfo *root, List *force);
+static List *groupclause_apply_groupingset(PlannerInfo *root, List *gset);
static List *extract_rollup_sets(List *groupingSets);
static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
static void standard_qp_callback(PlannerInfo *root, void *extra);
else if (parse->groupClause)
{
/* Preprocess regular GROUP BY clause, if any */
- root->processed_groupClause = preprocess_groupclause(root, NIL);
+ root->processed_groupClause = list_copy(parse->groupClause);
/* Remove any redundant GROUP BY columns */
remove_useless_groupby_columns(root);
}
* The groupClauses for hashed grouping sets are built later on.)
*/
if (gs->set)
- rollup->groupClause = preprocess_groupclause(root, gs->set);
+ rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
else
rollup->groupClause = NIL;
}
/*
- * preprocess_groupclause - do preparatory work on GROUP BY clause
- *
- * The idea here is to adjust the ordering of the GROUP BY elements
- * (which in itself is semantically insignificant) to match ORDER BY,
- * thereby allowing a single sort operation to both implement the ORDER BY
- * requirement and set up for a Unique step that implements GROUP BY.
- *
- * In principle it might be interesting to consider other orderings of the
- * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost. We don't
- * bother with that, though. Hashed grouping will frequently win anyway.
- *
- * Note: we need no comparable processing of the distinctClause because
- * the parser already enforced that that matches ORDER BY.
- *
- * Note: we return a fresh List, but its elements are the same
- * SortGroupClauses appearing in parse->groupClause. This is important
- * because later processing may modify the processed_groupClause list.
- *
- * For grouping sets, the order of items is instead forced to agree with that
- * of the grouping set (and items not in the grouping set are skipped). The
- * work of sorting the order of grouping set elements to match the ORDER BY if
- * possible is done elsewhere.
+ * groupclause_apply_groupingset
+ * Apply the order of GROUP BY clauses defined by grouping sets. Items
+ * not in the grouping set are skipped.
*/
static List *
-preprocess_groupclause(PlannerInfo *root, List *force)
+groupclause_apply_groupingset(PlannerInfo *root, List *gset)
{
Query *parse = root->parse;
List *new_groupclause = NIL;
- bool partial_match;
ListCell *sl;
- ListCell *gl;
- /* For grouping sets, we need to force the ordering */
- if (force)
+ foreach(sl, gset)
{
- foreach(sl, force)
- {
- Index ref = lfirst_int(sl);
- SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
+ Index ref = lfirst_int(sl);
+ SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
- new_groupclause = lappend(new_groupclause, cl);
- }
-
- return new_groupclause;
+ new_groupclause = lappend(new_groupclause, cl);
}
-
- /* If no ORDER BY, nothing useful to do here */
- if (parse->sortClause == NIL)
- return list_copy(parse->groupClause);
-
- /*
- * Scan the ORDER BY clause and construct a list of matching GROUP BY
- * items, but only as far as we can make a matching prefix.
- *
- * This code assumes that the sortClause contains no duplicate items.
- */
- foreach(sl, parse->sortClause)
- {
- SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
-
- foreach(gl, parse->groupClause)
- {
- SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
-
- if (equal(gc, sc))
- {
- new_groupclause = lappend(new_groupclause, gc);
- break;
- }
- }
- if (gl == NULL)
- break; /* no match, so stop scanning */
- }
-
- /* Did we match all of the ORDER BY list, or just some of it? */
- partial_match = (sl != NULL);
-
- /* If no match at all, no point in reordering GROUP BY */
- if (new_groupclause == NIL)
- return list_copy(parse->groupClause);
-
- /*
- * Add any remaining GROUP BY items to the new list, but only if we were
- * able to make a complete match. In other words, we only rearrange the
- * GROUP BY list if the result is that one list is a prefix of the other
- * --- otherwise there's no possibility of a common sort. Also, give up
- * if there are any non-sortable GROUP BY items, since then there's no
- * hope anyway.
- */
- foreach(gl, parse->groupClause)
- {
- SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
-
- if (list_member_ptr(new_groupclause, gc))
- continue; /* it matched an ORDER BY item */
- if (partial_match) /* give up, no common sort possible */
- return list_copy(parse->groupClause);
- if (!OidIsValid(gc->sortop)) /* give up, GROUP BY can't be sorted */
- return list_copy(parse->groupClause);
- new_groupclause = lappend(new_groupclause, gc);
- }
-
- /* Success --- install the rearranged GROUP BY list */
- Assert(list_length(parse->groupClause) == list_length(new_groupclause));
return new_groupclause;
}
{
rollup = makeNode(RollupData);
- rollup->groupClause = preprocess_groupclause(root, gset);
+ rollup->groupClause = groupclause_apply_groupingset(root, gset);
rollup->gsets_data = list_make1(gs);
rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
rollup->gsets_data,
Assert(gs->set != NIL);
- rollup->groupClause = preprocess_groupclause(root, gs->set);
+ rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
rollup->gsets_data = list_make1(gs);
rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
rollup->gsets_data,
*/
foreach(lc, input_rel->pathlist)
{
+ ListCell *lc2;
Path *path = (Path *) lfirst(lc);
+ Path *path_save = path;
+ List *pathkey_orderings = NIL;
- path = make_ordered_path(root,
- grouped_rel,
- path,
- cheapest_path,
- root->group_pathkeys);
+ /* generate alternative group orderings that might be useful */
+ pathkey_orderings = get_useful_group_keys_orderings(root, path);
- if (path == NULL)
- continue;
+ Assert(list_length(pathkey_orderings) > 0);
- /* Now decide what to stick atop it */
- if (parse->groupingSets)
- {
- consider_groupingsets_paths(root, grouped_rel,
- path, true, can_hash,
- gd, agg_costs, dNumGroups);
- }
- else if (parse->hasAggs)
- {
- /*
- * We have aggregation, possibly with plain GROUP BY. Make an
- * AggPath.
- */
- add_path(grouped_rel, (Path *)
- create_agg_path(root,
- grouped_rel,
- path,
- grouped_rel->reltarget,
- parse->groupClause ? AGG_SORTED : AGG_PLAIN,
- AGGSPLIT_SIMPLE,
- root->processed_groupClause,
- havingQual,
- agg_costs,
- dNumGroups));
- }
- else if (parse->groupClause)
+ foreach(lc2, pathkey_orderings)
{
- /*
- * We have GROUP BY without aggregation or grouping sets. Make
- * a GroupPath.
- */
- add_path(grouped_rel, (Path *)
- create_group_path(root,
- grouped_rel,
- path,
- root->processed_groupClause,
- havingQual,
- dNumGroups));
- }
- else
- {
- /* Other cases should have been handled above */
- Assert(false);
- }
- }
+ PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
- /*
- * Instead of operating directly on the input relation, we can
- * consider finalizing a partially aggregated path.
- */
- if (partially_grouped_rel != NULL)
- {
- foreach(lc, partially_grouped_rel->pathlist)
- {
- Path *path = (Path *) lfirst(lc);
+ /* restore the path (we replace it in the loop) */
+ path = path_save;
path = make_ordered_path(root,
grouped_rel,
path,
- partially_grouped_rel->cheapest_total_path,
- root->group_pathkeys);
-
+ cheapest_path,
+ info->pathkeys);
if (path == NULL)
continue;
- if (parse->hasAggs)
+ /* Now decide what to stick atop it */
+ if (parse->groupingSets)
+ {
+ consider_groupingsets_paths(root, grouped_rel,
+ path, true, can_hash,
+ gd, agg_costs, dNumGroups);
+ }
+ else if (parse->hasAggs)
+ {
+ /*
+ * We have aggregation, possibly with plain GROUP BY. Make
+ * an AggPath.
+ */
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
- AGGSPLIT_FINAL_DESERIAL,
- root->processed_groupClause,
+ AGGSPLIT_SIMPLE,
+ info->clauses,
havingQual,
- agg_final_costs,
+ agg_costs,
dNumGroups));
- else
+ }
+ else if (parse->groupClause)
+ {
+ /*
+ * We have GROUP BY without aggregation or grouping sets.
+ * Make a GroupPath.
+ */
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
- root->processed_groupClause,
+ info->clauses,
havingQual,
dNumGroups));
+ }
+ else
+ {
+ /* Other cases should have been handled above */
+ Assert(false);
+ }
+ }
+ }
+ /*
+ * Instead of operating directly on the input relation, we can
+ * consider finalizing a partially aggregated path.
+ */
+ if (partially_grouped_rel != NULL)
+ {
+ foreach(lc, partially_grouped_rel->pathlist)
+ {
+ ListCell *lc2;
+ Path *path = (Path *) lfirst(lc);
+ Path *path_save = path;
+ List *pathkey_orderings = NIL;
+
+ /* generate alternative group orderings that might be useful */
+ pathkey_orderings = get_useful_group_keys_orderings(root, path);
+
+ Assert(list_length(pathkey_orderings) > 0);
+
+ /* process all potentially interesting grouping reorderings */
+ foreach(lc2, pathkey_orderings)
+ {
+ PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+ /* restore the path (we replace it in the loop) */
+ path = path_save;
+
+ path = make_ordered_path(root,
+ grouped_rel,
+ path,
+ partially_grouped_rel->cheapest_total_path,
+ info->pathkeys);
+
+ if (path == NULL)
+ continue;
+
+ if (parse->hasAggs)
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root,
+ grouped_rel,
+ path,
+ grouped_rel->reltarget,
+ parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ AGGSPLIT_FINAL_DESERIAL,
+ info->clauses,
+ havingQual,
+ agg_final_costs,
+ dNumGroups));
+ else
+ add_path(grouped_rel, (Path *)
+ create_group_path(root,
+ grouped_rel,
+ path,
+ info->clauses,
+ havingQual,
+ dNumGroups));
+
+ }
}
}
}
*/
foreach(lc, input_rel->pathlist)
{
+ ListCell *lc2;
Path *path = (Path *) lfirst(lc);
+ Path *path_save = path;
+ List *pathkey_orderings = NIL;
- path = make_ordered_path(root,
- partially_grouped_rel,
- path,
- cheapest_total_path,
- root->group_pathkeys);
+ /* generate alternative group orderings that might be useful */
+ pathkey_orderings = get_useful_group_keys_orderings(root, path);
- if (path == NULL)
- continue;
+ Assert(list_length(pathkey_orderings) > 0);
- if (parse->hasAggs)
- add_path(partially_grouped_rel, (Path *)
- create_agg_path(root,
+ /* process all potentially interesting grouping reorderings */
+ foreach(lc2, pathkey_orderings)
+ {
+ PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+ /* restore the path (we replace it in the loop) */
+ path = path_save;
+
+ path = make_ordered_path(root,
partially_grouped_rel,
path,
- partially_grouped_rel->reltarget,
- parse->groupClause ? AGG_SORTED : AGG_PLAIN,
- AGGSPLIT_INITIAL_SERIAL,
- root->processed_groupClause,
- NIL,
- agg_partial_costs,
- dNumPartialGroups));
- else
- add_path(partially_grouped_rel, (Path *)
- create_group_path(root,
- partially_grouped_rel,
- path,
- root->processed_groupClause,
- NIL,
- dNumPartialGroups));
+ cheapest_total_path,
+ info->pathkeys);
+
+ if (path == NULL)
+ continue;
+
+ if (parse->hasAggs)
+ add_path(partially_grouped_rel, (Path *)
+ create_agg_path(root,
+ partially_grouped_rel,
+ path,
+ partially_grouped_rel->reltarget,
+ parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ AGGSPLIT_INITIAL_SERIAL,
+ info->clauses,
+ NIL,
+ agg_partial_costs,
+ dNumPartialGroups));
+ else
+ add_path(partially_grouped_rel, (Path *)
+ create_group_path(root,
+ partially_grouped_rel,
+ path,
+ info->clauses,
+ NIL,
+ dNumPartialGroups));
+ }
}
}
/* Similar to above logic, but for partial paths. */
foreach(lc, input_rel->partial_pathlist)
{
+ ListCell *lc2;
Path *path = (Path *) lfirst(lc);
+ Path *path_save = path;
+ List *pathkey_orderings = NIL;
- path = make_ordered_path(root,
- partially_grouped_rel,
- path,
- cheapest_partial_path,
- root->group_pathkeys);
+ /* generate alternative group orderings that might be useful */
+ pathkey_orderings = get_useful_group_keys_orderings(root, path);
- if (path == NULL)
- continue;
+ Assert(list_length(pathkey_orderings) > 0);
- if (parse->hasAggs)
- add_partial_path(partially_grouped_rel, (Path *)
- create_agg_path(root,
- partially_grouped_rel,
- path,
- partially_grouped_rel->reltarget,
- parse->groupClause ? AGG_SORTED : AGG_PLAIN,
- AGGSPLIT_INITIAL_SERIAL,
- root->processed_groupClause,
- NIL,
- agg_partial_costs,
- dNumPartialPartialGroups));
- else
- add_partial_path(partially_grouped_rel, (Path *)
- create_group_path(root,
- partially_grouped_rel,
- path,
- root->processed_groupClause,
- NIL,
- dNumPartialPartialGroups));
+ /* process all potentially interesting grouping reorderings */
+ foreach(lc2, pathkey_orderings)
+ {
+ PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+
+ /* restore the path (we replace it in the loop) */
+ path = path_save;
+
+ path = make_ordered_path(root,
+ partially_grouped_rel,
+ path,
+ cheapest_partial_path,
+ info->pathkeys);
+
+ if (path == NULL)
+ continue;
+
+ if (parse->hasAggs)
+ add_partial_path(partially_grouped_rel, (Path *)
+ create_agg_path(root,
+ partially_grouped_rel,
+ path,
+ partially_grouped_rel->reltarget,
+ parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ AGGSPLIT_INITIAL_SERIAL,
+ info->clauses,
+ NIL,
+ agg_partial_costs,
+ dNumPartialPartialGroups));
+ else
+ add_partial_path(partially_grouped_rel, (Path *)
+ create_group_path(root,
+ partially_grouped_rel,
+ path,
+ info->clauses,
+ NIL,
+ dNumPartialPartialGroups));
+ }
}
}
* We can also skip the entire loop when we only have a single-item
* group_pathkeys because then we can't possibly have a presorted prefix
* of the list without having the list be fully sorted.
+ *
+ * XXX Shouldn't this also consider the group-key-reordering?
*/
if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
return;
true,
NULL, NULL, NULL
},
+ {
+ {"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("Enables reordering of GROUP BY keys."),
+ NULL,
+ GUC_EXPLAIN
+ },
+ &enable_group_by_reordering,
+ true,
+ NULL, NULL, NULL
+ },
{
{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
gettext_noop("Enables genetic query optimization."),
#enable_seqscan = on
#enable_sort = on
#enable_tidscan = on
+#enable_group_by_reordering = on
# - Planner Cost Constants -
bool pk_nulls_first; /* do NULLs come before normal values? */
} PathKey;
+/*
+ * Combines the information about pathkeys and the associated clauses.
+ *
+ * One PathKeyInfo describes a single candidate ordering of the GROUP BY
+ * keys: 'pathkeys' and 'clauses' list the same keys in the same order.
+ * Lists of these nodes are produced by get_useful_group_keys_orderings().
+ */
+typedef struct PathKeyInfo
+{
+ NodeTag type;
+ List *pathkeys; /* pathkeys in the candidate order */
+ List *clauses; /* grouping clauses, in the matching order */
+} PathKeyInfo;
+
/*
* VolatileFunctionStatus -- allows nodes to cache their
* contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
extern PGDLLIMPORT int geqo_threshold;
extern PGDLLIMPORT int min_parallel_table_scan_size;
extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
/* Hook for plugins to get control in set_rel_pathlist() */
typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
extern bool pathkeys_contained_in(List *keys1, List *keys2);
extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, Path *path);
extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
Relids required_outer,
CostSelector cost_criterion,
(1 row)
ROLLBACK;
+-- GROUP BY optimization by reorder columns
+CREATE TABLE btg AS SELECT
+ i % 100 AS x,
+ i % 100 AS y,
+ 'abc' || i % 10 AS z,
+ i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+ QUERY PLAN
+----------------------------------------
+ GroupAggregate
+ Group Key: x, y
+ -> Index Only Scan using abc on btg
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+ QUERY PLAN
+----------------------------------------
+ GroupAggregate
+ Group Key: x, y
+ -> Index Only Scan using abc on btg
+(3 rows)
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+ QUERY PLAN
+-----------------------------------------
+ Group
+ Group Key: x, y, z, w
+ -> Incremental Sort
+ Sort Key: x, y, z, w
+ Presorted Key: x, y
+ -> Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+ QUERY PLAN
+-----------------------------------------
+ Group
+ Group Key: x, y, z, w
+ -> Incremental Sort
+ Sort Key: x, y, z, w
+ Presorted Key: x, y
+ -> Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+ QUERY PLAN
+-----------------------------------------
+ Group
+ Group Key: x, y, w, z
+ -> Incremental Sort
+ Sort Key: x, y, w, z
+ Presorted Key: x, y
+ -> Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+ QUERY PLAN
+-----------------------------------------
+ Group
+ Group Key: x, y, w, z
+ -> Incremental Sort
+ Sort Key: x, y, w, z
+ Presorted Key: x, y
+ -> Index Scan using abc on btg
+(6 rows)
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+ QUERY PLAN
+----------------------------------------------
+ Group
+ Group Key: btg.x, btg.y, btg.w, btg.z
+ -> Incremental Sort
+ Sort Key: btg.x, btg.y, btg.w, btg.z
+ Presorted Key: btg.x, btg.y
+ -> Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+ QUERY PLAN
+----------------------------------------------------
+ Group
+ Group Key: btg.x, btg.y, btg.w, btg.z
+ -> Limit
+ -> Incremental Sort
+ Sort Key: btg.x, btg.y, btg.w, btg.z
+ Presorted Key: btg.x, btg.y
+ -> Index Scan using abc on btg
+(7 rows)
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+ QUERY PLAN
+------------------------------
+ Group
+ Group Key: y, x, z, w
+ -> Sort
+ Sort Key: y, x, z, w
+ -> Seq Scan on btg
+(5 rows)
+
+-- Utilize incremental sort to make the ORDER BY rule a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+ QUERY PLAN
+-----------------------------------------------
+ Sort
+ Sort Key: ((x * x)), z
+ -> Group
+ Group Key: x, y, w, z
+ -> Incremental Sort
+ Sort Key: x, y, w, z
+ Presorted Key: x, y
+ -> Index Scan using abc on btg
+(8 rows)
+
+SET enable_incremental_sort = off;
+-- The case when the number of incoming subtree path keys is more than
+-- the number of grouping keys.
+CREATE INDEX idx_y_x_z ON btg(y,x,w);
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
+ QUERY PLAN
+-----------------------------------------------------
+ GroupAggregate
+ Output: y, x, array_agg(DISTINCT w)
+ Group Key: btg.y, btg.x
+ -> Index Only Scan using idx_y_x_z on public.btg
+ Output: y, x, w
+ Index Cond: (btg.y < 0)
+(6 rows)
+
+RESET enable_incremental_sort;
+DROP TABLE btg;
+-- The case, when scanning sort order correspond to aggregate sort order but
+-- can not be found in the group-by list
+CREATE TABLE t1 (c1 int PRIMARY KEY, c2 int);
+CREATE UNIQUE INDEX ON t1(c2);
+explain (costs off)
+SELECT array_agg(c1 ORDER BY c2),c2
+FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
+ QUERY PLAN
+--------------------------------------------------------
+ Sort
+ Sort Key: c2
+ -> GroupAggregate
+ Group Key: c1
+ -> Sort
+ Sort Key: c1, c2
+ -> Bitmap Heap Scan on t1
+ Recheck Cond: (c2 < 100)
+ -> Bitmap Index Scan on t1_c2_idx
+ Index Cond: (c2 < 100)
+(10 rows)
+
+DROP TABLE t1 CASCADE;
+-- Check, that GROUP-BY reordering optimization can operate with pathkeys, built
+-- by planner itself. For example, by MergeJoin.
+SET enable_hashjoin = off;
+SET enable_nestloop = off;
+explain (COSTS OFF)
+SELECT c1.relname,c1.relpages
+FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages)
+GROUP BY c1.reltuples,c1.relpages,c1.relname
+ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------
+ Incremental Sort
+ Sort Key: c1.relpages, c1.relname, ((c1.relpages * c1.relpages))
+ Presorted Key: c1.relpages, c1.relname
+ -> Group
+ Group Key: c1.relpages, c1.relname, c1.reltuples
+ -> Incremental Sort
+ Sort Key: c1.relpages, c1.relname, c1.reltuples
+ Presorted Key: c1.relpages, c1.relname
+ -> Merge Join
+ Merge Cond: ((c1.relpages = c2.relpages) AND (c1.relname = c2.relname))
+ -> Sort
+ Sort Key: c1.relpages, c1.relname
+ -> Seq Scan on pg_class c1
+ -> Sort
+ Sort Key: c2.relpages, c2.relname
+ -> Seq Scan on pg_class c2
+(16 rows)
+
+RESET enable_hashjoin;
+RESET enable_nestloop;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
-- Secondly test the case of a parallel aggregate combiner function
-- returning NULL. For that use normal transition function, but a
-- combiner function returning NULL.
enable_async_append | on
enable_bitmapscan | on
enable_gathermerge | on
+ enable_group_by_reordering | on
enable_hashagg | on
enable_hashjoin | on
enable_incremental_sort | on
enable_seqscan | on
enable_sort | on
enable_tidscan | on
-(22 rows)
+(23 rows)
-- There are always wait event descriptions for various types.
select type, count(*) > 0 as ok FROM pg_wait_events
ROLLBACK;
+-- GROUP BY optimization by reorder columns
+CREATE TABLE btg AS SELECT
+ i % 100 AS x,
+ i % 100 AS y,
+ 'abc' || i % 10 AS z,
+ i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+
+-- Utilize incremental sort to make the ORDER BY rule a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+
+SET enable_incremental_sort = off;
+-- The case when the number of incoming subtree path keys is more than
+-- the number of grouping keys.
+CREATE INDEX idx_y_x_z ON btg(y,x,w);
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
+RESET enable_incremental_sort;
+
+DROP TABLE btg;
+
+-- The case, when scanning sort order correspond to aggregate sort order but
+-- can not be found in the group-by list
+CREATE TABLE t1 (c1 int PRIMARY KEY, c2 int);
+CREATE UNIQUE INDEX ON t1(c2);
+explain (costs off)
+SELECT array_agg(c1 ORDER BY c2),c2
+FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
+DROP TABLE t1 CASCADE;
+
+-- Check, that GROUP-BY reordering optimization can operate with pathkeys, built
+-- by planner itself. For example, by MergeJoin.
+SET enable_hashjoin = off;
+SET enable_nestloop = off;
+explain (COSTS OFF)
+SELECT c1.relname,c1.relpages
+FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages)
+GROUP BY c1.reltuples,c1.relpages,c1.relname
+ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages;
+RESET enable_hashjoin;
+RESET enable_nestloop;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+
-- Secondly test the case of a parallel aggregate combiner function
-- returning NULL. For that use normal transition function, but a
-- combiner function returning NULL.
rfile
ws_options
ws_file_info
+PathKeyInfo