From 5c9d8636d30d0838c7432c2c75096b4cd7801971 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 28 Nov 2024 17:33:16 -0500 Subject: [PATCH] Avoid mislabeling of lateral references when pulling up a subquery. If we are pulling up a subquery that's under an outer join, and the subquery's target list contains a strict expression that uses both a subquery variable and a lateral-reference variable, it's okay to pull up the expression without wrapping it in a PlaceHolderVar. That's safe because if the subquery variable is forced to NULL by the outer join, the expression result will come out as NULL too, so we don't have to force that outcome by evaluating the expression below the outer join. It'd be correct to wrap in a PHV, but that can lead to very significantly worse plans, since we'd then have to use a nestloop plan to pass down the lateral reference to where the expression will be evaluated. However, when we do that, we should not mark the lateral reference variable as being nulled by the outer join, because it isn't after we pull up the expression in this way. So the marking logic added by cb8e50a4a was incorrect in this detail, leading to "wrong varnullingrels" errors from the consistency-checking logic in setrefs.c. It seems to be sufficient to just not mark lateral references at all in this case. (I have a nagging feeling that more complexity may be needed in cases where there are several levels of outer join, but some attempts to break it with that didn't succeed.) Per report from Bertrand Mamasam. Back-patch to v16, as the previous patch was. Discussion: https://postgr.es/m/CACZ67_UA_EVrqiFXJu9XK50baEpH=ofEPJswa2kFxg6xuSw-ww@mail.gmail.com --- src/backend/optimizer/prep/prepjointree.c | 19 ++++++++- src/test/regress/expected/subselect.out | 47 +++++++++++++++++++++++ src/test/regress/sql/subselect.sql | 16 ++++++++ 3 files changed, 80 insertions(+), 2 deletions(-) diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 74829a09d1c..2e0d41a8d1f 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -2611,6 +2611,17 @@ pullup_replace_vars_callback(Var *var, * the plan in cases where the nullingrels get removed again * later by outer join reduction. * + * Note that we don't force wrapping of expressions containing + * lateral references, so long as they also contain Vars/PHVs + * of the subquery. This is okay because of the restriction + * to strict constructs: if the subquery's Vars/PHVs have been + * forced to NULL by an outer join then the end result of the + * expression will be NULL too, regardless of the lateral + * references. So it's not necessary to force the expression + * to be evaluated below the outer join. This can be a very + * valuable optimization, because it may allow us to avoid + * using a nested loop to pass the lateral reference down. + * * This analysis could be tighter: in particular, a non-strict * construct hidden within a lower-level PlaceHolderVar is not * reason to add another PHV. But for now it doesn't seem @@ -2675,9 +2686,13 @@ pullup_replace_vars_callback(Var *var, } else { - /* There should be lower-level Vars/PHVs we can modify */ + /* + * There should be Vars/PHVs within the expression that we can + * modify. Per above discussion, modify only Vars/PHVs of the + * subquery, not lateral references. + */ newnode = add_nulling_relids(newnode, - NULL, /* modify all Vars/PHVs */ + rcon->relids, var->varnullingrels); /* Assert we did put the varnullingrels into the expression */ Assert(bms_is_subset(var->varnullingrels, diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 2d35de3fad6..da219aba4c4 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1750,6 +1750,53 @@ where tname = 'tenk1' and attnum = 1; tenk1 | unique1 (1 row) +-- Check behavior when there's a lateral reference in the output expression +explain (verbose, costs off) +select t1.ten, sum(x) from + tenk1 t1 left join lateral ( + select t1.ten + t2.ten as x, t2.fivethous from tenk1 t2 + ) ss on t1.unique1 = ss.fivethous +group by t1.ten +order by t1.ten; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Sort + Output: t1.ten, (sum((t1.ten + t2.ten))) + Sort Key: t1.ten + -> HashAggregate + Output: t1.ten, sum((t1.ten + t2.ten)) + Group Key: t1.ten + -> Hash Right Join + Output: t1.ten, t2.ten + Hash Cond: (t2.fivethous = t1.unique1) + -> Seq Scan on public.tenk1 t2 + Output: t2.unique1, t2.unique2, t2.two, t2.four, t2.ten, t2.twenty, t2.hundred, t2.thousand, t2.twothousand, t2.fivethous, t2.tenthous, t2.odd, t2.even, t2.stringu1, t2.stringu2, t2.string4 + -> Hash + Output: t1.ten, t1.unique1 + -> Seq Scan on public.tenk1 t1 + Output: t1.ten, t1.unique1 +(15 rows) + +select t1.ten, sum(x) from + tenk1 t1 left join lateral ( + select t1.ten + t2.ten as x, t2.fivethous from tenk1 t2 + ) ss on t1.unique1 = ss.fivethous +group by t1.ten +order by t1.ten; + ten | sum +-----+------- + 0 | 0 + 1 | 2000 + 2 | 4000 + 3 | 6000 + 4 | 8000 + 5 | 10000 + 6 | 12000 + 7 | 14000 + 8 | 16000 + 9 | 18000 +(10 rows) + -- -- Tests for CTE inlining behavior -- diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index af6e157aca0..1815a5e6642 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -908,6 +908,22 @@ select relname::information_schema.sql_identifier as tname, * from right join pg_attribute a on a.attrelid = ss2.oid where tname = 'tenk1' and attnum = 1; +-- Check behavior when there's a lateral reference in the output expression +explain (verbose, costs off) +select t1.ten, sum(x) from + tenk1 t1 left join lateral ( + select t1.ten + t2.ten as x, t2.fivethous from tenk1 t2 + ) ss on t1.unique1 = ss.fivethous +group by t1.ten +order by t1.ten; + +select t1.ten, sum(x) from + tenk1 t1 left join lateral ( + select t1.ten + t2.ten as x, t2.fivethous from tenk1 t2 + ) ss on t1.unique1 = ss.fivethous +group by t1.ten +order by t1.ten; + -- -- Tests for CTE inlining behavior -- -- 2.30.2