Skip to content

Commit 38e9c13

Browse files
author
Alena Rybakina
committed
Add search by fss hash in aqo_data table and hash table.
If we didn't find any neighbours with matching fs and fss hash indexes in aqo_data, we write a new object to aqo_data whose target value is the average of the nearest neighbours found by fss_hash. I don't consider fs_hash when searching for the nearest neighbours used to calculate the average target value for the new object, because I think a different fs_hash describes a completely different query on a different table whose feature descriptions match, or almost match, those of the current object, but they are not the same entity.
1 parent 0ca4fe6 commit 38e9c13

12 files changed

+395
-51
lines changed

‎Makefile

+2-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ REGRESS = aqo_disabled \
2626
statement_timeout \
2727
temp_tables \
2828
top_queries \
29-
relocatable
29+
relocatable\
30+
look_a_like
3031

3132
fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw
3233
stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements

‎aqo--1.0.sql

+2
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,5 @@ CREATE FUNCTION invalidate_deactivated_queries_cache() RETURNS trigger
5050
CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE
5151
ON public.aqo_queries FOR EACH STATEMENT
5252
EXECUTE PROCEDURE invalidate_deactivated_queries_cache();
53+
CREATE INDEX aqo_fss_idx
54+
on public.aqo_data (fsspace_hash);

‎aqo.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ extern bool update_query(uint64 qhash, uint64 fhash,
283283
extern bool add_query_text(uint64 query_hash, const char *query_string);
284284
extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids,
285285
bool isSafe);
286-
extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids);
286+
extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool use_idx_fss);
287287
extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data,
288288
List *reloids, bool isTimedOut);
289289
extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids);

‎cardinality_estimation.c

+4-1
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,10 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns,
9191
* small part of paths was used for AQO learning and fetch into the AQO
9292
* knowledge base.
9393
*/
94-
result = -1;
94+
if (!load_fss(query_context.fspace_hash, *fss, &data, NULL, false))
95+
result = -1;
96+
else
97+
result = OkNNr_predict(&data, features);
9598
}
9699
#ifdef AQO_DEBUG_PRINT
97100
predict_debug_output(clauses, selectivities, relsigns, *fss, result);

‎expected/aqo_fdw.out

+6-16
Original file line numberDiff line numberDiff line change
@@ -88,32 +88,22 @@ SELECT str FROM expln('
8888
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
8989
SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x;
9090
') AS str WHERE str NOT LIKE '%Sort Method%';
91-
str
92-
------------------------------------------------------------
93-
Merge Join (actual rows=1 loops=1)
91+
str
92+
-------------------------------------------
93+
Foreign Scan (actual rows=1 loops=1)
9494
AQO not used
95-
Merge Cond: (a.x = b.x)
96-
-> Sort (actual rows=1 loops=1)
97-
AQO not used
98-
Sort Key: a.x
99-
-> Foreign Scan on frgn a (actual rows=1 loops=1)
100-
AQO not used
101-
-> Sort (actual rows=1 loops=1)
102-
AQO not used
103-
Sort Key: b.x
104-
-> Foreign Scan on frgn b (actual rows=1 loops=1)
105-
AQO not used
95+
Relations: (frgn a) INNER JOIN (frgn b)
10696
Using aqo: true
10797
AQO mode: LEARN
10898
JOINS: 0
109-
(16 rows)
99+
(6 rows)
110100

111101
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)
112102
SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x;
113103
QUERY PLAN
114104
--------------------------------------------------------------------------------------------------------
115105
Foreign Scan (actual rows=1 loops=1)
116-
AQO: rows=1, error=0%
106+
AQO not used
117107
Output: a.x, b.x
118108
Relations: (public.frgn a) INNER JOIN (public.frgn b)
119109
Remote SQL: SELECT r1.x, r2.x FROM (public.local r1 INNER JOIN public.local r2 ON (((r1.x = r2.x))))

‎expected/aqo_learn.out

+15-15
Original file line numberDiff line numberDiff line change
@@ -265,12 +265,9 @@ ORDER BY (md5(query_text))
265265
| | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a;
266266
{1,1,1} | 4 | EXPLAIN SELECT * FROM aqo_test0 +
267267
| | WHERE a < 3 AND b < 3 AND c < 3 AND d < 3;
268-
{1} | 0 | SELECT count(*) FROM tmp1;
269-
{1} | 0 | SELECT count(*) FROM tmp1;
270-
{1} | 6 | EXPlAIN SELECT t1.a, t2.b, t3.c +
271-
| | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +
272-
| | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b;
273-
{1} | 1 | EXPlAIN SELECT t1.a, t2.b, t3.c +
268+
{1,1,1,1,1} | 0 | SELECT count(*) FROM tmp1;
269+
{1,1,1,1,1} | 0 | SELECT count(*) FROM tmp1;
270+
{1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c +
274271
| | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +
275272
| | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b;
276273
{1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c +
@@ -279,34 +276,37 @@ ORDER BY (md5(query_text))
279276
{1} | 3 | EXPlAIN SELECT t1.a, t2.b, t3.c +
280277
| | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +
281278
| | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b;
282-
{1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c +
279+
{1} | 6 | EXPlAIN SELECT t1.a, t2.b, t3.c +
283280
| | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +
284281
| | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b;
285-
{1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 +
282+
{1,1} | 1 | EXPlAIN SELECT t1.a, t2.b, t3.c +
283+
| | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +
284+
| | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b;
285+
{1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 +
286286
| | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1;
287287
{1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 +
288288
| | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1;
289-
{1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 +
289+
{1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 +
290290
| | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1;
291-
{1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c +
291+
{1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c +
292292
| | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 +
293293
| | WHERE t1.a = t2.b AND t2.a = t3.b;
294-
{1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c +
294+
{1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c +
295295
| | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 +
296296
| | WHERE t1.a = t2.b AND t2.a = t3.b;
297297
{1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c +
298298
| | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 +
299299
| | WHERE t1.a = t2.b AND t2.a = t3.b;
300-
{1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d +
300+
{1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d +
301301
| | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 +
302302
| | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b;
303-
{1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d +
303+
{1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d +
304304
| | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 +
305305
| | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b;
306306
{1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d +
307307
| | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 +
308308
| | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b;
309-
{1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d +
309+
{1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d +
310310
| | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 +
311311
| | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b;
312312
(21 rows)
@@ -587,7 +587,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;');
587587
SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b');
588588
estimated | actual
589589
-----------+--------
590-
20 | 19
590+
19 | 19
591591
(1 row)
592592

593593
SELECT count(*) FROM

‎expected/look_a_like.out

+240
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
CREATE IF NOT EXISTS EXTENSION aqo;
2+
ERROR: syntax error at or near "IF"
3+
LINE 1: CREATE IF NOT EXISTS EXTENSION aqo;
4+
^
5+
SET aqo.join_threshold = 0;
6+
SET aqo.mode = 'learn';
7+
SET aqo.show_details = 'on';
8+
DROP TABLE IF EXISTS a,b CASCADE;
9+
NOTICE: table "a" does not exist, skipping
10+
NOTICE: table "b" does not exist, skipping
11+
CREATE TABLE a (x int);
12+
INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival;
13+
CREATE TABLE b (y int);
14+
INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival;
15+
--
16+
-- Returns string-by-string explain of a query. Made for removing some strings
17+
-- from the explain output.
18+
--
19+
CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$
20+
BEGIN
21+
RETURN QUERY
22+
EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string);
23+
RETURN;
24+
END;
25+
$$ LANGUAGE PLPGSQL;
26+
-- no one predicted rows. we use knowledge cardinalities of the query
27+
-- in the next queries with the same fss_hash
28+
SELECT str AS result
29+
FROM expln('
30+
SELECT x FROM A where x = 5;') AS str
31+
WHERE str NOT LIKE 'Query Identifier%';
32+
result
33+
------------------------------------------------
34+
Seq Scan on public.a (actual rows=100 loops=1)
35+
AQO not used
36+
Output: x
37+
Filter: (a.x = 5)
38+
Rows Removed by Filter: 900
39+
Using aqo: true
40+
AQO mode: LEARN
41+
JOINS: 0
42+
(8 rows)
43+
44+
-- cardinality 100 in the first Seq Scan on a
45+
SELECT str AS result
46+
FROM expln('
47+
SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str
48+
WHERE str NOT LIKE 'Query Identifier%';
49+
result
50+
------------------------------------------------------------
51+
Nested Loop (actual rows=10000 loops=1)
52+
AQO not used
53+
Output: a.x
54+
-> Seq Scan on public.a (actual rows=100 loops=1)
55+
AQO: rows=100, error=0%
56+
Output: a.x
57+
Filter: (a.x = 5)
58+
Rows Removed by Filter: 900
59+
-> Materialize (actual rows=100 loops=100)
60+
AQO not used
61+
Output: b.y
62+
-> Seq Scan on public.b (actual rows=100 loops=1)
63+
AQO not used
64+
Output: b.y
65+
Filter: (b.y = 5)
66+
Rows Removed by Filter: 900
67+
Using aqo: true
68+
AQO mode: LEARN
69+
JOINS: 0
70+
(19 rows)
71+
72+
-- cardinality 100 in Nesteed Loop in the first Seq Scan on a
73+
SELECT str AS result
74+
FROM expln('
75+
SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str
76+
WHERE str NOT LIKE 'Query Identifier%';
77+
result
78+
------------------------------------------------------------------
79+
GroupAggregate (actual rows=1 loops=1)
80+
AQO not used
81+
Output: a.x, sum(a.x)
82+
Group Key: a.x
83+
-> Nested Loop (actual rows=10000 loops=1)
84+
AQO not used
85+
Output: a.x
86+
-> Seq Scan on public.a (actual rows=100 loops=1)
87+
AQO: rows=100, error=0%
88+
Output: a.x
89+
Filter: (a.x = 5)
90+
Rows Removed by Filter: 900
91+
-> Materialize (actual rows=100 loops=100)
92+
AQO: rows=100, error=0%
93+
Output: b.y
94+
-> Seq Scan on public.b (actual rows=100 loops=1)
95+
AQO: rows=100, error=0%
96+
Output: b.y
97+
Filter: (b.y = 5)
98+
Rows Removed by Filter: 900
99+
Using aqo: true
100+
AQO mode: LEARN
101+
JOINS: 1
102+
(23 rows)
103+
104+
-- cardinality 100 in the first Seq Scan on a
105+
SELECT str AS result
106+
FROM expln('
107+
SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str
108+
WHERE str NOT LIKE 'Query Identifier%';
109+
result
110+
------------------------------------------------------
111+
GroupAggregate (actual rows=1 loops=1)
112+
AQO not used
113+
Output: x, sum(x)
114+
Group Key: a.x
115+
-> Seq Scan on public.a (actual rows=100 loops=1)
116+
AQO: rows=100, error=0%
117+
Output: x
118+
Filter: (a.x = 5)
119+
Rows Removed by Filter: 900
120+
Using aqo: true
121+
AQO mode: LEARN
122+
JOINS: 0
123+
(12 rows)
124+
125+
-- no one predicted rows. we use knowledge cardinalities of the query
126+
-- in the next queries with the same fss_hash
127+
SELECT str AS result
128+
FROM expln('
129+
SELECT x FROM A where x < 10 group by(x);') AS str
130+
WHERE str NOT LIKE 'Query Identifier%';
131+
result
132+
-------------------------------------------------------
133+
HashAggregate (actual rows=10 loops=1)
134+
AQO not used
135+
Output: x
136+
Group Key: a.x
137+
Batches: 1 Memory Usage: 40kB
138+
-> Seq Scan on public.a (actual rows=1000 loops=1)
139+
AQO not used
140+
Output: x
141+
Filter: (a.x < 10)
142+
Using aqo: true
143+
AQO mode: LEARN
144+
JOINS: 0
145+
(12 rows)
146+
147+
-- cardinality 1000 in Seq Scan on a
148+
SELECT str AS result
149+
FROM expln('
150+
SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str
151+
WHERE str NOT LIKE 'Query Identifier%';
152+
result
153+
-------------------------------------------------------------
154+
Merge Join (actual rows=100000 loops=1)
155+
AQO not used
156+
Output: a.x, b.y
157+
Merge Cond: (a.x = b.y)
158+
-> Sort (actual rows=1000 loops=1)
159+
AQO not used
160+
Output: a.x
161+
Sort Key: a.x
162+
Sort Method: quicksort Memory: 79kB
163+
-> Seq Scan on public.a (actual rows=1000 loops=1)
164+
AQO: rows=1000, error=0%
165+
Output: a.x
166+
Filter: (a.x < 10)
167+
-> Sort (actual rows=99901 loops=1)
168+
AQO not used
169+
Output: b.y
170+
Sort Key: b.y
171+
Sort Method: quicksort Memory: 79kB
172+
-> Seq Scan on public.b (actual rows=1000 loops=1)
173+
AQO not used
174+
Output: b.y
175+
Using aqo: true
176+
AQO mode: LEARN
177+
JOINS: 0
178+
(24 rows)
179+
180+
-- cardinality 100 in Seq Scan on a and Seq Scan on b
181+
SELECT str AS result
182+
FROM expln('
183+
SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str
184+
WHERE str NOT LIKE 'Query Identifier%';
185+
result
186+
----------------------------------------------------------------
187+
HashAggregate (actual rows=0 loops=1)
188+
AQO not used
189+
Output: a.x
190+
Group Key: a.x
191+
Batches: 1 Memory Usage: 40kB
192+
-> Nested Loop (actual rows=0 loops=1)
193+
AQO not used
194+
Output: a.x
195+
-> Seq Scan on public.a (actual rows=1000 loops=1)
196+
AQO: rows=1000, error=0%
197+
Output: a.x
198+
Filter: (a.x < 10)
199+
-> Materialize (actual rows=0 loops=1000)
200+
AQO not used
201+
-> Seq Scan on public.b (actual rows=0 loops=1)
202+
AQO not used
203+
Filter: (b.y > 10)
204+
Rows Removed by Filter: 1000
205+
Using aqo: true
206+
AQO mode: LEARN
207+
JOINS: 1
208+
(21 rows)
209+
210+
-- cardinality 1000 Hash Cond: (a.x = b.y) and 1 Seq Scan on b
211+
-- this cardinality is wrong because we take it from bad neibours (previous query).
212+
-- clause y > 10 give count of rows with the same clauses.
213+
SELECT str AS result
214+
FROM expln('
215+
SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str
216+
WHERE str NOT LIKE 'Query Identifier%';
217+
result
218+
----------------------------------------------------------
219+
Hash Join (actual rows=0 loops=1)
220+
AQO not used
221+
Output: a.x, b.y
222+
Hash Cond: (a.x = b.y)
223+
-> Seq Scan on public.a (actual rows=1 loops=1)
224+
AQO: rows=1000, error=100%
225+
Output: a.x
226+
Filter: (a.x < 10)
227+
-> Hash (actual rows=0 loops=1)
228+
AQO not used
229+
Output: b.y
230+
Buckets: 1024 Batches: 1 Memory Usage: 8kB
231+
-> Seq Scan on public.b (actual rows=0 loops=1)
232+
AQO: rows=1, error=100%
233+
Output: b.y
234+
Filter: (b.y > 10)
235+
Rows Removed by Filter: 1000
236+
Using aqo: true
237+
AQO mode: LEARN
238+
JOINS: 0
239+
(20 rows)
240+

‎expected/unsupported.out

+1-1
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
381381
-> Aggregate (actual rows=1 loops=1000)
382382
AQO not used
383383
-> Seq Scan on t t0 (actual rows=50 loops=1000)
384-
AQO not used
384+
AQO: rows=50, error=0%
385385
Filter: (x = t.x)
386386
Rows Removed by Filter: 950
387387
SubPlan 2

0 commit comments

Comments
 (0)