Skip to content

Commit 304b46f

Browse files
Alexandra Pervushinadanolivo
Alexandra Pervushina
authored andcommitted
Be more careful with locks of relations and syscaches in get_list_of_relids() routine
Switch on feature 'search on neigur feature spaces' by a GUC (disabled by default).
1 parent f023432 commit 304b46f

8 files changed

+102
-31
lines changed

‎aqo.c

+13
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,19 @@ _PG_init(void)
213213
NULL
214214
);
215215

216+
DefineCustomBoolVariable(
217+
"aqo.wide_search",
218+
"Search ML data in neigur feature spaces.",
219+
NULL,
220+
&use_wide_search,
221+
false,
222+
PGC_USERSET,
223+
0,
224+
NULL,
225+
NULL,
226+
NULL
227+
);
228+
216229
DefineCustomIntVariable("aqo.join_threshold",
217230
"Sets the threshold of number of JOINs in query beyond which AQO is used.",
218231
NULL,

‎aqo.h

+1
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ extern bool force_collect_stat;
173173
extern bool aqo_show_hash;
174174
extern bool aqo_show_details;
175175
extern int aqo_join_threshold;
176+
extern bool use_wide_search;
176177

177178
/* Parameters for current query */
178179
typedef struct QueryContextData

‎aqo_shared.c

+14-4
Original file line numberDiff line numberDiff line change
@@ -191,16 +191,18 @@ aqo_init_shmem(void)
191191
{
192192
/* First time through ... */
193193

194-
LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId());
195194
aqo_state->dsm_handler = DSM_HANDLE_INVALID;
196-
197195
aqo_state->qtexts_dsa_handler = DSM_HANDLE_INVALID;
196+
aqo_state->data_dsa_handler = DSM_HANDLE_INVALID;
197+
198198
aqo_state->qtext_trancheid = LWLockNewTrancheId();
199+
199200
aqo_state->qtexts_changed = false;
200-
aqo_state->data_dsa_handler = DSM_HANDLE_INVALID;
201+
aqo_state->stat_changed = false;
201202
aqo_state->data_changed = false;
202203
aqo_state->queries_changed = false;
203204

205+
LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId());
204206
LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId());
205207
LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId());
206208
LWLockInitialize(&aqo_state->data_lock, LWLockNewTrancheId());
@@ -245,7 +247,7 @@ aqo_init_shmem(void)
245247
LWLockRegisterTranche(aqo_state->data_lock.tranche, "AQO Data Lock Tranche");
246248
LWLockRegisterTranche(aqo_state->queries_lock.tranche, "AQO Queries Lock Tranche");
247249

248-
if (!IsUnderPostmaster)
250+
if (!IsUnderPostmaster && !found)
249251
{
250252
before_shmem_exit(on_shmem_shutdown, (Datum) 0);
251253

@@ -261,8 +263,16 @@ aqo_init_shmem(void)
261263
static void
262264
on_shmem_shutdown(int code, Datum arg)
263265
{
266+
Assert(!IsUnderPostmaster);
267+
268+
/*
269+
* Save ML data to a permanent storage. Do it on postmaster shutdown only
270+
* to save time. We can't do so for query_texts and aqo_data because of DSM
271+
* limits.
272+
*/
264273
aqo_stat_flush();
265274
aqo_queries_flush();
275+
return;
266276
}
267277

268278
Size

‎cardinality_estimation.c

+4-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
#include "machine_learning.h"
2525
#include "storage.h"
2626

27+
28+
bool use_wide_search = false;
29+
2730
#ifdef AQO_DEBUG_PRINT
2831
static void
2932
predict_debug_output(List *clauses, List *selectivities,
@@ -90,7 +93,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns,
9093
*/
9194

9295
/* Try to search in surrounding feature spaces for the same node */
93-
if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, true))
96+
if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search))
9497
result = -1;
9598
else
9699
{

‎conf.add

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
autovacuum = off
22
shared_preload_libraries = 'postgres_fdw, aqo'
33
max_parallel_workers_per_gather = 1 # switch off parallel workers because of unsteadiness
4+
aqo.wide_search = 'on'

‎path_utils.c

+18-9
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ hashTempTupleDesc(TupleDesc desc)
155155
return s;
156156
}
157157

158+
#include "storage/lmgr.h"
159+
158160
/*
159161
* Get list of relation indexes and prepare list of permanent table reloids,
160162
* list of temporary table reloids (can be changed between query launches) and
@@ -177,6 +179,8 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels)
177179
HeapTuple htup;
178180
Form_pg_class classForm;
179181
char *relname = NULL;
182+
Oid relrewrite;
183+
char relpersistence;
180184

181185
entry = planner_rt_fetch(index, root);
182186

@@ -191,34 +195,39 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels)
191195
if (!HeapTupleIsValid(htup))
192196
elog(PANIC, "cache lookup failed for reloid %u", entry->relid);
193197

198+
/* Copy the fields from syscache and release the slot as quickly as possible. */
194199
classForm = (Form_pg_class) GETSTRUCT(htup);
200+
relpersistence = classForm->relpersistence;
201+
relrewrite = classForm->relrewrite;
202+
relname = pstrdup(NameStr(classForm->relname));
203+
ReleaseSysCache(htup);
195204

196-
if (classForm->relpersistence == RELPERSISTENCE_TEMP)
205+
if (relpersistence == RELPERSISTENCE_TEMP)
197206
{
198207
/* The case of temporary table */
199208

200-
Relation trel = relation_open(entry->relid, NoLock);
201-
TupleDesc tdesc = RelationGetDescr(trel);
209+
Relation trel;
210+
TupleDesc tdesc;
202211

212+
trel = relation_open(entry->relid, NoLock);
213+
tdesc = RelationGetDescr(trel);
214+
Assert(CheckRelationLockedByMe(trel, AccessShareLock, true));
203215
hashes = lappend_uint64(hashes, hashTempTupleDesc(tdesc));
204216
relation_close(trel, NoLock);
205217
}
206218
else
207219
{
208220
/* The case of regular table */
209221
relname = quote_qualified_identifier(
210-
get_namespace_name(get_rel_namespace(entry->relid)),
211-
classForm->relrewrite ?
212-
get_rel_name(classForm->relrewrite) :
213-
NameStr(classForm->relname));
222+
get_namespace_name(get_rel_namespace(entry->relid)),
223+
relrewrite ? get_rel_name(relrewrite) : relname);
224+
214225
hashes = lappend_uint64(hashes, DatumGetInt64(hash_any_extended(
215226
(unsigned char *) relname,
216227
strlen(relname), 0)));
217228

218229
hrels = lappend_oid(hrels, entry->relid);
219230
}
220-
221-
ReleaseSysCache(htup);
222231
}
223232

224233
rels->hrels = list_concat(rels->hrels, hrels);

‎postprocessing.c

-2
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,6 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type,
171171
{
172172
List *lst = NIL;
173173
ListCell *l;
174-
int i = 0;
175174
bool parametrized_sel;
176175
int nargs;
177176
int *args_hash;
@@ -220,7 +219,6 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type,
220219
Assert(cur_sel > 0);
221220

222221
lst = lappend(lst, cur_sel);
223-
i++;
224222
}
225223

226224
if (parametrized_sel)

‎storage.c

+51-15
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,9 @@ aqo_stat_store(uint64 queryid, bool use_aqo,
302302
entry->exec_time[pos] = exec_time;
303303
entry->est_error[pos] = est_error;
304304
}
305+
305306
entry = memcpy(palloc(sizeof(StatEntry)), entry, sizeof(StatEntry));
307+
aqo_state->stat_changed = true;
306308
LWLockRelease(&aqo_state->stat_lock);
307309
return entry;
308310
}
@@ -424,14 +426,24 @@ aqo_stat_flush(void)
424426
int ret;
425427
long entries;
426428

427-
LWLockAcquire(&aqo_state->stat_lock, LW_SHARED);
429+
/* Use exclusive lock to prevent concurrent flushing in different backends. */
430+
LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE);
431+
432+
if (!aqo_state->stat_changed)
433+
/* Hash table wasn't changed, meaningless to store it in permanent storage */
434+
goto end;
435+
428436
entries = hash_get_num_entries(stat_htab);
429437
hash_seq_init(&hash_seq, stat_htab);
430438
ret = data_store(PGAQO_STAT_FILE, _form_stat_record_cb, entries,
431439
(void *) &hash_seq);
432440
if (ret != 0)
433441
hash_seq_term(&hash_seq);
442+
else
443+
/* Hash table and disk storage are now consistent */
444+
aqo_state->stat_changed = false;
434445

446+
end:
435447
LWLockRelease(&aqo_state->stat_lock);
436448
}
437449

@@ -468,7 +480,7 @@ aqo_qtexts_flush(void)
468480
long entries;
469481

470482
dsa_init();
471-
LWLockAcquire(&aqo_state->qtexts_lock, LW_SHARED);
483+
LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE);
472484

473485
if (!aqo_state->qtexts_changed)
474486
/* XXX: mull over forced mode. */
@@ -480,7 +492,9 @@ aqo_qtexts_flush(void)
480492
(void *) &hash_seq);
481493
if (ret != 0)
482494
hash_seq_term(&hash_seq);
483-
aqo_state->qtexts_changed = false;
495+
else
496+
/* Hash table and disk storage are now consistent */
497+
aqo_state->qtexts_changed = false;
484498

485499
end:
486500
LWLockRelease(&aqo_state->qtexts_lock);
@@ -530,7 +544,7 @@ aqo_data_flush(void)
530544
long entries;
531545

532546
dsa_init();
533-
LWLockAcquire(&aqo_state->data_lock, LW_SHARED);
547+
LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE);
534548

535549
if (!aqo_state->data_changed)
536550
/* XXX: mull over forced mode. */
@@ -547,6 +561,7 @@ aqo_data_flush(void)
547561
*/
548562
hash_seq_term(&hash_seq);
549563
else
564+
/* Hash table and disk storage are now consistent */
550565
aqo_state->data_changed = false;
551566
end:
552567
LWLockRelease(&aqo_state->data_lock);
@@ -573,14 +588,22 @@ aqo_queries_flush(void)
573588
int ret;
574589
long entries;
575590

576-
LWLockAcquire(&aqo_state->queries_lock, LW_SHARED);
591+
LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE);
592+
593+
if (!aqo_state->queries_changed)
594+
goto end;
595+
577596
entries = hash_get_num_entries(queries_htab);
578597
hash_seq_init(&hash_seq, queries_htab);
579598
ret = data_store(PGAQO_QUERIES_FILE, _form_queries_record_cb, entries,
580599
(void *) &hash_seq);
581600
if (ret != 0)
582601
hash_seq_term(&hash_seq);
602+
else
603+
/* Hash table and disk storage are now consistent */
604+
aqo_state->queries_changed = false;
583605

606+
end:
584607
LWLockRelease(&aqo_state->queries_lock);
585608
}
586609

@@ -620,7 +643,8 @@ data_store(const char *filename, form_record_t callback,
620643
goto error;
621644
}
622645

623-
(void) durable_rename(tmpfile, filename, LOG);
646+
/* Parallel (re)writing into a file haven't happen. */
647+
(void) durable_rename(tmpfile, filename, PANIC);
624648
elog(LOG, "[AQO] %d records stored in file %s.", counter, filename);
625649
return 0;
626650

@@ -838,7 +862,7 @@ aqo_queries_load(void)
838862

839863
LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE);
840864

841-
/* Load on postmaster sturtup. So no any concurrent actions possible here. */
865+
/* Load on postmaster startup. So no any concurrent actions possible here. */
842866
Assert(hash_get_num_entries(queries_htab) == 0);
843867

844868
data_load(PGAQO_QUERIES_FILE, _deform_queries_record_cb, NULL);
@@ -925,6 +949,9 @@ data_load(const char *filename, deform_record_t callback, void *ctx)
925949
static void
926950
on_shmem_shutdown(int code, Datum arg)
927951
{
952+
/*
953+
* XXX: It can be expensive to rewrite a file on each shutdown of a backend.
954+
*/
928955
aqo_qtexts_flush();
929956
aqo_data_flush();
930957
}
@@ -1200,6 +1227,7 @@ _aqo_data_remove(data_key *key)
12001227

12011228
if (hash_search(data_htab, key, HASH_REMOVE, NULL) == NULL)
12021229
elog(PANIC, "[AQO] Inconsistent data hash table");
1230+
12031231
aqo_state->data_changed = true;
12041232
}
12051233

@@ -1269,8 +1297,9 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids)
12691297
char *ptr;
12701298
ListCell *lc;
12711299
size_t size;
1272-
bool tblOverflow;
1273-
HASHACTION action;
1300+
bool tblOverflow;
1301+
HASHACTION action;
1302+
bool result;
12741303

12751304
Assert(!LWLockHeldByMe(&aqo_state->data_lock));
12761305

@@ -1321,7 +1350,6 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids)
13211350
}
13221351

13231352
Assert(DsaPointerIsValid(entry->data_dp));
1324-
Assert(entry->rows <= data->rows); /* Reserved for the future features */
13251353

13261354
if (entry->cols != data->cols || entry->nrels != list_length(reloids))
13271355
{
@@ -1387,8 +1415,9 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids)
13871415

13881416
aqo_state->data_changed = true;
13891417
end:
1418+
result = aqo_state->data_changed;
13901419
LWLockRelease(&aqo_state->data_lock);
1391-
return aqo_state->data_changed;
1420+
return result;
13921421
}
13931422

13941423
static void
@@ -1496,7 +1525,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids,
14961525

14971526
dsa_init();
14981527

1499-
LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE);
1528+
LWLockAcquire(&aqo_state->data_lock, LW_SHARED);
15001529

15011530
if (!wideSearch)
15021531
{
@@ -1631,7 +1660,8 @@ aqo_data(PG_FUNCTION_ARGS)
16311660
ptr += sizeof(data_key);
16321661

16331662
if (entry->cols > 0)
1634-
values[AD_FEATURES] = PointerGetDatum(form_matrix((double *)ptr, entry->rows, entry->cols));
1663+
values[AD_FEATURES] = PointerGetDatum(form_matrix((double *) ptr,
1664+
entry->rows, entry->cols));
16351665
else
16361666
nulls[AD_FEATURES] = true;
16371667

@@ -1719,7 +1749,9 @@ aqo_data_reset(void)
17191749
elog(ERROR, "[AQO] hash table corrupted");
17201750
num_remove++;
17211751
}
1722-
aqo_state->data_changed = true;
1752+
1753+
if (num_remove > 0)
1754+
aqo_state->data_changed = true;
17231755
LWLockRelease(&aqo_state->data_lock);
17241756
if (num_remove != num_entries)
17251757
elog(ERROR, "[AQO] Query ML memory storage is corrupted or parallel access without a lock has detected.");
@@ -1831,6 +1863,7 @@ aqo_queries_store(uint64 queryid,
18311863
entry->use_aqo = use_aqo;
18321864
entry->auto_tuning = auto_tuning;
18331865

1866+
aqo_state->queries_changed = true;
18341867
LWLockRelease(&aqo_state->queries_lock);
18351868
return true;
18361869
}
@@ -1856,7 +1889,10 @@ aqo_queries_reset(void)
18561889
elog(ERROR, "[AQO] hash table corrupted");
18571890
num_remove++;
18581891
}
1859-
aqo_state->queries_changed = true;
1892+
1893+
if (num_remove > 0)
1894+
aqo_state->queries_changed = true;
1895+
18601896
LWLockRelease(&aqo_state->queries_lock);
18611897

18621898
if (num_remove != num_entries - 1)

0 commit comments

Comments
 (0)