From 907df9caf14fcd9f8011845a7c8376654e124847 Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Tue, 17 Dec 2024 03:30:55 -0500 Subject: [PATCH 1/4] Add working input function for pg_ndistinct. This is needed to import extended statistics. --- src/backend/statistics/mvdistinct.c | 359 +++++++++++++++++- .../statistics/extended_stats_internal.h | 8 + src/test/regress/expected/stats_ext.out | 7 + src/test/regress/sql/stats_ext.sql | 3 + 4 files changed, 371 insertions(+), 6 deletions(-) diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c index 7e7a63405c8b..e9c02aaa63ec 100644 --- a/src/backend/statistics/mvdistinct.c +++ b/src/backend/statistics/mvdistinct.c @@ -27,10 +27,19 @@ #include "catalog/pg_statistic_ext.h" #include "catalog/pg_statistic_ext_data.h" +#include "common/int.h" +#include "common/jsonapi.h" +#include "fmgr.h" #include "lib/stringinfo.h" +#include "mb/pg_wchar.h" +#include "nodes/miscnodes.h" +#include "nodes/pg_list.h" #include "statistics/extended_stats_internal.h" #include "statistics/statistics.h" +#include "utils/builtins.h" +#include "utils/float.h" #include "utils/fmgrprotos.h" +#include "utils/palloc.h" #include "utils/syscache.h" #include "utils/typcache.h" #include "varatt.h" @@ -328,23 +337,361 @@ statext_ndistinct_deserialize(bytea *data) return ndistinct; } +typedef struct +{ + const char *str; + bool found_only_object; + List *distinct_items; + Node *escontext; + + MVNDistinctItem *current_item; +} ndistinctParseState; + +/* + * Invoked at the start of each object in the JSON document. + * The entire JSON document should be one object with no sub-objects. + * + * If we're anywhere else in the document, it's an error. 
+ */
+static JsonParseErrorType
+ndistinct_object_start(void *state)
+{
+	ndistinctParseState *parse = state;
+
+	if (parse->found_only_object == true)
+	{
+		errsave(parse->escontext,	/* ereturn would return 0, i.e. JSON_SUCCESS */
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+				 errdetail("Must begin with \"{\"")));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	parse->found_only_object = true;
+	return JSON_SUCCESS;
+}
+
+/*
+ * ndistinct input format does not have arrays, so any array start is an error.
+ * errsave (not ereturn) keeps the JSON_SEM_ACTION_FAILED return reachable.
+ */
+static JsonParseErrorType
+ndistinct_array_start(void *state)
+{
+	ndistinctParseState *parse = state;
+
+	errsave(parse->escontext,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+			 errdetail("All ndistinct count values are scalar doubles.")));
+	return JSON_SEM_ACTION_FAILED;
+}
+
+static int
+attnum_compare(const void *aptr, const void *bptr)
+{
+	AttrNumber	a = *(const AttrNumber *) aptr;
+	AttrNumber	b = *(const AttrNumber *) bptr;
+
+	return pg_cmp_s16(a, b);	/* qsort comparator over AttrNumber values */
+}
+
+/*
+ * The object keys are themselves comma-separated lists of attnums
+ * with negative attnums representing one of the expressions defined
+ * in the extended statistics object.
+ */
+static JsonParseErrorType
+ndistinct_object_field_start(void *state, char *fname, bool isnull)
+{
+	ndistinctParseState *parse = state;
+	char	   *token;
+	char	   *saveptr;
+	const char *delim = ", ";
+	char	   *scratch;
+	List	   *attnum_list = NIL;
+	int			natts = 0;
+	MVNDistinctItem *item;
+	AttrNumber *attrsort;
+
+	if (isnull || fname == NULL)
+	{
+		errsave(parse->escontext,	/* ereturn would return 0, i.e. JSON_SUCCESS */
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+				 errdetail("All ndistinct attnum lists must be a comma separated list of attnums.")));
+
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	scratch = pstrdup(fname);
+
+	token = strtok_r(scratch, delim, &saveptr);
+
+	while (token != NULL)
+	{
+		attnum_list = lappend(attnum_list, (void *) token);
+
+		token = strtok_r(NULL, delim, &saveptr);
+	}
+	natts = list_length(attnum_list);	/* NIL-safe: a key with no tokens leaves the list NIL */
+
+	/*
+	 * We need at least 2 attnums for a ndistinct item, anything less is
+	 * malformed.
+	 */
+	if (natts < 2)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+				 errdetail("All ndistinct attnum lists must be a comma separated list of attnums.")));
+
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	item = palloc(sizeof(MVNDistinctItem));
+	item->nattributes = natts;
+	item->attributes = palloc0(natts * sizeof(AttrNumber));
+	attrsort = palloc0(natts * sizeof(AttrNumber));
+
+	for (int i = 0; i < natts; i++)
+	{
+		char	   *s = (char *) attnum_list->elements[i].ptr_value;
+
+		attrsort[i] = pg_strtoint16_safe(s, parse->escontext);
+		item->attributes[i] = attrsort[i];
+
+		if (SOFT_ERROR_OCCURRED(parse->escontext))
+			return JSON_SEM_ACTION_FAILED;
+	}
+
+	list_free(attnum_list);
+	pfree(scratch);
+
+	qsort(attrsort, natts, sizeof(AttrNumber), attnum_compare);	/* sorted copy: duplicates become adjacent */
+	for (int i = 1; i < natts; i++)
+		if (attrsort[i] == attrsort[i-1])
+		{
+			errsave(parse->escontext,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+					 errdetail("attnum list duplicate value found: %d", attrsort[i])));
+
+			return JSON_SEM_ACTION_FAILED;
+		}
+
+	pfree(attrsort);
+
+	/* add the MVNDistinctItem (ndistinct not yet set) to the list */
+	parse->current_item = item;
+	parse->distinct_items = lappend(parse->distinct_items, (void *) item);
+	return JSON_SUCCESS;
+}
+
+/*
+ * ndistinct input format does not have arrays, so any array element is an
+ * error; errsave (not ereturn) keeps the failure return below reachable.
+ */
+static JsonParseErrorType
+ndistinct_array_element_start(void *state, bool isnull)
+{
+	ndistinctParseState *parse = state;
+
+	errsave(parse->escontext,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+			 errdetail("Cannot contain array elements.")));
+
+	return JSON_SEM_ACTION_FAILED;
+}
+
+/*
+ * Handle scalar events from the ndistinct input parser.
+ *
+ * There is only one case where we will encounter a scalar, and that is the
+ * ndistinct value for the previous object key.
+ */
+static JsonParseErrorType
+ndistinct_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	ndistinctParseState *parse = state;
+
+	/* a bare top-level scalar is wrong; errsave (not ereturn) so we reach the return */
+	if (parse->found_only_object != true)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+				 errdetail("Must begin with \"{\"")));
+
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	Assert(parse->current_item != NULL);
+
+	parse->current_item->ndistinct = float8in_internal(token, NULL, "double",
+													   token, parse->escontext);
+
+	if (SOFT_ERROR_OCCURRED(parse->escontext))
+		return JSON_SEM_ACTION_FAILED;
+
+	/* mark us done with this item */
+	parse->current_item = NULL;
+	return JSON_SUCCESS;
+}
+
 /*
  * pg_ndistinct_in
  *		input routine for type pg_ndistinct
  *
- * pg_ndistinct is real enough to be a table column, but it has no
- * operations of its own, and disallows input (just like pg_node_tree).
+ * example input: {"6, -1": 14, "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}
+ *
+ * This import format is clearly a specific subset of JSON, therefore it makes
+ * sense to leverage those parsing utilities, and further validate it from there.
*/
 Datum
 pg_ndistinct_in(PG_FUNCTION_ARGS)
 {
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("cannot accept a value of type %s", "pg_ndistinct")));
+	char	   *str = PG_GETARG_CSTRING(0);
+
+	ndistinctParseState parse_state;
+	JsonParseErrorType result;
+	JsonLexContext *lex;
+	JsonSemAction sem_action;
+
+	/* initialize semantic state */
+	parse_state.str = str;
+	parse_state.found_only_object = false;
+	parse_state.distinct_items = NIL;
+	parse_state.escontext = fcinfo->context;
+	parse_state.current_item = NULL;
+
+	/* set callbacks */
+	sem_action.semstate = (void *) &parse_state;
+	sem_action.object_start = ndistinct_object_start;
+	sem_action.object_end = NULL;
+	sem_action.array_start = ndistinct_array_start;
+	sem_action.array_end = NULL;
+	sem_action.object_field_start = ndistinct_object_field_start;
+	sem_action.object_field_end = NULL;
+	sem_action.array_element_start = ndistinct_array_element_start;
+	sem_action.array_element_end = NULL;
+	sem_action.scalar = ndistinct_scalar;
+
+	lex = makeJsonLexContextCstringLen(NULL, str, strlen(str),
+									   PG_UTF8, true);
+	result = pg_parse_json(lex, &sem_action);
+	freeJsonLexContext(lex);
+	if (result == JSON_SUCCESS)
+	{
+		MVNDistinct *ndistinct;
+		int			nitems = list_length(parse_state.distinct_items);	/* NIL-safe for "{}" */
+		bytea	   *bytes;
 
-	PG_RETURN_VOID();			/* keep compiler quiet */
+		ndistinct = palloc(offsetof(MVNDistinct, items) +
+						   nitems * sizeof(MVNDistinctItem));
+
+		ndistinct->magic = STATS_NDISTINCT_MAGIC;
+		ndistinct->type = STATS_NDISTINCT_TYPE_BASIC;
+		ndistinct->nitems = nitems;
+
+		for (int i = 0; i < nitems; i++)
+		{
+			MVNDistinctItem *item = parse_state.distinct_items->elements[i].ptr_value;
+
+			ndistinct->items[i].ndistinct = item->ndistinct;
+			ndistinct->items[i].nattributes = item->nattributes;
+			ndistinct->items[i].attributes = item->attributes;
+
+			/*
+			 * free the MVNDistinctItem, but not the attributes we're still
+			 * using
+			 */
+			pfree(item);
+		}
+		bytes = statext_ndistinct_serialize(ndistinct);
+
+		list_free(parse_state.distinct_items);
+		for (int i = 0; i < nitems; i++)
+			pfree(ndistinct->items[i].attributes);
+		pfree(ndistinct);
+
+		PG_RETURN_BYTEA_P(bytes);
+	}
+	else if (result == JSON_SEM_ACTION_FAILED)
+		PG_RETURN_NULL();		/* escontext already set */
+
+	/* Anything else is a generic JSON parse error */
+	ereturn(parse_state.escontext, (Datum) 0,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_ndistinct: \"%s\"", str),
+			 errdetail("Must be valid JSON.")));
+	PG_RETURN_NULL();
+}
+
+/*
+ * Free an MVNDistinct: each item's attributes array, then the struct itself
+ */
+void
+free_pg_ndistinct(MVNDistinct *ndistinct)
+{
+	for (int i = 0; i < ndistinct->nitems; i++)
+		pfree(ndistinct->items[i].attributes);
+
+	pfree(ndistinct);
+}
+
+/*
+ * Validate an MVNDistinct against the extended statistics object definition.
+ *
+ * Every MVNDistinctItem must be checked to ensure that the attnums in the
+ * attributes list correspond to attnums/expressions defined by the
+ * extended statistics object.
+ *
+ * Positive attnums are attributes which must be found in the stxkeys,
+ * while negative attnums correspond to an expr number, so the attnum
+ * can't be below (0 - numexprs).
+ */ +bool +pg_ndistinct_validate_items(MVNDistinct *ndistinct, int2vector *stxkeys, int numexprs, int elevel) +{ + int attnum_expr_lowbound = 0 - numexprs; + + for (int i = 0; i < ndistinct->nitems; i++) + { + MVNDistinctItem item = ndistinct->items[i]; + + for (int j = 0; j < item.nattributes; j++) + { + AttrNumber attnum = item.attributes[j]; + bool ok = false; + + if (attnum > 0) + { + for (int k = 0; k < stxkeys->dim1; k++) + if (attnum == stxkeys->values[k]) + { + ok = true; + break; + } + } + else if ((attnum < 0) && (attnum >= attnum_expr_lowbound)) + ok = true; + + if (!ok) + { + ereport(elevel, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("pg_ndistinct: invalid attnum for this statistics object: %d", attnum))); + return false; + } + } + } + return true; } + /* * pg_ndistinct * output routine for type pg_ndistinct diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index efcb7dc35461..396915a8a978 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -127,4 +127,12 @@ extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root, Selectivity *overlap_basesel, Selectivity *totalsel); +extern Datum import_mcvlist(HeapTuple tup, int elevel, int numattrs, + Oid *atttypids, int32 *atttypmods, Oid *atttypcolls, + int nitems, Datum *mcv_elems, bool *mcv_nulls, + bool *mcv_elem_nulls, float8 *freqs, float8 *base_freqs); +extern bool pg_ndistinct_validate_items(MVNDistinct *ndistinct, int2vector *stxkeys, + int numexprs, int elevel); +extern void free_pg_ndistinct(MVNDistinct *ndistinct); + #endif /* EXTENDED_STATS_INTERNAL_H */ diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 6359e5fb689c..8c893305353d 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -3358,6 +3358,13 @@ SELECT statistics_name, most_common_vals FROM 
pg_stats_ext_exprs x s_expr | {1} (2 rows) +-- new input functions +SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}'::pg_ndistinct; + pg_ndistinct +------------------------------------------------------------------- + {"6, -1": 14, "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549} +(1 row) + -- Tidy up DROP OPERATOR <<< (int, int); DROP FUNCTION op_leak(int, int); diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index da4f2fe9c938..3cecccc8b3ba 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -1700,6 +1700,9 @@ SELECT statistics_name, most_common_vals FROM pg_stats_ext x SELECT statistics_name, most_common_vals FROM pg_stats_ext_exprs x WHERE tablename = 'stats_ext_tbl' ORDER BY ROW(x.*); +-- new input functions +SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}'::pg_ndistinct; + -- Tidy up DROP OPERATOR <<< (int, int); DROP FUNCTION op_leak(int, int); From 5f382b7e205139096281f200562fec066cedea43 Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Tue, 17 Dec 2024 19:47:43 -0500 Subject: [PATCH 2/4] Add working input function for pg_dependencies. This is needed to import extended statistics. 
--- src/backend/statistics/dependencies.c | 348 +++++++++++++++++++++++- src/test/regress/expected/stats_ext.out | 18 ++ src/test/regress/sql/stats_ext.sql | 6 + 3 files changed, 362 insertions(+), 10 deletions(-) diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index eb2fc4366b4a..ec26a2427e2f 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -13,18 +13,27 @@ */ #include "postgres.h" +#include "access/attnum.h" #include "access/htup_details.h" #include "catalog/pg_statistic_ext.h" #include "catalog/pg_statistic_ext_data.h" +#include "common/int.h" +#include "common/jsonapi.h" +#include "fmgr.h" #include "lib/stringinfo.h" +#include "mb/pg_wchar.h" +#include "nodes/miscnodes.h" #include "nodes/nodeFuncs.h" #include "nodes/nodes.h" #include "nodes/pathnodes.h" +#include "nodes/pg_list.h" #include "optimizer/clauses.h" #include "optimizer/optimizer.h" #include "parser/parsetree.h" #include "statistics/extended_stats_internal.h" #include "statistics/statistics.h" +#include "utils/builtins.h" +#include "utils/float.h" #include "utils/fmgroids.h" #include "utils/fmgrprotos.h" #include "utils/lsyscache.h" @@ -643,24 +652,343 @@ statext_dependencies_load(Oid mvoid, bool inh) return result; } +typedef struct +{ + const char *str; + bool found_only_object; + List *dependency_list; + Node *escontext; + + MVDependency *current_dependency; +} dependenciesParseState; + +/* + * Invoked at the start of each object in the JSON document. + * The entire JSON document should be one object with no sub-objects. + * + * If we're anywhere else in the document, it's an error. 
+ */
+static JsonParseErrorType
+dependencies_object_start(void *state)
+{
+	dependenciesParseState *parse = state;
+
+	if (parse->found_only_object == true)
+	{
+		errsave(parse->escontext,	/* ereturn would return 0, i.e. JSON_SUCCESS */
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+				 errdetail("Must begin with \"{\"")));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	parse->found_only_object = true;
+	return JSON_SUCCESS;
+}
+
+/*
+ * dependencies input format does not have arrays, so any array start is an
+ * error; errsave (not ereturn) keeps the failure return below reachable.
+ */
+static JsonParseErrorType
+dependencies_array_start(void *state)
+{
+	dependenciesParseState *parse = state;
+
+	errsave(parse->escontext,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+			 errdetail("All dependencies count values are scalar doubles.")));
+	return JSON_SEM_ACTION_FAILED;
+}
+
+static int
+attnum_compare(const void *aptr, const void *bptr)
+{
+	AttrNumber	a = *(const AttrNumber *) aptr;
+	AttrNumber	b = *(const AttrNumber *) bptr;
+
+	return pg_cmp_s16(a, b);	/* qsort comparator over AttrNumber values */
+}
+
+/*
+ * The object keys are themselves comma-separated lists of attnums
+ * with negative attnums representing one of the expressions defined
+ * in the extended statistics object, followed by a => and a final attnum.
+ *
+ * example: "-1, 2 => -1"
+ */
+static JsonParseErrorType
+dependencies_object_field_start(void *state, char *fname, bool isnull)
+{
+	dependenciesParseState *parse = state;
+	char	   *token;
+	char	   *saveptr;
+	const char *delim = ", ";
+	const char *arrow_delim = " => ";
+	char	   *scratch;
+	char	   *arrow_p;
+	char	   *after_arrow_p;
+	List	   *attnum_list = NIL;
+	int			natts = 0;
+	AttrNumber	final_attnum;
+	MVDependency *dep;
+	AttrNumber *attrsort;
+
+	if (isnull || fname == NULL)
+	{
+		errsave(parse->escontext,	/* ereturn would return 0, i.e. JSON_SUCCESS */
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+				 errdetail("All dependencies attnum lists must be a comma separated list of attnums with a final => attnum.")));
+
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	scratch = pstrdup(fname);
+
+	/* The substring ' => ' must occur exactly once */
+	arrow_p = strstr(scratch, arrow_delim);
+	if (arrow_p == NULL)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+				 errdetail("All dependencies attnum lists must be a comma separated list of attnums with a final => attnum.")));
+
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	/*
+	 * Everything to the left of the arrow is the attribute list, so split
+	 * that off into its own string.
+	 *
+	 * Everything to the right should be the lone target attribute.
+	 */
+	*arrow_p = '\0';
+
+	/* look for the character immediately beyond the delimiter we just found */
+	after_arrow_p = arrow_p + strlen(arrow_delim);
+
+	/* We should not find another arrow delim */
+	if (strstr(after_arrow_p, arrow_delim) != NULL)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+				 errdetail("All dependencies attnum lists must be a comma separated list of attnums with a final => attnum.")));
+
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	/* what is left should be exactly one attnum */
+	final_attnum = pg_strtoint16_safe(after_arrow_p, parse->escontext);
+
+	if (SOFT_ERROR_OCCURRED(parse->escontext))
+		return JSON_SEM_ACTION_FAILED;
+
+	/* Left of the arrow is just regular attnums */
+	token = strtok_r(scratch, delim, &saveptr);
+
+	while (token != NULL)
+	{
+		attnum_list = lappend(attnum_list, (void *) token);
+
+		token = strtok_r(NULL, delim, &saveptr);
+	}
+	natts = list_length(attnum_list);	/* NIL-safe: nothing left of the arrow leaves the list NIL */
+
+	/*
+	 * We need at least 1 attnum left of the arrow for a dependencies item,
+	 * anything less is malformed.
+	 */
+	if (natts < 1)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+				 errdetail("All dependencies attnum lists must be a comma separated list of attnums.")));
+
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	/*
+	 * Allocate enough space for the dependency, the attnums in the list, plus
+	 * the final attnum
+	 */
+	dep = palloc0(offsetof(MVDependency, attributes) + ((natts + 1) * sizeof(AttrNumber)));
+	dep->nattributes = natts + 1;
+	dep->attributes[natts] = final_attnum;
+
+	attrsort = palloc0(dep->nattributes * sizeof(AttrNumber));
+	attrsort[natts] = final_attnum;
+
+	for (int i = 0; i < natts; i++)
+	{
+		char	   *s = (char *) attnum_list->elements[i].ptr_value;
+
+		attrsort[i] = pg_strtoint16_safe(s, parse->escontext);
+		dep->attributes[i] = attrsort[i];
+
+		if (SOFT_ERROR_OCCURRED(parse->escontext))
+			return JSON_SEM_ACTION_FAILED;
+	}
+
+	list_free(attnum_list);
+	pfree(scratch);
+
+	qsort(attrsort, dep->nattributes, sizeof(AttrNumber), attnum_compare);	/* duplicates become adjacent */
+	for (int i = 1; i < dep->nattributes; i++)
+		if (attrsort[i] == attrsort[i-1])
+		{
+			errsave(parse->escontext,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+					 errdetail("attnum list duplicate value found: %d", attrsort[i])));
+
+			return JSON_SEM_ACTION_FAILED;
+		}
+
+	pfree(attrsort);
+
+	/* add the MVDependency (degree not yet set) to the list */
+	parse->current_dependency = dep;
+	parse->dependency_list = lappend(parse->dependency_list, (void *) dep);
+	return JSON_SUCCESS;
+}
+
+/*
+ * dependencies input format does not have arrays, so any array elements encountered
+ * are an error.
+ */
+static JsonParseErrorType
+dependencies_array_element_start(void *state, bool isnull)
+{
+	dependenciesParseState *parse = state;
+
+	errsave(parse->escontext,	/* ereturn would return 0, i.e. JSON_SUCCESS */
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+			 errdetail("Cannot contain array elements.")));
+
+	return JSON_SEM_ACTION_FAILED;
+}
+
+/*
+ * Handle scalar events from the dependencies input parser.
+ *
+ * There is only one case where we will encounter a scalar, and that is the
+ * dependency degree for the previous object key.
+ */
+static JsonParseErrorType
+dependencies_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	dependenciesParseState *parse = state;
+
+	/* a bare top-level scalar is wrong; errsave (not ereturn) so we reach the return */
+	if (parse->found_only_object != true)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+				 errdetail("Must begin with \"{\"")));
+
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	Assert(parse->current_dependency != NULL);
+
+	parse->current_dependency->degree = float8in_internal(token, NULL, "double",
+														  token, parse->escontext);
+
+	if (SOFT_ERROR_OCCURRED(parse->escontext))
+		return JSON_SEM_ACTION_FAILED;
+
+	/* mark us done with this dependency */
+	parse->current_dependency = NULL;
+	return JSON_SUCCESS;
+}
+
 /*
  * pg_dependencies_in		- input routine for type pg_dependencies.
  *
- * pg_dependencies is real enough to be a table column, but it has no operations
- * of its own, and disallows input too
+ * example input:
+ *     {"-2 => 6": 0.292508,
+ *      "-2 => -1": 0.113999,
+ *      "6, -2 => -1": 0.348479,
+ *      "-1, -2 => 6": 0.839691}
+ *
+ * This import format is clearly a specific subset of JSON, therefore it makes
+ * sense to leverage those parsing utilities, and further validate it from there.
*/
 Datum
 pg_dependencies_in(PG_FUNCTION_ARGS)
 {
-	/*
-	 * pg_node_list stores the data in binary form and parsing text input is
-	 * not needed, so disallow this.
-	 */
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("cannot accept a value of type %s", "pg_dependencies")));
+	char	   *str = PG_GETARG_CSTRING(0);
+
+	dependenciesParseState parse_state;
+	JsonParseErrorType result;
+	JsonLexContext *lex;
+	JsonSemAction sem_action;
+
+	/* initialize the semantic state */
+	parse_state.str = str;
+	parse_state.found_only_object = false;
+	parse_state.dependency_list = NIL;
+	parse_state.escontext = fcinfo->context;
+	parse_state.current_dependency = NULL;
+
+	/* set callbacks */
+	sem_action.semstate = (void *) &parse_state;
+	sem_action.object_start = dependencies_object_start;
+	sem_action.object_end = NULL;
+	sem_action.array_start = dependencies_array_start;
+	sem_action.array_end = NULL;
+	sem_action.array_element_start = dependencies_array_element_start;
+	sem_action.array_element_end = NULL;
+	sem_action.object_field_start = dependencies_object_field_start;
+	sem_action.object_field_end = NULL;
+	sem_action.scalar = dependencies_scalar;
+
+	lex = makeJsonLexContextCstringLen(NULL, str, strlen(str), PG_UTF8, true);
+
+	result = pg_parse_json(lex, &sem_action);
+	freeJsonLexContext(lex);
+
+	if (result == JSON_SUCCESS)
+	{
+		List	   *list = parse_state.dependency_list;
+		int			ndeps = list_length(list);	/* NIL-safe for "{}" */
+		MVDependencies *mvdeps;
+		bytea	   *bytes;
+
+		mvdeps = palloc0(offsetof(MVDependencies, deps) + ndeps * sizeof(MVDependency *));
+		mvdeps->magic = STATS_DEPS_MAGIC;
+		mvdeps->type = STATS_DEPS_TYPE_BASIC;
+		mvdeps->ndeps = ndeps;
+
+		/* copy the MVDependency pointers out of the list into the MVDependencies */
+		for (int i = 0; i < ndeps; i++)
+			mvdeps->deps[i] = list->elements[i].ptr_value;
+		bytes = statext_dependencies_serialize(mvdeps);
+
+		list_free(list);
+		for (int i = 0; i < ndeps; i++)
+			pfree(mvdeps->deps[i]);
+		pfree(mvdeps);
+
+		PG_RETURN_BYTEA_P(bytes);
+	}
+	else if (result == JSON_SEM_ACTION_FAILED)
+		PG_RETURN_NULL();
 
-	PG_RETURN_VOID();			/* keep compiler quiet */
+	/* Anything else is a generic JSON parse error */
+	ereturn(parse_state.escontext, (Datum) 0,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_dependencies: \"%s\"", str),
+			 errdetail("Must be valid JSON.")));
+
+	PG_RETURN_NULL();			/* keep compiler quiet */
 }
 
 /*
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 8c893305353d..6cc55b00f8d2 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -3365,6 +3365,24 @@ SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}'::pg_n {"6, -1": 14, "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549} (1 row) +-- can't have duplicates attnums in list +SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -1": 13454, "6, -1, -2": 14549}'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "{"6, -1": 14, "6, -2": 9143, "-1, -1": 13454, "6, -1, -2": 14549}" +LINE 1: SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -1": 13454, "6, -1... + ^ +DETAIL: attnum list duplicate value found: -1 +SELECT '{"-2 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 => -1": 0.348479, "-1, -2 => 6": 0.839691}'::pg_dependencies; + pg_dependencies +----------------------------------------------------------------------------------------------- + {"-2 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 => -1": 0.348479, "-1, -2 => 6": 0.839691} +(1 row) + +-- can't have duplicates attnums in list +SELECT '{"6 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 => -1": 0.348479, "-1, -2 => 6": 0.839691}'::pg_dependencies; +ERROR: malformed pg_dependencies: "{"6 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 => -1": 0.348479, "-1, -2 => 6": 0.839691}" +LINE 1: SELECT '{"6 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 =>... 
+ ^ +DETAIL: attnum list duplicate value found: 6 -- Tidy up DROP OPERATOR <<< (int, int); DROP FUNCTION op_leak(int, int); diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 3cecccc8b3ba..b033d7184fc4 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -1702,6 +1702,12 @@ SELECT statistics_name, most_common_vals FROM pg_stats_ext_exprs x -- new input functions SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}'::pg_ndistinct; +-- can't have duplicates attnums in list +SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -1": 13454, "6, -1, -2": 14549}'::pg_ndistinct; + +SELECT '{"-2 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 => -1": 0.348479, "-1, -2 => 6": 0.839691}'::pg_dependencies; +-- can't have duplicates attnums in list +SELECT '{"6 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 => -1": 0.348479, "-1, -2 => 6": 0.839691}'::pg_dependencies; -- Tidy up DROP OPERATOR <<< (int, int); From c27e8af0395cd1321bd683304337dfe0c1020b0c Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Thu, 26 Dec 2024 05:02:06 -0500 Subject: [PATCH 3/4] Expose attribute statistics functions for use in extended_stats. Many of the operations of attribute stats have analogous operations in extended stats. 
* get_attr_stat_type() * init_empty_stats_tuple() * text_to_stavalues() * get_elem_stat_type() --- src/backend/statistics/attribute_stats.c | 24 +++++------------------- src/include/statistics/statistics.h | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/backend/statistics/attribute_stats.c b/src/backend/statistics/attribute_stats.c index ab198076401b..6d5006a13c1a 100644 --- a/src/backend/statistics/attribute_stats.c +++ b/src/backend/statistics/attribute_stats.c @@ -100,23 +100,9 @@ static struct StatsArgInfo cleararginfo[] = static bool attribute_statistics_update(FunctionCallInfo fcinfo); static Node *get_attr_expr(Relation rel, int attnum); -static void get_attr_stat_type(Oid reloid, AttrNumber attnum, - Oid *atttypid, int32 *atttypmod, - char *atttyptype, Oid *atttypcoll, - Oid *eq_opr, Oid *lt_opr); -static bool get_elem_stat_type(Oid atttypid, char atttyptype, - Oid *elemtypid, Oid *elem_eq_opr); -static Datum text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, - Oid typid, int32 typmod, bool *ok); -static void set_stats_slot(Datum *values, bool *nulls, bool *replaces, - int16 stakind, Oid staop, Oid stacoll, - Datum stanumbers, bool stanumbers_isnull, - Datum stavalues, bool stavalues_isnull); static void upsert_pg_statistic(Relation starel, HeapTuple oldtup, Datum *values, bool *nulls, bool *replaces); static bool delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit); -static void init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited, - Datum *values, bool *nulls, bool *replaces); /* * Insert or Update Attribute Statistics @@ -568,7 +554,7 @@ get_attr_expr(Relation rel, int attnum) /* * Derive type information from the attribute. 
*/ -static void +void get_attr_stat_type(Oid reloid, AttrNumber attnum, Oid *atttypid, int32 *atttypmod, char *atttyptype, Oid *atttypcoll, @@ -650,7 +636,7 @@ get_attr_stat_type(Oid reloid, AttrNumber attnum, /* * Derive element type information from the attribute type. */ -static bool +bool get_elem_stat_type(Oid atttypid, char atttyptype, Oid *elemtypid, Oid *elem_eq_opr) { @@ -690,7 +676,7 @@ get_elem_stat_type(Oid atttypid, char atttyptype, * to false. If the resulting array contains NULLs, raise a WARNING and set ok * to false. Otherwise, set ok to true. */ -static Datum +Datum text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, Oid typid, int32 typmod, bool *ok) { @@ -743,7 +729,7 @@ text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, Oid typid, * Find and update the slot with the given stakind, or use the first empty * slot. */ -static void +void set_stats_slot(Datum *values, bool *nulls, bool *replaces, int16 stakind, Oid staop, Oid stacoll, Datum stanumbers, bool stanumbers_isnull, @@ -867,7 +853,7 @@ delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit) /* * Initialize values and nulls for a new stats tuple. 
*/ -static void +void init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited, Datum *values, bool *nulls, bool *replaces) { diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h index 7dd0f9755454..a0ab4b7633c2 100644 --- a/src/include/statistics/statistics.h +++ b/src/include/statistics/statistics.h @@ -127,4 +127,21 @@ extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind, int nclauses); extern HeapTuple statext_expressions_load(Oid stxoid, bool inh, int idx); +extern void get_attr_stat_type(Oid reloid, AttrNumber attnum, + Oid *atttypid, int32 *atttypmod, + char *atttyptype, Oid *atttypcoll, + Oid *eq_opr, Oid *lt_opr); +extern void init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited, + Datum *values, bool *nulls, bool *replaces); + +extern void set_stats_slot(Datum *values, bool *nulls, bool *replaces, + int16 stakind, Oid staop, Oid stacoll, + Datum stanumbers, bool stanumbers_isnull, + Datum stavalues, bool stavalues_isnull); + +extern Datum text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, + Oid typid, int32 typmod, bool *ok); +extern bool get_elem_stat_type(Oid atttypid, char atttyptype, + Oid *elemtypid, Oid *elem_eq_opr); + #endif /* STATISTICS_H */ From 418aeba649c35498cbe705a32e7c1841a020dc20 Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Fri, 3 Jan 2025 13:43:29 -0500 Subject: [PATCH 4/4] Add extended statistics support functions. Add pg_restore_extended_stats() and pg_clear_extended_stats(). These functions closely mirror their relation and attribute counterparts, but for extended statistics (i.e. CREATE STATISTICS) objects. 
--- doc/src/sgml/func.sgml | 98 ++ src/backend/statistics/dependencies.c | 65 + src/backend/statistics/extended_stats.c | 1104 +++++++++++++++++ src/backend/statistics/mcv.c | 144 +++ src/backend/statistics/mvdistinct.c | 14 +- src/include/catalog/pg_proc.dat | 16 + .../statistics/extended_stats_internal.h | 8 + src/test/regress/expected/stats_ext.out | 13 + src/test/regress/expected/stats_import.out | 441 +++++++ src/test/regress/sql/stats_ext.sql | 4 + src/test/regress/sql/stats_import.sql | 305 +++++ 11 files changed, 2209 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 574a544d9fa4..f09acea751be 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -30808,6 +30808,104 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset + + + + pg_restore_extended_stats + + pg_restore_extended_stats ( + VARIADIC kwargs "any" ) + boolean + + + Creates or updates statistics for statistics objects. Ordinarily, + these statistics are collected automatically or updated as a part of + VACUUM or ANALYZE, so + it's not necessary to call this function. However, it is useful + after a restore to enable the optimizer to choose better plans if + ANALYZE has not been run yet. 
+ + + The tracked statistics may change from version to version, so + arguments are passed as pairs of argname + and argvalue in the form: + + SELECT pg_restore_extended_stats( + 'arg1name', 'arg1value'::arg1type, + 'arg2name', 'arg2value'::arg2type, + 'arg3name', 'arg3value'::arg3type); + + + + For example, to set the n_distinct, + dependencies, and exprs + values for the statistics object myschema.mystatsobj: + + SELECT pg_restore_extended_stats( + 'statistics_schemaname', 'myschema'::name, + 'statistics_name', 'mystatsobj'::name, + 'inherited', false, + 'n_distinct', '{"2, 3": 4, "2, -1": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4}'::pg_ndistinct, + 'dependencies', '{"2 => 1": 1.000000, "2 => -1": 1.000000, "2 => -2": 1.000000}'::pg_dependencies, + 'exprs', '{{0,4,-0.75,"{1}","{0.5}","{-1,0}",-0.6,NULL,NULL,NULL},{0.25,4,-0.5,"{2}","{0.5}",NULL,1,NULL,NULL,NULL}}'::text[]); + + + + The required arguments are statistics_schemaname with a value + of type name, which specifies the statistics object's schema; + statistics_name with a value of type name, which specifies + the name of the statistics object; and inherited, which + specifies whether the statistics include values from child tables. + Other arguments are the names and values of statistics corresponding + to columns in pg_stats_ext. + To accept statistics for any expressions in the extended statistics object, the + parameter exprs with a type text[] is available. The array + must be two-dimensional, with an outer array whose length equals the number of expressions in + the object, and an inner array with one element for each of the statistical columns in pg_stats_ext_exprs, some + of which are themselves arrays. + + + Additionally, this function accepts argument name + version of type integer, which + specifies the server version from which the statistics originated. + This is anticipated to be helpful in porting statistics from older + versions of PostgreSQL. 
+ + + Minor errors are reported as a WARNING and + ignored, and remaining statistics will still be restored. If all + specified statistics are successfully restored, returns + true, otherwise false. + + + The caller must have the MAINTAIN privilege on the + table or be the owner of the database. + + + + + + + + pg_clear_extended_stats + + pg_clear_extended_stats ( + statistics_schemaname name, + statistics_name name, + inherited boolean ) + void + + + Clears statistics for the given statistics object, as + though the object was newly created. + + + The caller must have the MAINTAIN privilege on + the table or be the owner of the database. + + + diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index ec26a2427e2f..db424d48d24b 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -337,6 +337,10 @@ dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency) return (n_supporting_rows * 1.0 / data->numrows); } + +void +free_pg_dependencies(MVDependencies *dependencies); + /* * detects functional dependencies between groups of columns * @@ -909,6 +913,55 @@ dependencies_scalar(void *state, char *token, JsonTokenType tokentype) return JSON_SUCCESS; } +/* + * Validate an MVDependencies against the extended statistics object definition. + * + * Every MVDependencies must be checked to ensure that the attnums in the + * attributes list correspond to attnums/expressions defined by the + * extended statistics object. + * + * Positive attnums are attributes which must be found in the stxkeys, + * while negative attnums correspond to an expr number, so the attnum + * can't be below (0 - numexprs). 
+ */ +bool +pg_dependencies_validate_deps(MVDependencies *dependencies, int2vector *stxkeys, int numexprs, int elevel) +{ + int attnum_expr_lowbound = 0 - numexprs; + + for (int i = 0; i < dependencies->ndeps; i++) + { + MVDependency *dep = dependencies->deps[i]; + + for (int j = 0; j < dep->nattributes; j++) + { + AttrNumber attnum = dep->attributes[j]; + bool ok = false; + + if (attnum > 0) + { + for (int k = 0; k < stxkeys->dim1; k++) + if (attnum == stxkeys->values[k]) + { + ok = true; + break; + } + } + else if ((attnum < 0) && (attnum >= attnum_expr_lowbound)) + ok = true; + + if (!ok) + { + ereport(elevel, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("pg_dependencies: invalid attnum for this statistics object: %d", attnum))); + return false; + } + } + } + return true; +} + /* * pg_dependencies_in - input routine for type pg_dependencies. * @@ -991,6 +1044,18 @@ pg_dependencies_in(PG_FUNCTION_ARGS) PG_RETURN_NULL(); /* keep compiler quiet */ } +/* + * Free allocations of an MVNDistinct + */ +void +free_pg_dependencies(MVDependencies *dependencies) +{ + for (int i = 0; i < dependencies->ndeps; i++) + pfree(dependencies->deps[i]); + + pfree(dependencies); +} + /* * pg_dependencies - output routine for type pg_dependencies. 
*/ diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index a8b63ec0884a..ce883a01cc1e 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -18,11 +18,16 @@ #include "access/detoast.h" #include "access/genam.h" +#include "access/heapam.h" +#include "access/htup.h" #include "access/htup_details.h" #include "access/table.h" #include "catalog/indexing.h" +#include "catalog/pg_collation.h" #include "catalog/pg_statistic_ext.h" #include "catalog/pg_statistic_ext_data.h" +#include "catalog/pg_type_d.h" +#include "catalog/namespace.h" #include "commands/defrem.h" #include "commands/progress.h" #include "executor/executor.h" @@ -33,6 +38,7 @@ #include "pgstat.h" #include "postmaster/autovacuum.h" #include "statistics/extended_stats_internal.h" +#include "statistics/stat_utils.h" #include "statistics/statistics.h" #include "utils/acl.h" #include "utils/array.h" @@ -72,6 +78,71 @@ typedef struct StatExtEntry List *exprs; /* expressions */ } StatExtEntry; +enum extended_stats_argnum +{ + STATSCHEMA_ARG = 0, + STATNAME_ARG, + INHERITED_ARG, + NDISTINCT_ARG, + DEPENDENCIES_ARG, + MOST_COMMON_VALS_ARG, + MOST_COMMON_VAL_NULLS_ARG, + MOST_COMMON_FREQS_ARG, + MOST_COMMON_BASE_FREQS_ARG, + EXPRESSIONS_ARG, + NUM_EXTENDED_STATS_ARGS +}; + +static struct StatsArgInfo extarginfo[] = +{ + [STATSCHEMA_ARG] = {"statistics_schemaname", TEXTOID}, + [STATNAME_ARG] = {"statistics_name", TEXTOID}, + [INHERITED_ARG] = {"inherited", BOOLOID}, + [NDISTINCT_ARG] = {"n_distinct", PG_NDISTINCTOID}, + [DEPENDENCIES_ARG] = {"dependencies", PG_DEPENDENCIESOID}, + [MOST_COMMON_VALS_ARG] = {"most_common_vals", TEXTARRAYOID}, + [MOST_COMMON_VAL_NULLS_ARG] = {"most_common_val_nulls", BOOLARRAYOID}, + [MOST_COMMON_FREQS_ARG] = {"most_common_freqs", FLOAT8ARRAYOID}, + [MOST_COMMON_BASE_FREQS_ARG] = {"most_common_base_freqs", FLOAT8ARRAYOID}, + [EXPRESSIONS_ARG] = {"exprs", TEXTARRAYOID}, + [NUM_EXTENDED_STATS_ARGS] 
= {0} +}; + +/* + * NOTE: the RANGE_LENGTH & RANGE_BOUNDS stats are not yet reflected in any + * version of pg_stat_ext_exprs. + */ +enum extended_stats_exprs_element +{ + NULL_FRAC_ELEM = 0, + AVG_WIDTH_ELEM, + N_DISTINCT_ELEM, + MOST_COMMON_VALS_ELEM, + MOST_COMMON_FREQS_ELEM, + HISTOGRAM_BOUNDS_ELEM, + CORRELATION_ELEM, + MOST_COMMON_ELEMS_ELEM, + MOST_COMMON_ELEM_FREQS_ELEM, + ELEM_COUNT_HISTOGRAM_ELEM, + NUM_ATTRIBUTE_STATS_ELEMS +}; + +static struct StatsArgInfo extexprarginfo[] = +{ + [NULL_FRAC_ELEM] = {"null_frac", FLOAT4OID}, + [AVG_WIDTH_ELEM] = {"avg_width", INT4OID}, + [N_DISTINCT_ELEM] = {"n_distinct", FLOAT4OID}, + [MOST_COMMON_VALS_ELEM] = {"most_common_vals", TEXTOID}, + [MOST_COMMON_FREQS_ELEM] = {"most_common_freqs", FLOAT4ARRAYOID}, + [HISTOGRAM_BOUNDS_ELEM] = {"histogram_bounds", TEXTOID}, + [CORRELATION_ELEM] = {"correlation", FLOAT4OID}, + [MOST_COMMON_ELEMS_ELEM] = {"most_common_elems", TEXTOID}, + [MOST_COMMON_ELEM_FREQS_ELEM] = {"most_common_elem_freqs", FLOAT4ARRAYOID}, + [ELEM_COUNT_HISTOGRAM_ELEM] = {"elem_count_histogram", FLOAT4ARRAYOID}, + [NUM_ATTRIBUTE_STATS_ELEMS] = {0} +}; + +static bool extended_statistics_update(FunctionCallInfo fcinfo); static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid); static VacAttrStats **lookup_var_attr_stats(Bitmapset *attrs, List *exprs, @@ -99,6 +170,28 @@ static StatsBuildData *make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows, VacAttrStats **stats, int stattarget); +static HeapTuple get_pg_statistic_ext(Relation pg_stext, Oid nspoid, + const char *stxname); +static bool delete_pg_statistic_ext_data(Oid stxoid, bool inherited); + +typedef struct +{ + bool ndistinct; + bool dependencies; + bool mcv; + bool expressions; +} stakindFlags; + +static void expand_stxkind(HeapTuple tup, stakindFlags * enabled); +static void upsert_pg_statistic_ext_data(Datum *values, bool *nulls, bool *replaces); +static bool check_mcvlist_array(ArrayType *arr, int 
argindex, + int required_ndimss, int mcv_length); +static Datum import_expressions(Relation pgsd, int numexprs, + Oid *atttypids, int32 *atttypmods, + Oid *atttypcolls, ArrayType *exprs_arr); +static bool text_to_float4(Datum input, Datum *output); +static bool text_to_int4(Datum input, Datum *output); + /* * Compute requested extended stats, using the rows sampled for the plain @@ -2631,3 +2724,1014 @@ make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows, return result; } + +static HeapTuple +get_pg_statistic_ext(Relation pg_stext, Oid nspoid, const char *stxname) +{ + ScanKeyData key[2]; + SysScanDesc scan; + HeapTuple tup; + Oid stxoid = InvalidOid; + + ScanKeyInit(&key[0], + Anum_pg_statistic_ext_stxname, + BTEqualStrategyNumber, + F_NAMEEQ, + CStringGetDatum(stxname)); + ScanKeyInit(&key[1], + Anum_pg_statistic_ext_stxnamespace, + BTEqualStrategyNumber, + F_OIDEQ, + ObjectIdGetDatum(nspoid)); + + /* + * Try to find matching pg_statistic_ext row. + */ + scan = systable_beginscan(pg_stext, + StatisticExtNameIndexId, + true, + NULL, + 2, + key); + + /* Unique index, either we get a tuple or we don't. */ + tup = systable_getnext(scan); + + if (HeapTupleIsValid(tup)) + stxoid = ((Form_pg_statistic_ext) GETSTRUCT(tup))->oid; + + systable_endscan(scan); + + if (!OidIsValid(stxoid)) + return NULL; + + return SearchSysCacheCopy1(STATEXTOID, ObjectIdGetDatum(stxoid)); +} + +/* + * Decode the stxkind column so that we know which stats types to expect. 
+ */ +static void +expand_stxkind(HeapTuple tup, stakindFlags * enabled) +{ + Datum datum; + ArrayType *arr; + char *kinds; + + datum = SysCacheGetAttrNotNull(STATEXTOID, + tup, + Anum_pg_statistic_ext_stxkind); + arr = DatumGetArrayTypeP(datum); + if (ARR_NDIM(arr) != 1 || ARR_HASNULL(arr) || ARR_ELEMTYPE(arr) != CHAROID) + elog(ERROR, "stxkind is not a 1-D char array"); + + kinds = (char *) ARR_DATA_PTR(arr); + + for (int i = 0; i < ARR_DIMS(arr)[0]; i++) + if (kinds[i] == STATS_EXT_NDISTINCT) + enabled->ndistinct = true; + else if (kinds[i] == STATS_EXT_DEPENDENCIES) + enabled->dependencies = true; + else if (kinds[i] == STATS_EXT_MCV) + enabled->mcv = true; + else if (kinds[i] == STATS_EXT_EXPRESSIONS) + enabled->expressions = true; +} + +static void +upsert_pg_statistic_ext_data(Datum *values, bool *nulls, bool *replaces) +{ + Relation pg_stextdata; + HeapTuple stxdtup; + HeapTuple newtup; + + pg_stextdata = table_open(StatisticExtDataRelationId, RowExclusiveLock); + + stxdtup = SearchSysCache2(STATEXTDATASTXOID, + values[Anum_pg_statistic_ext_data_stxoid - 1], + values[Anum_pg_statistic_ext_data_stxdinherit - 1]); + + if (HeapTupleIsValid(stxdtup)) + { + newtup = heap_modify_tuple(stxdtup, + RelationGetDescr(pg_stextdata), + values, + nulls, + replaces); + CatalogTupleUpdate(pg_stextdata, &newtup->t_self, newtup); + ReleaseSysCache(stxdtup); + } + else + { + newtup = heap_form_tuple(RelationGetDescr(pg_stextdata), values, nulls); + CatalogTupleInsert(pg_stextdata, newtup); + } + + heap_freetuple(newtup); + + CommandCounterIncrement(); + + table_close(pg_stextdata, RowExclusiveLock); +} + +/* + * Insert or Update Extended Statistics + * + * Major errors, such as the table not existing, the statistics object not + * existing, or a permissions failure are always reported at ERROR. Other + * errors, such as a conversion failure on one statistic kind, are reported + * as WARNINGs, and other statistic kinds may still be updated. 
+ */ +static bool +extended_statistics_update(FunctionCallInfo fcinfo) +{ + Oid nspoid; + char *nspname; + char *stxname; + bool inherited; + Relation pg_stext; + HeapTuple tup = NULL; + + stakindFlags enabled; + stakindFlags has; + + Form_pg_statistic_ext stxform; + + Datum values[Natts_pg_statistic_ext_data]; + bool nulls[Natts_pg_statistic_ext_data]; + bool replaces[Natts_pg_statistic_ext_data]; + + bool success = true; + + Datum exprdatum; + bool isnull; + List *exprs = NIL; + int numattnums = 0; + int numexprs = 0; + int numattrs = 0; + + /* arrays of type info, if we need them */ + Oid *atttypids = NULL; + int32 *atttypmods = NULL; + Oid *atttypcolls = NULL; + + memset(nulls, false, sizeof(nulls)); + memset(values, 0, sizeof(values)); + memset(replaces, 0, sizeof(replaces)); + memset(&enabled, 0, sizeof(enabled)); + + has.mcv = (!PG_ARGISNULL(MOST_COMMON_VALS_ARG) && + !PG_ARGISNULL(MOST_COMMON_VAL_NULLS_ARG) && + !PG_ARGISNULL(MOST_COMMON_FREQS_ARG) && + !PG_ARGISNULL(MOST_COMMON_BASE_FREQS_ARG)); + has.ndistinct = !PG_ARGISNULL(NDISTINCT_ARG); + has.dependencies = !PG_ARGISNULL(DEPENDENCIES_ARG); + has.expressions = !PG_ARGISNULL(EXPRESSIONS_ARG); + + if (RecoveryInProgress()) + { + ereport(WARNING, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("Statistics cannot be modified during recovery."))); + PG_RETURN_BOOL(false); + } + + stats_check_required_arg(fcinfo, extarginfo, STATSCHEMA_ARG); + nspname = TextDatumGetCString(PG_GETARG_DATUM(STATSCHEMA_ARG)); + stats_check_required_arg(fcinfo, extarginfo, STATNAME_ARG); + stxname = TextDatumGetCString(PG_GETARG_DATUM(STATNAME_ARG)); + stats_check_required_arg(fcinfo, extarginfo, INHERITED_ARG); + inherited = PG_GETARG_NAME(INHERITED_ARG); + + nspoid = get_namespace_oid(nspname, true); + if (nspoid == InvalidOid) + { + ereport(WARNING, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("Namespace \"%s\" not found.", stxname))); + PG_RETURN_BOOL(false); + } + + 
pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock); + tup = get_pg_statistic_ext(pg_stext, nspoid, stxname); + + if (!HeapTupleIsValid(tup)) + { + table_close(pg_stext, RowExclusiveLock); + ereport(WARNING, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("Extended Statistics Object \"%s\".\"%s\" not found.", + get_namespace_name(nspoid), stxname))); + PG_RETURN_BOOL(false); + } + + stxform = (Form_pg_statistic_ext) GETSTRUCT(tup); + expand_stxkind(tup, &enabled); + numattnums = stxform->stxkeys.dim1; + + /* decode expression (if any) */ + exprdatum = SysCacheGetAttr(STATEXTOID, + tup, + Anum_pg_statistic_ext_stxexprs, + &isnull); + + if (!isnull) + { + char *s; + + s = TextDatumGetCString(exprdatum); + exprs = (List *) stringToNode(s); + pfree(s); + + /* + * Run the expressions through eval_const_expressions. This is not + * just an optimization, but is necessary, because the planner + * will be comparing them to similarly-processed qual clauses, and + * may fail to detect valid matches without this. We must not use + * canonicalize_qual, however, since these aren't qual + * expressions. 
+ */ + exprs = (List *) eval_const_expressions(NULL, (Node *) exprs); + + /* May as well fix opfuncids too */ + fix_opfuncids((Node *) exprs); + } + numexprs = list_length(exprs); + numattrs = numattnums + numexprs; + + /* lock table */ + stats_lock_check_privileges(stxform->stxrelid); + + if (has.mcv) + { + if (!enabled.mcv) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("MCV parameters \"%s\", \"%s\", \"%s\", and \"%s\" were all " + "specified for extended statistics object that does not expect MCV ", + extarginfo[MOST_COMMON_VALS_ARG].argname, + extarginfo[MOST_COMMON_VAL_NULLS_ARG].argname, + extarginfo[MOST_COMMON_FREQS_ARG].argname, + extarginfo[MOST_COMMON_BASE_FREQS_ARG].argname))); + has.mcv = false; + success = false; + } + } + else + { + /* The MCV args must all be NULL */ + if (!PG_ARGISNULL(MOST_COMMON_VALS_ARG) || + !PG_ARGISNULL(MOST_COMMON_VAL_NULLS_ARG) || + !PG_ARGISNULL(MOST_COMMON_FREQS_ARG) || + !PG_ARGISNULL(MOST_COMMON_BASE_FREQS_ARG)) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("MCV parameters \"%s\", \"%s\", \"%s\", and \"%s\" must be all specified if any are specified", + extarginfo[MOST_COMMON_VALS_ARG].argname, + extarginfo[MOST_COMMON_VAL_NULLS_ARG].argname, + extarginfo[MOST_COMMON_FREQS_ARG].argname, + extarginfo[MOST_COMMON_BASE_FREQS_ARG].argname))); + success = false; + } + } + + if (has.ndistinct && !enabled.ndistinct) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameters \"%s\" was specified for extended statistics object " + "that does not expect \"%s\"", + extarginfo[NDISTINCT_ARG].argname, + extarginfo[NDISTINCT_ARG].argname))); + has.ndistinct = false; + success = false; + } + + if (has.dependencies && !enabled.dependencies) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameters \"%s\" was specified for extended statistics object " + "that does not expect \"%s\"", + 
extarginfo[DEPENDENCIES_ARG].argname, + extarginfo[DEPENDENCIES_ARG].argname))); + has.dependencies = false; + success = false; + } + + if (has.expressions && !enabled.expressions) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameters \"%s\" was specified for extended statistics object " + "that does not expect \"%s\"", + extarginfo[DEPENDENCIES_ARG].argname, + extarginfo[DEPENDENCIES_ARG].argname))); + has.expressions = false; + success = false; + } + + /* + * Either of these statsistic types requires that we supply + * semi-filled-out VacAttrStatP array. + * + * + * It is not possible to use the existing lookup_var_attr_stats() and + * examine_attribute() because these functions will skip attributes for + * which attstattarget is 0, and we may have stats to import for those + * attributes. + */ + if (has.mcv || has.expressions) + { + atttypids = palloc0(numattrs * sizeof(Oid)); + atttypmods = palloc0(numattrs * sizeof(int32)); + atttypcolls = palloc0(numattrs * sizeof(Oid)); + + for (int i = 0; i < numattnums; i++) + { + AttrNumber attnum = stxform->stxkeys.values[i]; + + Oid lt_opr; + Oid eq_opr; + char typetype; + + /* + * fetch attribute entries the same as are done for attribute + * stats + */ + get_attr_stat_type(stxform->stxrelid, + attnum, + &atttypids[i], + &atttypmods[i], + &typetype, + &atttypcolls[i], + <_opr, + &eq_opr); + } + + for (int i = numattnums; i < numattrs; i++) + { + Node *expr = list_nth(exprs, i - numattnums); + + atttypids[i] = exprType(expr); + atttypmods[i] = exprTypmod(expr); + atttypcolls[i] = exprCollation(expr); + + /* + * Duplicate logic from get_attr_stat_type + */ + + /* + * If it's a multirange, step down to the range type, as is done + * by multirange_typanalyze(). + */ + if (type_is_multirange(atttypids[i])) + atttypids[i] = get_multirange_range(atttypids[i]); + + /* + * Special case: collation for tsvector is DEFAULT_COLLATION_OID. + * See compute_tsvector_stats(). 
+ */ + if (atttypids[i] == TSVECTOROID) + atttypcolls[i] = DEFAULT_COLLATION_OID; + + } + } + + /* Primary Key: cannot be NULL or replaced. */ + values[Anum_pg_statistic_ext_data_stxoid - 1] = ObjectIdGetDatum(stxform->oid); + values[Anum_pg_statistic_ext_data_stxdinherit - 1] = BoolGetDatum(inherited); + + if (has.ndistinct) + { + Datum ndistinct_datum = PG_GETARG_DATUM(NDISTINCT_ARG); + bytea *data = DatumGetByteaPP(ndistinct_datum); + MVNDistinct *ndistinct = statext_ndistinct_deserialize(data); + + if (pg_ndistinct_validate_items(ndistinct, &stxform->stxkeys, numexprs, WARNING)) + { + values[Anum_pg_statistic_ext_data_stxdndistinct - 1] = ndistinct_datum; + replaces[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true; + } + else + { + nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true; + success = false; + } + + free_pg_ndistinct(ndistinct); + } + else + nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true; + + if (has.dependencies) + { + Datum dependencies_datum = PG_GETARG_DATUM(DEPENDENCIES_ARG); + bytea *data = DatumGetByteaPP(dependencies_datum); + MVDependencies *dependencies = statext_dependencies_deserialize(data); + + if (pg_dependencies_validate_deps(dependencies, &stxform->stxkeys, numexprs, WARNING)) + { + values[Anum_pg_statistic_ext_data_stxddependencies - 1] = dependencies_datum; + replaces[Anum_pg_statistic_ext_data_stxddependencies - 1] = true; + } + else + { + nulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = true; + success = false; + } + + free_pg_dependencies(dependencies); + } + else + nulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = true; + + if (has.mcv) + { + Datum datum; + ArrayType *mcv_arr = PG_GETARG_ARRAYTYPE_P(MOST_COMMON_VALS_ARG); + ArrayType *nulls_arr = PG_GETARG_ARRAYTYPE_P(MOST_COMMON_VAL_NULLS_ARG); + ArrayType *freqs_arr = PG_GETARG_ARRAYTYPE_P(MOST_COMMON_FREQS_ARG); + ArrayType *base_freqs_arr = PG_GETARG_ARRAYTYPE_P(MOST_COMMON_BASE_FREQS_ARG); + int nitems; + Datum *mcv_elems; + bool 
*mcv_nulls; + int check_nummcv; + + /* + * The mcv_arr is an array of arrays of text, and we use it as the + * reference array for checking the lengths of the other 3 arrays. + */ + if (ARR_NDIM(mcv_arr) != 2) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameters \"%s\" must be a text array of 2 dimensions.", + extarginfo[MOST_COMMON_VALS_ARG].argname))); + return (Datum) 0; + } + + nitems = ARR_DIMS(mcv_arr)[0]; + + /* fixed length arrays that cannot contain NULLs */ + if (!check_mcvlist_array(nulls_arr, MOST_COMMON_VAL_NULLS_ARG, + 2, nitems) || + !check_mcvlist_array(freqs_arr, MOST_COMMON_FREQS_ARG, + 1, nitems) || + !check_mcvlist_array(base_freqs_arr, MOST_COMMON_BASE_FREQS_ARG, + 1, nitems)) + return (Datum) 0; + + + deconstruct_array_builtin(mcv_arr, TEXTOID, &mcv_elems, + &mcv_nulls, &check_nummcv); + + Assert(check_nummcv == (nitems * numattrs)); + + datum = import_mcvlist(tup, WARNING, numattrs, + atttypids, atttypmods, atttypcolls, + nitems, mcv_elems, mcv_nulls, + (bool *) ARR_DATA_PTR(nulls_arr), + (float8 *) ARR_DATA_PTR(freqs_arr), + (float8 *) ARR_DATA_PTR(base_freqs_arr)); + + values[Anum_pg_statistic_ext_data_stxdmcv - 1] = datum; + replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true; + } + else + nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true; + + if (has.expressions) + { + Datum datum; + Relation pgsd; + + pgsd = table_open(StatisticRelationId, RowExclusiveLock); + + datum = import_expressions(pgsd, numexprs, + &atttypids[numattnums], &atttypmods[numattnums], + &atttypcolls[numattnums], + PG_GETARG_ARRAYTYPE_P(EXPRESSIONS_ARG)); + + table_close(pgsd, RowExclusiveLock); + + values[Anum_pg_statistic_ext_data_stxdexpr - 1] = datum; + replaces[Anum_pg_statistic_ext_data_stxdexpr - 1] = true; + } + else + nulls[Anum_pg_statistic_ext_data_stxdexpr - 1] = true; + + upsert_pg_statistic_ext_data(values, nulls, replaces); + + heap_freetuple(tup); + table_close(pg_stext, RowExclusiveLock); + + if (atttypids != 
NULL) + pfree(atttypids); + if (atttypmods != NULL) + pfree(atttypmods); + if (atttypcolls != NULL) + pfree(atttypcolls); + return success; +} + +/* + * Consistency checks to ensure that other mcvlist arrays are in alignment + * with the mcv array. + */ +static bool +check_mcvlist_array(ArrayType *arr, int argindex, int required_ndims, + int mcv_length) +{ + if (ARR_NDIM(arr) != required_ndims) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameter \"%s\" must be an array of %d dimensions.", + extarginfo[argindex].argname, required_ndims))); + return false; + } + + if (array_contains_nulls(arr)) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Array \"%s\" cannot contain NULLs.", + extarginfo[argindex].argname))); + return false; + } + + if (ARR_DIMS(arr)[0] != mcv_length) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameters \"%s\" must have the same number of elements as \"%s\"", + extarginfo[argindex].argname, + extarginfo[MOST_COMMON_VALS_ARG].argname))); + return false; + } + + return true; +} + +/* + * Create the stxdexprs datum using the user input in an array of array of + * text, referenced against the datatypes for the expressions. 
+ */ +static Datum +import_expressions(Relation pgsd, int numexprs, + Oid *atttypids, int32 *atttypmods, + Oid *atttypcolls, ArrayType *exprs_arr) +{ + Datum *exprs_elems; + bool *exprs_nulls; + int check_numexprs; + int offset = 0; + + FmgrInfo array_in_fn; + + Oid pgstypoid = get_rel_type_id(StatisticRelationId); + + ArrayBuildState *astate = NULL; + + + if (ARR_NDIM(exprs_arr) != 2) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameter \"%s\" must be a text array of 2 dimensions.", + extarginfo[EXPRESSIONS_ARG].argname))); + return (Datum) 0; + } + + if (ARR_DIMS(exprs_arr)[0] != numexprs) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameter \"%s\" must have an outer dimension of %d elements.", + extarginfo[EXPRESSIONS_ARG].argname, numexprs))); + return (Datum) 0; + } + if (ARR_DIMS(exprs_arr)[1] != NUM_ATTRIBUTE_STATS_ELEMS) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameter \"%s\" must have an inner dimension of %d elements.", + extarginfo[EXPRESSIONS_ARG].argname, + NUM_ATTRIBUTE_STATS_ELEMS))); + return (Datum) 0; + } + + fmgr_info(F_ARRAY_IN, &array_in_fn); + + deconstruct_array_builtin(exprs_arr, TEXTOID, &exprs_elems, + &exprs_nulls, &check_numexprs); + + for (int i = 0; i < numexprs; i++) + { + Oid typid = atttypids[i]; + int32 typmod = atttypmods[i]; + Oid stacoll = atttypcolls[i]; + TypeCacheEntry *typcache; + + Oid elemtypid = InvalidOid; + Oid elem_eq_opr = InvalidOid; + + bool ok; + + Datum values[Natts_pg_statistic]; + bool nulls[Natts_pg_statistic]; + bool replaces[Natts_pg_statistic]; + + HeapTuple pgstup; + Datum pgstdat; + + /* finds the right operators even if atttypid is a domain */ + typcache = lookup_type_cache(typid, TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR); + + init_empty_stats_tuple(InvalidOid, InvalidAttrNumber, false, + values, nulls, replaces); + + if (!exprs_nulls[offset + NULL_FRAC_ELEM]) + { + ok = 
text_to_float4(exprs_elems[offset + NULL_FRAC_ELEM], + &values[Anum_pg_statistic_stanullfrac - 1]); + + if (!ok) + { + char *s = TextDatumGetCString(exprs_elems[offset + NULL_FRAC_ELEM]); + + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Expression %s element \"%s\" does not match expected input type.", + extexprarginfo[NULL_FRAC_ELEM].argname, s))); + pfree(s); + return (Datum) 0; + } + } + + if (!exprs_nulls[offset + AVG_WIDTH_ELEM]) + { + ok = text_to_int4(exprs_elems[offset + AVG_WIDTH_ELEM], + &values[Anum_pg_statistic_stawidth - 1]); + + if (!ok) + { + char *s = TextDatumGetCString(exprs_elems[offset + NULL_FRAC_ELEM]); + + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Expression %s element \"%s\" does not match expected input type.", + extexprarginfo[AVG_WIDTH_ELEM].argname, s))); + pfree(s); + return (Datum) 0; + } + } + + if (!exprs_nulls[offset + N_DISTINCT_ELEM]) + { + ok = text_to_float4(exprs_elems[offset + N_DISTINCT_ELEM], + &values[Anum_pg_statistic_stadistinct - 1]); + + if (!ok) + { + char *s = TextDatumGetCString(exprs_elems[offset + NULL_FRAC_ELEM]); + + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Expression %s element \"%s\" does not match expected input type.", + extexprarginfo[N_DISTINCT_ELEM].argname, s))); + pfree(s); + return (Datum) 0; + } + } + + /* + * The STAKIND statistics are the same as the ones found in attribute + * stats. However, these are all derived from text columns, whereas + * the ones derived for attribute stats are a mix of datatypes. This + * limits the opportunities for code sharing between the two. 
+ */ + + /* STATISTIC_KIND_MCV */ + if (exprs_nulls[offset + MOST_COMMON_VALS_ELEM] != + exprs_nulls[offset + MOST_COMMON_FREQS_ELEM]) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Expression %s and %s must both be NOT NULL or both NULL.", + extexprarginfo[MOST_COMMON_VALS_ELEM].argname, + extexprarginfo[MOST_COMMON_FREQS_ELEM].argname))); + return (Datum) 0; + } + + if (!exprs_nulls[offset + MOST_COMMON_VALS_ELEM]) + { + Datum stavalues; + Datum stanumbers; + + stavalues = text_to_stavalues(extexprarginfo[MOST_COMMON_VALS_ELEM].argname, + &array_in_fn, exprs_elems[offset + MOST_COMMON_VALS_ELEM], + typid, typmod, &ok); + + if (!ok) + return (Datum) 0; + + stanumbers = text_to_stavalues(extexprarginfo[MOST_COMMON_VALS_ELEM].argname, + &array_in_fn, exprs_elems[offset + MOST_COMMON_FREQS_ELEM], + FLOAT4OID, -1, &ok); + + if (!ok) + return (Datum) 0; + + set_stats_slot(values, nulls, replaces, + STATISTIC_KIND_MCV, + typcache->eq_opr, stacoll, + stanumbers, false, stavalues, false); + } + + /* STATISTIC_KIND_HISTOGRAM */ + if (!exprs_nulls[offset + HISTOGRAM_BOUNDS_ELEM]) + { + Datum stavalues; + + stavalues = text_to_stavalues(extexprarginfo[HISTOGRAM_BOUNDS_ELEM].argname, + &array_in_fn, exprs_elems[offset + HISTOGRAM_BOUNDS_ELEM], + typid, typmod, &ok); + + if (!ok) + return (Datum) 0; + + set_stats_slot(values, nulls, replaces, + STATISTIC_KIND_HISTOGRAM, + typcache->lt_opr, stacoll, + 0, true, stavalues, false); + } + + /* STATISTIC_KIND_CORRELATION */ + if (!exprs_nulls[offset + CORRELATION_ELEM]) + { + Datum corr[] = {(Datum) 0}; + ArrayType *arry; + Datum stanumbers; + + ok = text_to_float4(exprs_elems[offset + CORRELATION_ELEM], &corr[0]); + + if (!ok) + { + char *s = TextDatumGetCString(exprs_elems[offset + CORRELATION_ELEM]); + + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Expression %s element \"%s\" does not match expected input type.", + extexprarginfo[CORRELATION_ELEM].argname, s))); + return 
(Datum) 0; + } + + arry = construct_array_builtin(corr, 1, FLOAT4OID); + + stanumbers = PointerGetDatum(arry); + + set_stats_slot(values, nulls, replaces, + STATISTIC_KIND_CORRELATION, + typcache->lt_opr, stacoll, + stanumbers, false, 0, true); + } + + /* STATISTIC_KIND_MCELEM */ + if (exprs_nulls[offset + MOST_COMMON_ELEMS_ELEM] != + exprs_nulls[offset + MOST_COMMON_ELEM_FREQS_ELEM]) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Expression %s and %s must both be NOT NULL or both NULL.", + extexprarginfo[MOST_COMMON_ELEMS_ELEM].argname, + extexprarginfo[MOST_COMMON_ELEM_FREQS_ELEM].argname))); + return (Datum) 0; + } + + /* + * We only need to fetch element type and eq operator if we have a + * stat of type MCELEM or DECHIST. + */ + if (!exprs_nulls[offset + MOST_COMMON_ELEMS_ELEM] || + !exprs_nulls[offset + ELEM_COUNT_HISTOGRAM_ELEM]) + { + if (!get_elem_stat_type(typid, typcache->typtype, + &elemtypid, &elem_eq_opr)) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + (errmsg("unable to determine element type of expression")))); + return (Datum) 0; + } + } + + if (!exprs_nulls[offset + MOST_COMMON_ELEMS_ELEM]) + { + Datum stavalues; + Datum stanumbers; + + stavalues = text_to_stavalues(extexprarginfo[MOST_COMMON_ELEMS_ELEM].argname, + &array_in_fn, + exprs_elems[offset + MOST_COMMON_ELEMS_ELEM], + elemtypid, typmod, &ok); + + if (!ok) + return (Datum) 0; + + stanumbers = text_to_stavalues(extexprarginfo[MOST_COMMON_ELEM_FREQS_ELEM].argname, + &array_in_fn, + exprs_elems[offset + MOST_COMMON_ELEM_FREQS_ELEM], + FLOAT4OID, -1, &ok); + + if (!ok) + return (Datum) 0; + + set_stats_slot(values, nulls, replaces, + STATISTIC_KIND_MCELEM, + elem_eq_opr, stacoll, + stanumbers, false, stavalues, false); + } + + if (!exprs_nulls[offset + ELEM_COUNT_HISTOGRAM_ELEM]) + { + Datum stanumbers; + + stanumbers = text_to_stavalues(extexprarginfo[ELEM_COUNT_HISTOGRAM_ELEM].argname, + &array_in_fn, + exprs_elems[offset + 
ELEM_COUNT_HISTOGRAM_ELEM], + FLOAT4OID, -1, &ok); + + if (!ok) + return (Datum) 0; + + set_stats_slot(values, nulls, replaces, STATISTIC_KIND_DECHIST, + elem_eq_opr, stacoll, + stanumbers, false, 0, true); + } + + /* + * Currently there are no extended stats exports of the statistic + * kinds STATISTIC_KIND_BOUNDS_HISTOGRAM or + * STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM so these cannot be imported. + * These may be added in the future. + */ + + pgstup = heap_form_tuple(RelationGetDescr(pgsd), values, nulls); + pgstdat = heap_copy_tuple_as_datum(pgstup, RelationGetDescr(pgsd)); + astate = accumArrayResult(astate, pgstdat, false, pgstypoid, + CurrentMemoryContext); + + offset += NUM_ATTRIBUTE_STATS_ELEMS; + } + + pfree(exprs_elems); + pfree(exprs_nulls); + + return makeArrayResult(astate, CurrentMemoryContext); +} + +static bool +text_to_float4(Datum input, Datum *output) +{ + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + char *s; + bool ok; + + s = TextDatumGetCString(input); + ok = DirectInputFunctionCallSafe(float4in, s, InvalidOid, -1, + (Node *) &escontext, output); + + pfree(s); + return ok; +} + + +static bool +text_to_int4(Datum input, Datum *output) +{ + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + char *s; + bool ok; + + s = TextDatumGetCString(input); + ok = DirectInputFunctionCallSafe(int4in, s, InvalidOid, -1, + (Node *) &escontext, output); + + pfree(s); + return ok; +} + +static bool +delete_pg_statistic_ext_data(Oid stxoid, bool inherited) +{ + Relation sed = table_open(StatisticExtDataRelationId, RowExclusiveLock); + HeapTuple oldtup; + bool result = false; + + /* Is there an existing pg_statistic_ext_data tuple for this statistics object?
*/ + oldtup = SearchSysCache2(STATEXTDATASTXOID, + ObjectIdGetDatum(stxoid), + BoolGetDatum(inherited)); + + if (HeapTupleIsValid(oldtup)) + { + CatalogTupleDelete(sed, &oldtup->t_self); + ReleaseSysCache(oldtup); + result = true; + } + + table_close(sed, RowExclusiveLock); + + CommandCounterIncrement(); + + return result; +} + +Datum +pg_restore_extended_stats(PG_FUNCTION_ARGS) +{ + LOCAL_FCINFO(positional_fcinfo, NUM_EXTENDED_STATS_ARGS); + bool result = true; + + InitFunctionCallInfoData(*positional_fcinfo, NULL, NUM_EXTENDED_STATS_ARGS, + InvalidOid, NULL, NULL); + + if (!stats_fill_fcinfo_from_arg_pairs(fcinfo, positional_fcinfo, extarginfo)) + result = false; + + if (!extended_statistics_update(positional_fcinfo)) + result = false; + + PG_RETURN_BOOL(result); +} + +/* + * Delete the stored statistics data for the given extended statistics object. + */ +Datum +pg_clear_extended_stats(PG_FUNCTION_ARGS) +{ + char *nspname; + Oid nspoid; + char *stxname; + bool inherited; + Relation pg_stext; + HeapTuple tup; + + Form_pg_statistic_ext stxform; + + stats_check_required_arg(fcinfo, extarginfo, STATSCHEMA_ARG); + nspname = TextDatumGetCString(PG_GETARG_DATUM(STATSCHEMA_ARG)); + stats_check_required_arg(fcinfo, extarginfo, STATNAME_ARG); + stxname = TextDatumGetCString(PG_GETARG_DATUM(STATNAME_ARG)); + stats_check_required_arg(fcinfo, extarginfo, INHERITED_ARG); + inherited = PG_GETARG_BOOL(INHERITED_ARG); + + nspoid = get_namespace_oid(nspname, true); + if (nspoid == InvalidOid) + { + ereport(WARNING, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("Namespace \"%s\" not found.", nspname))); + PG_RETURN_VOID(); + } + + if (RecoveryInProgress()) + { + ereport(WARNING, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("Statistics cannot be modified during recovery."))); + PG_RETURN_VOID(); + } + + pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock); + tup = get_pg_statistic_ext(pg_stext, nspoid, stxname); + + if
(!HeapTupleIsValid(tup)) + { + table_close(pg_stext, RowExclusiveLock); + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("Extended Statistics Object \"%s\".\"%s\" not found.", + nspname, stxname))); + PG_RETURN_VOID(); /* NOTE(review): presumably unreachable after ereport(ERROR); the earlier checks in this function report WARNING instead -- confirm intended level */ + } + + stxform = (Form_pg_statistic_ext) GETSTRUCT(tup); + + stats_lock_check_privileges(stxform->stxrelid); + + delete_pg_statistic_ext_data(stxform->oid, inherited); + heap_freetuple(tup); + table_close(pg_stext, RowExclusiveLock); + + PG_RETURN_VOID(); +} diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index d98cda698d94..73f78e060785 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -2173,3 +2173,147 @@ mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat, return s; } + +/* + * The MCV is an array of records, but this is expected as 4 separate arrays. + * It is not possible to have a generic input function for pg_mcv_list + * because the most_common_values is a composite type with element types + * defined by the specific statistics object. + */ +Datum +import_mcvlist(HeapTuple tup, int elevel, int numattrs, Oid *atttypids, + int32 *atttypmods, Oid *atttypcolls, int nitems, + Datum *mcv_elems, bool *mcv_nulls, + bool *mcv_elem_nulls, float8 *freqs, float8 *base_freqs) +{ + MCVList *mcvlist; + bytea *bytes; + + HeapTuple *vatuples; + VacAttrStats **vastats; + + /* + * Allocate the MCV list structure, set the global parameters.
+ */ + mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) + + (sizeof(MCVItem) * nitems)); + + mcvlist->magic = STATS_MCV_MAGIC; + mcvlist->type = STATS_MCV_TYPE_BASIC; + mcvlist->ndimensions = numattrs; + mcvlist->nitems = nitems; + + /* Set the values for the 1-D arrays and allocate space for the 2-D arrays */ + for (int i = 0; i < nitems; i++) + { + MCVItem *item = &mcvlist->items[i]; + + item->frequency = freqs[i]; + item->base_frequency = base_freqs[i]; + item->values = (Datum *) palloc0(sizeof(Datum) * numattrs); + item->isnull = (bool *) palloc0(sizeof(bool) * numattrs); + } + + /* Walk through each dimension; element (i, j) lives at i * numattrs + j */ + for (int j = 0; j < numattrs; j++) + { + FmgrInfo finfo; + Oid ioparam; + Oid infunc; + int index = j; + + getTypeInputInfo(atttypids[j], &infunc, &ioparam); + fmgr_info(infunc, &finfo); + + /* store info about data type OIDs */ + mcvlist->types[j] = atttypids[j]; + + for (int i = 0; i < nitems; i++) + { + MCVItem *item = &mcvlist->items[i]; + + /* These should be in agreement, but just to be safe check both */ + if (mcv_elem_nulls[index] || mcv_nulls[index]) + { + item->values[j] = (Datum) 0; + item->isnull[j] = true; + } + else + { + char *s = TextDatumGetCString(mcv_elems[index]); + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!InputFunctionCallSafe(&finfo, s, ioparam, atttypmods[j], + (fmNodePtr) &escontext, &item->values[j])) + { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("MCV element \"%s\" does not match expected input type.", s))); + return (Datum) 0; + } + + pfree(s); + } + + index += numattrs; + } + } + + /* + * The function statext_mcv_serialize() requires an array of pointers to + * VacAttrStats records, but only a few fields within those records have + * to be filled out.
+ */ + vastats = (VacAttrStats **) palloc0(numattrs * sizeof(VacAttrStats *)); + vatuples = (HeapTuple *) palloc0(numattrs * sizeof(HeapTuple)); + + for (int i = 0; i < numattrs; i++) + { + Oid typid = atttypids[i]; + HeapTuple typtuple; + + typtuple = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typid)); + + if (!HeapTupleIsValid(typtuple)) + elog(ERROR, "cache lookup failed for type %u", typid); + + vatuples[i] = typtuple; + + vastats[i] = palloc0(sizeof(VacAttrStats)); + + vastats[i]->attrtype = (Form_pg_type) GETSTRUCT(typtuple); + vastats[i]->attrtypid = typid; + vastats[i]->attrcollid = atttypcolls[i]; + } + + bytes = statext_mcv_serialize(mcvlist, vastats); + + for (int i = 0; i < numattrs; i++) + { + pfree(vatuples[i]); + pfree(vastats[i]); + } + pfree((void *) vatuples); + pfree((void *) vastats); + + if (bytes == NULL) + { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Unable to import mcv list"))); + return (Datum) 0; + } + + for (int i = 0; i < nitems; i++) + { + MCVItem *item = &mcvlist->items[i]; + + pfree(item->values); + pfree(item->isnull); + } + pfree(mcvlist); + pfree(mcv_elems); + pfree(mcv_nulls); + + return PointerGetDatum(bytes); +} diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c index e9c02aaa63ec..df56e3808aa5 100644 --- a/src/backend/statistics/mvdistinct.c +++ b/src/backend/statistics/mvdistinct.c @@ -516,6 +516,7 @@ static JsonParseErrorType ndistinct_scalar(void *state, char *token, JsonTokenType tokentype) { ndistinctParseState *parse = state; + int64 ndistinct; /* if the entire json is just one scalar, that's wrong */ if (parse->found_only_object != true) @@ -530,12 +531,19 @@ ndistinct_scalar(void *state, char *token, JsonTokenType tokentype) Assert(parse->current_item != NULL); - parse->current_item->ndistinct = float8in_internal(token, NULL, "double", - token, parse->escontext); + /* + * While the structure dictates that ndistinct is a double precision floating + *
point, in practice it has always been an integer, and it is output as such. + * Therefore, we follow usage precedent over the actual storage structure, + * and read it in as an integer. + */ + ndistinct = pg_strtoint64_safe(token, parse->escontext); if (SOFT_ERROR_OCCURRED(parse->escontext)) return JSON_SEM_ACTION_FAILED; + parse->current_item->ndistinct = (double) ndistinct; + /* mark us done with this item */ parse->current_item = NULL; return JSON_SUCCESS; @@ -650,7 +658,7 @@ free_pg_ndistinct(MVNDistinct *ndistinct) * extended statistics object. * * Positive attnums are attributes which must be found in the stxkeys, - * while negative attnums correspond to an expr number, so the attnum + * while negative attnums correspond to an expr number, so the attnum * can't be below (0 - numexprs). */ bool diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 62beb71da288..661f03d9dc25 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -12556,6 +12556,22 @@ proname => 'gist_stratnum_common', prorettype => 'int2', proargtypes => 'int4', prosrc => 'gist_stratnum_common' }, +{ oid => '9947', + descr => 'restore statistics on extended statistics object', + proname => 'pg_restore_extended_stats', provolatile => 'v', proisstrict => 'f', + provariadic => 'any', + proparallel => 'u', prorettype => 'bool', + proargtypes => 'any', + proargnames => '{kwargs}', + proargmodes => '{v}', + prosrc => 'pg_restore_extended_stats' }, +{ oid => '9948', + descr => 'clear statistics on extended statistics object', + proname => 'pg_clear_extended_stats', provolatile => 'v', proisstrict => 'f', + proparallel => 'u', prorettype => 'void', + proargtypes => 'text text bool', + proargnames => '{statistics_schemaname,statistics_name,inherited}', + prosrc => 'pg_clear_extended_stats' }, # AIO related functions { oid => '9200', descr => 'information about in-progress asynchronous IOs', diff --git
a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index 396915a8a978..68862bb13045 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -126,6 +126,10 @@ extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root, Selectivity *overlap_mcvsel, Selectivity *overlap_basesel, Selectivity *totalsel); +extern Datum import_mcvlist(HeapTuple tup, int elevel, int numattrs, + Oid *atttypids, int32 *atttypmods, Oid *atttypcolls, + int nitems, Datum *mcv_elems, bool *mcv_nulls, + bool *mcv_elem_nulls, float8 *freqs, float8 *base_freqs); extern Datum import_mcvlist(HeapTuple tup, int elevel, int numattrs, Oid *atttypids, int32 *atttypmods, Oid *atttypcolls, @@ -134,5 +138,9 @@ extern Datum import_mcvlist(HeapTuple tup, int elevel, int numattrs, extern bool pg_ndistinct_validate_items(MVNDistinct *ndistinct, int2vector *stxkeys, int numexprs, int elevel); extern void free_pg_ndistinct(MVNDistinct *ndistinct); +extern bool pg_dependencies_validate_deps(MVDependencies *dependencies, + int2vector *stxkeys, int numexprs, + int elevel); +extern void free_pg_dependencies(MVDependencies *dependencies); #endif /* EXTENDED_STATS_INTERNAL_H */ diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 6cc55b00f8d2..bda82d6de35b 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -3371,6 +3371,19 @@ ERROR: malformed pg_ndistinct: "{"6, -1": 14, "6, -2": 9143, "-1, -1": 13454, " LINE 1: SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -1": 13454, "6, -1... ^ DETAIL: attnum list duplicate value found: -1 +-- can't use inf/-inf/NaN for ndistinct values. +SELECT '{"6, -1": "Inf", "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}'::pg_ndistinct; +ERROR: invalid input syntax for type bigint: "Inf" +LINE 1: SELECT '{"6, -1": "Inf", "6, -2": 9143, "-1, -2": 13454, "6,... 
+ ^ +SELECT '{"6, -1": "-Inf", "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}'::pg_ndistinct; +ERROR: invalid input syntax for type bigint: "-Inf" +LINE 1: SELECT '{"6, -1": "-Inf", "6, -2": 9143, "-1, -2": 13454, "6... + ^ +SELECT '{"6, -1": "NaN", "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}'::pg_ndistinct; +ERROR: invalid input syntax for type bigint: "NaN" +LINE 1: SELECT '{"6, -1": "NaN", "6, -2": 9143, "-1, -2": 13454, "6,... + ^ SELECT '{"-2 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 => -1": 0.348479, "-1, -2 => 6": 0.839691}'::pg_dependencies; pg_dependencies ----------------------------------------------------------------------------------------------- diff --git a/src/test/regress/expected/stats_import.out b/src/test/regress/expected/stats_import.out index 48d6392b4ad4..330ee5799a73 100644 --- a/src/test/regress/expected/stats_import.out +++ b/src/test/regress/expected/stats_import.out @@ -1084,11 +1084,15 @@ SELECT 3, 'tre', (3, 3.3, 'TRE', '2003-03-03', NULL)::stats_import.complex_type, UNION ALL SELECT 4, 'four', NULL, int4range(0,100), NULL; CREATE INDEX is_odd ON stats_import.test(((comp).a % 2 = 1)); +CREATE STATISTICS stats_import.test_stat ON name, comp, lower(arange), array_length(tags,1) +FROM stats_import.test; -- Generate statistics on table with data ANALYZE stats_import.test; CREATE TABLE stats_import.test_clone ( LIKE stats_import.test ) WITH (autovacuum_enabled = false); CREATE INDEX is_odd_clone ON stats_import.test_clone(((comp).a % 2 = 1)); +CREATE STATISTICS stats_import.test_stat_clone ON name, comp, lower(arange), array_length(tags,1) +FROM stats_import.test_clone; -- -- Copy stats from test to test_clone, and is_odd to is_odd_clone -- @@ -1342,6 +1346,443 @@ AND attname = 'i'; (1 row) DROP TABLE stats_temp; +-- set n_distinct using at attnum (1) that is not in the statistics object +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 
'inherited', false, + 'n_distinct', '{"2, 3": 4, "2, -1": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "1, 3, -1, -2": 4}'::pg_ndistinct + ); +WARNING: pg_ndistinct: invalid attnum for this statistics object: 1 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- set n_distinct using at attnum that is 0 +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '{"2, 3": 4, "2, -1": 4, "2, 0": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "2, 3, -1, -2": 4}'::pg_ndistinct + ); +WARNING: pg_ndistinct: invalid attnum for this statistics object: 0 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- set n_distinct using at attnum that is outside the expression bounds(below -2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '{"2, 3": 4, "2, -4": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "1, 3, -1, -2": 4}'::pg_ndistinct + ); +WARNING: pg_ndistinct: invalid attnum for this statistics object: -4 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '{"2, 3": 4, "2, -1": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "2, 3, -1, -2": 4}'::pg_ndistinct + ); + pg_restore_extended_stats +--------------------------- + t +(1 row) + +SELECT + e.n_distinct, e.dependencies, e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, 
e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx +-[ RECORD 1 ]----------+---------------------------------------------------------------------------------------------------------------------------------------------------------- +n_distinct | {"2, 3": 4, "2, -1": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "2, 3, -1, -2": 4} +dependencies | +most_common_vals | +most_common_val_nulls | +most_common_freqs | +most_common_base_freqs | + +-- set dependencies using at attnum (1) that is not in the statistics object +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '{"2 => 1": 1.000000, "2 => -1": 1.000000, "2 => -2": 1.000000, "3 => 2": 1.000000, "3 => -1": 1.000000, "3 => -2": 1.000000, "-1 => 2": 0.500000, "-1 => 3": 0.500000, "-1 => -2": 1.000000, "-2 => 2": 0.500000, "-2 => 3": 0.500000, "-2 => -1": 1.000000, "2, 3 => -1": 1.000000, "2, 3 => -2": 1.000000, "2, -1 => 3": 1.000000, "2, -1 => -2": 1.000000, "2, -2 => 3": 1.000000, "2, -2 => -1": 1.000000, "3, -1 => 2": 1.000000, "3, -1 => -2": 1.000000, "3, -2 => 2": 1.000000, "3, -2 => -1": 1.000000, "-1, -2 => 2": 0.500000, "-1, -2 => 3": 0.500000, "2, 3, -1 => -2": 1.000000, "2, 3, -2 => -1": 1.000000, "2, -1, -2 => 3": 1.000000, "3, -1, -2 => 2": 1.000000}'::pg_dependencies + ); +WARNING: pg_dependencies: invalid attnum for this statistics object: 1 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- set dependencies using at attnum that is 0 +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '{"2 => 3": 1.000000, "0 => -1": 1.000000, "2 => -2": 1.000000, "3 => 2": 
1.000000, "3 => -1": 1.000000, "3 => -2": 1.000000, "-1 => 2": 0.500000, "-1 => 3": 0.500000, "-1 => -2": 1.000000, "-2 => 2": 0.500000, "-2 => 3": 0.500000, "-2 => -1": 1.000000, "2, 3 => -1": 1.000000, "2, 3 => -2": 1.000000, "2, -1 => 3": 1.000000, "2, -1 => -2": 1.000000, "2, -2 => 3": 1.000000, "2, -2 => -1": 1.000000, "3, -1 => 2": 1.000000, "3, -1 => -2": 1.000000, "3, -2 => 2": 1.000000, "3, -2 => -1": 1.000000, "-1, -2 => 2": 0.500000, "-1, -2 => 3": 0.500000, "2, 3, -1 => -2": 1.000000, "2, 3, -2 => -1": 1.000000, "2, -1, -2 => 3": 1.000000, "3, -1, -2 => 2": 1.000000}'::pg_dependencies + ); +WARNING: pg_dependencies: invalid attnum for this statistics object: 0 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- set dependencies using at attnum that is outside the expression bounds(below -2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '{"2 => 3": 1.000000, "2 => -3": 1.000000, "2 => -2": 1.000000, "3 => 2": 1.000000, "3 => -1": 1.000000, "3 => -2": 1.000000, "-1 => 2": 0.500000, "-1 => 3": 0.500000, "-1 => -2": 1.000000, "-2 => 2": 0.500000, "-2 => 3": 0.500000, "-2 => -1": 1.000000, "2, 3 => -1": 1.000000, "2, 3 => -2": 1.000000, "2, -1 => 3": 1.000000, "2, -1 => -2": 1.000000, "2, -2 => 3": 1.000000, "2, -2 => -1": 1.000000, "3, -1 => 2": 1.000000, "3, -1 => -2": 1.000000, "3, -2 => 2": 1.000000, "3, -2 => -1": 1.000000, "-1, -2 => 2": 0.500000, "-1, -2 => 3": 0.500000, "2, 3, -1 => -2": 1.000000, "2, 3, -2 => -1": 1.000000, "2, -1, -2 => 3": 1.000000, "3, -1, -2 => 2": 1.000000}'::pg_dependencies + ); +WARNING: pg_dependencies: invalid attnum for this statistics object: -3 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 
'dependencies', '{"2 => 3": 1.000000, "2 => -1": 1.000000, "2 => -2": 1.000000, "3 => 2": 1.000000, "3 => -1": 1.000000, "3 => -2": 1.000000, "-1 => 2": 0.500000, "-1 => 3": 0.500000, "-1 => -2": 1.000000, "-2 => 2": 0.500000, "-2 => 3": 0.500000, "-2 => -1": 1.000000, "2, 3 => -1": 1.000000, "2, 3 => -2": 1.000000, "2, -1 => 3": 1.000000, "2, -1 => -2": 1.000000, "2, -2 => 3": 1.000000, "2, -2 => -1": 1.000000, "3, -1 => 2": 1.000000, "3, -1 => -2": 1.000000, "3, -2 => 2": 1.000000, "3, -2 => -1": 1.000000, "-1, -2 => 2": 0.500000, "-1, -2 => 3": 0.500000, "2, 3, -1 => -2": 1.000000, "2, 3, -2 => -1": 1.000000, "2, -1, -2 => 3": 1.000000, "3, -1, -2 => 2": 1.000000}'::pg_dependencies + ); + pg_restore_extended_stats +--------------------------- + t +(1 row) + +SELECT + e.n_distinct, e.dependencies, e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx +-[ RECORD 1 ]----------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +n_distinct | {"2, 3": 4, "2, -1": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "2, 3, -1, -2": 4} +dependencies | {"2 => 3": 1.000000, "2 => -1": 1.000000, "2 => -2": 1.000000, "3 => 2": 
1.000000, "3 => -1": 1.000000, "3 => -2": 1.000000, "-1 => 2": 0.500000, "-1 => 3": 0.500000, "-1 => -2": 1.000000, "-2 => 2": 0.500000, "-2 => 3": 0.500000, "-2 => -1": 1.000000, "2, 3 => -1": 1.000000, "2, 3 => -2": 1.000000, "2, -1 => 3": 1.000000, "2, -1 => -2": 1.000000, "2, -2 => 3": 1.000000, "2, -2 => -1": 1.000000, "3, -1 => 2": 1.000000, "3, -1 => -2": 1.000000, "3, -2 => 2": 1.000000, "3, -2 => -1": 1.000000, "-1, -2 => 2": 0.500000, "-1, -2 => 3": 0.500000, "2, 3, -1 => -2": 1.000000, "2, 3, -2 => -1": 1.000000, "2, -1, -2 => 3": 1.000000, "3, -1, -2 => 2": 1.000000} +most_common_vals | +most_common_val_nulls | +most_common_freqs | +most_common_base_freqs | + +-- if any one mcv param specified, all four must be specified (part 1) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_vals', '{{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}}'::text[] + ); +WARNING: MCV parameters "most_common_vals", "most_common_val_nulls", "most_common_freqs", and "most_common_base_freqs" must be all specified if any are specified + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- if any one mcv param specified, all four must be specified (part 2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_val_nulls', '{{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}}'::boolean[] + ); +WARNING: MCV parameters "most_common_vals", "most_common_val_nulls", "most_common_freqs", and "most_common_base_freqs" must be all specified if any are specified + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- if any one mcv param specified, all four must be specified (part 3) +SELECT + 
pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[] + ); +WARNING: MCV parameters "most_common_vals", "most_common_val_nulls", "most_common_freqs", and "most_common_base_freqs" must be all specified if any are specified + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- if any one mcv param specified, all four must be specified (part 4) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_base_freqs', '{0.00390625,0.015625,0.00390625,0.015625}'::double precision[] + ); +WARNING: MCV parameters "most_common_vals", "most_common_val_nulls", "most_common_freqs", and "most_common_base_freqs" must be all specified if any are specified + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_vals', '{{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}}'::text[], + 'most_common_val_nulls', '{{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}}'::boolean[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.00390625,0.015625,0.00390625,0.015625}'::double precision[] + ); + pg_restore_extended_stats +--------------------------- + t +(1 row) + +SELECT + e.n_distinct, e.dependencies, e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx +-[ RECORD 1 
]----------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +n_distinct | {"2, 3": 4, "2, -1": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "2, 3, -1, -2": 4} +dependencies | {"2 => 3": 1.000000, "2 => -1": 1.000000, "2 => -2": 1.000000, "3 => 2": 1.000000, "3 => -1": 1.000000, "3 => -2": 1.000000, "-1 => 2": 0.500000, "-1 => 3": 0.500000, "-1 => -2": 1.000000, "-2 => 2": 0.500000, "-2 => 3": 0.500000, "-2 => -1": 1.000000, "2, 3 => -1": 1.000000, "2, 3 => -2": 1.000000, "2, -1 => 3": 1.000000, "2, -1 => -2": 1.000000, "2, -2 => 3": 1.000000, "2, -2 => -1": 1.000000, "3, -1 => 2": 1.000000, "3, -1 => -2": 1.000000, "3, -2 => 2": 1.000000, "3, -2 => -1": 1.000000, "-1, -2 => 2": 0.500000, "-1, -2 => 3": 0.500000, "2, 3, -1 => -2": 1.000000, "2, 3, -2 => -1": 1.000000, "2, -1, -2 => 3": 1.000000, "3, -1, -2 => 2": 1.000000} +most_common_vals | {{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}} +most_common_val_nulls | {{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}} +most_common_freqs | {0.25,0.25,0.25,0.25} +most_common_base_freqs | {0.00390625,0.015625,0.00390625,0.015625} + +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 
'statistics_name', 'test_stat_clone', + 'inherited', false, + 'exprs', '{{0,4,-0.75,"{1}","{0.5}","{-1,0}",-0.6,NULL,NULL,NULL},{0.25,4,-0.5,"{2}","{0.5}",NULL,1,NULL,NULL,NULL}}'::text[] + ); + pg_restore_extended_stats +--------------------------- + t +(1 row) + +SELECT + e.inherited, e.null_frac, e.avg_width, e.n_distinct, e.most_common_vals, + e.most_common_freqs, e.histogram_bounds, e.correlation, + e.most_common_elems, e.most_common_elem_freqs, e.elem_count_histogram +FROM pg_stats_ext_exprs AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +and e.inherited = false +\gx +-[ RECORD 1 ]----------+------- +inherited | f +null_frac | 0 +avg_width | 4 +n_distinct | -0.75 +most_common_vals | {1} +most_common_freqs | {0.5} +histogram_bounds | {-1,0} +correlation | -0.6 +most_common_elems | +most_common_elem_freqs | +elem_count_histogram | +-[ RECORD 2 ]----------+------- +inherited | f +null_frac | 0.25 +avg_width | 4 +n_distinct | -0.5 +most_common_vals | {2} +most_common_freqs | {0.5} +histogram_bounds | +correlation | 1 +most_common_elems | +most_common_elem_freqs | +elem_count_histogram | + +SELECT + pg_catalog.pg_clear_extended_stats( + statistics_schemaname => 'stats_import', + statistics_name => 'test_stat_clone', + inherited => false); + pg_clear_extended_stats +------------------------- + +(1 row) + +SELECT COUNT(*) +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false; + count +------- + 0 +(1 row) + +SELECT COUNT(*) +FROM pg_stats_ext_exprs AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false; + count +------- + 0 +(1 row) + +-- +-- Copy stats from test_stat to test_stat_clone +-- +SELECT + e.statistics_name, + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', e.statistics_schemaname::text, + 'statistics_name', 'test_stat_clone', + 'inherited', 
e.inherited, + 'n_distinct', e.n_distinct, + 'dependencies', e.dependencies, + 'most_common_vals', e.most_common_vals, + 'most_common_val_nulls', e.most_common_val_nulls, + 'most_common_freqs', e.most_common_freqs, + 'most_common_base_freqs', e.most_common_base_freqs, + 'exprs', x.exprs + ) +FROM pg_stats_ext AS e +CROSS JOIN LATERAL ( + SELECT + array_agg( + ARRAY[ee.null_frac::text, ee.avg_width::text, + ee.n_distinct::text, ee.most_common_vals::text, + ee.most_common_freqs::text, ee.histogram_bounds::text, + ee.correlation::text, ee.most_common_elems::text, + ee.most_common_elem_freqs::text, + ee.elem_count_histogram::text]) + FROM pg_stats_ext_exprs AS ee + WHERE ee.statistics_schemaname = e.statistics_schemaname + AND ee.statistics_name = e.statistics_name + AND ee.inherited = e.inherited + ) AS x(exprs) +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat'; + statistics_name | pg_restore_extended_stats +-----------------+--------------------------- + test_stat | t +(1 row) + +SELECT o.inherited, + o.n_distinct, o.dependencies, o.most_common_vals, + o.most_common_val_nulls, o.most_common_freqs, + o.most_common_base_freqs +FROM pg_stats_ext AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat' +EXCEPT +SELECT n.inherited, + n.n_distinct, n.dependencies, n.most_common_vals, + n.most_common_val_nulls, n.most_common_freqs, + n.most_common_base_freqs +FROM pg_stats_ext AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone'; + inherited | n_distinct | dependencies | most_common_vals | most_common_val_nulls | most_common_freqs | most_common_base_freqs +-----------+------------+--------------+------------------+-----------------------+-------------------+------------------------ +(0 rows) + +SELECT n.inherited, + n.n_distinct, n.dependencies, n.most_common_vals, + n.most_common_val_nulls, n.most_common_freqs, + n.most_common_base_freqs +FROM pg_stats_ext AS n 
+WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone' +EXCEPT +SELECT o.inherited, + o.n_distinct, o.dependencies, o.most_common_vals, + o.most_common_val_nulls, o.most_common_freqs, + o.most_common_base_freqs +FROM pg_stats_ext AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat'; + inherited | n_distinct | dependencies | most_common_vals | most_common_val_nulls | most_common_freqs | most_common_base_freqs +-----------+------------+--------------+------------------+-----------------------+-------------------+------------------------ +(0 rows) + +SELECT o.inherited, + o.null_frac, o.avg_width, o.n_distinct, + o.most_common_vals::text AS most_common_vals, + o.most_common_freqs, + o.histogram_bounds::text AS histogram_bounds, + o.correlation, + o.most_common_elems::text AS most_common_elems, + o.most_common_elem_freqs, o.elem_count_histogram +FROM pg_stats_ext_exprs AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat' +EXCEPT +SELECT n.inherited, + n.null_frac, n.avg_width, n.n_distinct, + n.most_common_vals::text AS most_common_vals, + n.most_common_freqs, + n.histogram_bounds::text AS histogram_bounds, + n.correlation, + n.most_common_elems::text AS most_common_elems, + n.most_common_elem_freqs, n.elem_count_histogram +FROM pg_stats_ext_exprs AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone'; + inherited | null_frac | avg_width | n_distinct | most_common_vals | most_common_freqs | histogram_bounds | correlation | most_common_elems | most_common_elem_freqs | elem_count_histogram +-----------+-----------+-----------+------------+------------------+-------------------+------------------+-------------+-------------------+------------------------+---------------------- +(0 rows) + +SELECT n.inherited, + n.null_frac, n.avg_width, n.n_distinct, + n.most_common_vals::text AS most_common_vals, + n.most_common_freqs, + 
n.histogram_bounds::text AS histogram_bounds, + n.correlation, + n.most_common_elems::text AS most_common_elems, + n.most_common_elem_freqs, n.elem_count_histogram +FROM pg_stats_ext_exprs AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone' +EXCEPT +SELECT o.inherited, + o.null_frac, o.avg_width, o.n_distinct, + o.most_common_vals::text AS most_common_vals, + o.most_common_freqs, + o.histogram_bounds::text AS histogram_bounds, + o.correlation, + o.most_common_elems::text AS most_common_elems, + o.most_common_elem_freqs, o.elem_count_histogram +FROM pg_stats_ext_exprs AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat'; + inherited | null_frac | avg_width | n_distinct | most_common_vals | most_common_freqs | histogram_bounds | correlation | most_common_elems | most_common_elem_freqs | elem_count_histogram +-----------+-----------+-----------+------------+------------------+-------------------+------------------+-------------+-------------------+------------------------+---------------------- +(0 rows) + DROP SCHEMA stats_import CASCADE; NOTICE: drop cascades to 6 other objects DETAIL: drop cascades to type stats_import.complex_type diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index b033d7184fc4..c29523f5785f 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -1704,6 +1704,10 @@ SELECT statistics_name, most_common_vals FROM pg_stats_ext_exprs x SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}'::pg_ndistinct; -- can't have duplicates attnums in list SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -1": 13454, "6, -1, -2": 14549}'::pg_ndistinct; +-- can't use inf/-inf/NaN for ndistinct values. 
+SELECT '{"6, -1": "Inf", "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}'::pg_ndistinct; +SELECT '{"6, -1": "-Inf", "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}'::pg_ndistinct; +SELECT '{"6, -1": "NaN", "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}'::pg_ndistinct; SELECT '{"-2 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 => -1": 0.348479, "-1, -2 => 6": 0.839691}'::pg_dependencies; -- can't have duplicates attnums in list diff --git a/src/test/regress/sql/stats_import.sql b/src/test/regress/sql/stats_import.sql index d140733a7502..bb03e046129b 100644 --- a/src/test/regress/sql/stats_import.sql +++ b/src/test/regress/sql/stats_import.sql @@ -766,6 +766,9 @@ SELECT 4, 'four', NULL, int4range(0,100), NULL; CREATE INDEX is_odd ON stats_import.test(((comp).a % 2 = 1)); +CREATE STATISTICS stats_import.test_stat ON name, comp, lower(arange), array_length(tags,1) +FROM stats_import.test; + -- Generate statistics on table with data ANALYZE stats_import.test; @@ -774,6 +777,9 @@ CREATE TABLE stats_import.test_clone ( LIKE stats_import.test ) CREATE INDEX is_odd_clone ON stats_import.test_clone(((comp).a % 2 = 1)); +CREATE STATISTICS stats_import.test_stat_clone ON name, comp, lower(arange), array_length(tags,1) +FROM stats_import.test_clone; + -- -- Copy stats from test to test_clone, and is_odd to is_odd_clone -- @@ -970,4 +976,303 @@ AND tablename = 'stats_temp' AND inherited = false AND attname = 'i'; DROP TABLE stats_temp; + +-- set n_distinct using at attnum (1) that is not in the statistics object +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '{"2, 3": 4, "2, -1": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "1, 3, -1, -2": 4}'::pg_ndistinct + ); + +-- set n_distinct using at attnum that is 0 +SELECT + pg_catalog.pg_restore_extended_stats( + 
'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '{"2, 3": 4, "2, -1": 4, "2, 0": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "2, 3, -1, -2": 4}'::pg_ndistinct + ); + +-- set n_distinct using at attnum that is outside the expression bounds(below -2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '{"2, 3": 4, "2, -4": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "1, 3, -1, -2": 4}'::pg_ndistinct + ); + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '{"2, 3": 4, "2, -1": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "2, 3, -1, -2": 4}'::pg_ndistinct + ); + +SELECT + e.n_distinct, e.dependencies, e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx + +-- set dependencies using at attnum (1) that is not in the statistics object +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '{"2 => 1": 1.000000, "2 => -1": 1.000000, "2 => -2": 1.000000, "3 => 2": 1.000000, "3 => -1": 1.000000, "3 => -2": 1.000000, "-1 => 2": 0.500000, "-1 => 3": 0.500000, "-1 => -2": 1.000000, "-2 => 2": 0.500000, "-2 => 3": 0.500000, "-2 => -1": 1.000000, "2, 3 => -1": 1.000000, "2, 3 => -2": 1.000000, "2, -1 => 3": 1.000000, "2, -1 => -2": 1.000000, "2, -2 => 3": 1.000000, "2, -2 => -1": 
1.000000, "3, -1 => 2": 1.000000, "3, -1 => -2": 1.000000, "3, -2 => 2": 1.000000, "3, -2 => -1": 1.000000, "-1, -2 => 2": 0.500000, "-1, -2 => 3": 0.500000, "2, 3, -1 => -2": 1.000000, "2, 3, -2 => -1": 1.000000, "2, -1, -2 => 3": 1.000000, "3, -1, -2 => 2": 1.000000}'::pg_dependencies + ); + +-- set dependencies using at attnum that is 0 +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '{"2 => 3": 1.000000, "0 => -1": 1.000000, "2 => -2": 1.000000, "3 => 2": 1.000000, "3 => -1": 1.000000, "3 => -2": 1.000000, "-1 => 2": 0.500000, "-1 => 3": 0.500000, "-1 => -2": 1.000000, "-2 => 2": 0.500000, "-2 => 3": 0.500000, "-2 => -1": 1.000000, "2, 3 => -1": 1.000000, "2, 3 => -2": 1.000000, "2, -1 => 3": 1.000000, "2, -1 => -2": 1.000000, "2, -2 => 3": 1.000000, "2, -2 => -1": 1.000000, "3, -1 => 2": 1.000000, "3, -1 => -2": 1.000000, "3, -2 => 2": 1.000000, "3, -2 => -1": 1.000000, "-1, -2 => 2": 0.500000, "-1, -2 => 3": 0.500000, "2, 3, -1 => -2": 1.000000, "2, 3, -2 => -1": 1.000000, "2, -1, -2 => 3": 1.000000, "3, -1, -2 => 2": 1.000000}'::pg_dependencies + ); + +-- set dependencies using at attnum that is outside the expression bounds(below -2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '{"2 => 3": 1.000000, "2 => -3": 1.000000, "2 => -2": 1.000000, "3 => 2": 1.000000, "3 => -1": 1.000000, "3 => -2": 1.000000, "-1 => 2": 0.500000, "-1 => 3": 0.500000, "-1 => -2": 1.000000, "-2 => 2": 0.500000, "-2 => 3": 0.500000, "-2 => -1": 1.000000, "2, 3 => -1": 1.000000, "2, 3 => -2": 1.000000, "2, -1 => 3": 1.000000, "2, -1 => -2": 1.000000, "2, -2 => 3": 1.000000, "2, -2 => -1": 1.000000, "3, -1 => 2": 1.000000, "3, -1 => -2": 1.000000, "3, -2 => 2": 1.000000, "3, -2 => -1": 1.000000, "-1, -2 => 2": 0.500000, "-1, -2 => 3": 
0.500000, "2, 3, -1 => -2": 1.000000, "2, 3, -2 => -1": 1.000000, "2, -1, -2 => 3": 1.000000, "3, -1, -2 => 2": 1.000000}'::pg_dependencies + ); + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '{"2 => 3": 1.000000, "2 => -1": 1.000000, "2 => -2": 1.000000, "3 => 2": 1.000000, "3 => -1": 1.000000, "3 => -2": 1.000000, "-1 => 2": 0.500000, "-1 => 3": 0.500000, "-1 => -2": 1.000000, "-2 => 2": 0.500000, "-2 => 3": 0.500000, "-2 => -1": 1.000000, "2, 3 => -1": 1.000000, "2, 3 => -2": 1.000000, "2, -1 => 3": 1.000000, "2, -1 => -2": 1.000000, "2, -2 => 3": 1.000000, "2, -2 => -1": 1.000000, "3, -1 => 2": 1.000000, "3, -1 => -2": 1.000000, "3, -2 => 2": 1.000000, "3, -2 => -1": 1.000000, "-1, -2 => 2": 0.500000, "-1, -2 => 3": 0.500000, "2, 3, -1 => -2": 1.000000, "2, 3, -2 => -1": 1.000000, "2, -1, -2 => 3": 1.000000, "3, -1, -2 => 2": 1.000000}'::pg_dependencies + ); + +SELECT + e.n_distinct, e.dependencies, e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx + +-- if any one mcv param specified, all four must be specified (part 1) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_vals', '{{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}}'::text[] + ); + +-- if any one mcv param specified, all four must be specified (part 2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_val_nulls', 
'{{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}}'::boolean[] + ); + +-- if any one mcv param specified, all four must be specified (part 3) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[] + ); + +-- if any one mcv param specified, all four must be specified (part 4) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_base_freqs', '{0.00390625,0.015625,0.00390625,0.015625}'::double precision[] + ); + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_vals', '{{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}}'::text[], + 'most_common_val_nulls', '{{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}}'::boolean[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.00390625,0.015625,0.00390625,0.015625}'::double precision[] + ); + +SELECT + e.n_distinct, e.dependencies, e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx + +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'exprs', '{{0,4,-0.75,"{1}","{0.5}","{-1,0}",-0.6,NULL,NULL,NULL},{0.25,4,-0.5,"{2}","{0.5}",NULL,1,NULL,NULL,NULL}}'::text[] + ); + +SELECT + e.inherited, e.null_frac, e.avg_width, e.n_distinct, e.most_common_vals, + e.most_common_freqs, 
e.histogram_bounds, e.correlation, + e.most_common_elems, e.most_common_elem_freqs, e.elem_count_histogram +FROM pg_stats_ext_exprs AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +and e.inherited = false +\gx + +SELECT + pg_catalog.pg_clear_extended_stats( + statistics_schemaname => 'stats_import', + statistics_name => 'test_stat_clone', + inherited => false); + +SELECT COUNT(*) +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false; + +SELECT COUNT(*) +FROM pg_stats_ext_exprs AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false; + +-- +-- Copy stats from test_stat to test_stat_clone +-- +SELECT + e.statistics_name, + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', e.statistics_schemaname::text, + 'statistics_name', 'test_stat_clone', + 'inherited', e.inherited, + 'n_distinct', e.n_distinct, + 'dependencies', e.dependencies, + 'most_common_vals', e.most_common_vals, + 'most_common_val_nulls', e.most_common_val_nulls, + 'most_common_freqs', e.most_common_freqs, + 'most_common_base_freqs', e.most_common_base_freqs, + 'exprs', x.exprs + ) +FROM pg_stats_ext AS e +CROSS JOIN LATERAL ( + SELECT + array_agg( + ARRAY[ee.null_frac::text, ee.avg_width::text, + ee.n_distinct::text, ee.most_common_vals::text, + ee.most_common_freqs::text, ee.histogram_bounds::text, + ee.correlation::text, ee.most_common_elems::text, + ee.most_common_elem_freqs::text, + ee.elem_count_histogram::text]) + FROM pg_stats_ext_exprs AS ee + WHERE ee.statistics_schemaname = e.statistics_schemaname + AND ee.statistics_name = e.statistics_name + AND ee.inherited = e.inherited + ) AS x(exprs) +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat'; + +SELECT o.inherited, + o.n_distinct, o.dependencies, o.most_common_vals, + o.most_common_val_nulls, 
o.most_common_freqs, + o.most_common_base_freqs +FROM pg_stats_ext AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat' +EXCEPT +SELECT n.inherited, + n.n_distinct, n.dependencies, n.most_common_vals, + n.most_common_val_nulls, n.most_common_freqs, + n.most_common_base_freqs +FROM pg_stats_ext AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone'; + +SELECT n.inherited, + n.n_distinct, n.dependencies, n.most_common_vals, + n.most_common_val_nulls, n.most_common_freqs, + n.most_common_base_freqs +FROM pg_stats_ext AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone' +EXCEPT +SELECT o.inherited, + o.n_distinct, o.dependencies, o.most_common_vals, + o.most_common_val_nulls, o.most_common_freqs, + o.most_common_base_freqs +FROM pg_stats_ext AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat'; + +SELECT o.inherited, + o.null_frac, o.avg_width, o.n_distinct, + o.most_common_vals::text AS most_common_vals, + o.most_common_freqs, + o.histogram_bounds::text AS histogram_bounds, + o.correlation, + o.most_common_elems::text AS most_common_elems, + o.most_common_elem_freqs, o.elem_count_histogram +FROM pg_stats_ext_exprs AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat' +EXCEPT +SELECT n.inherited, + n.null_frac, n.avg_width, n.n_distinct, + n.most_common_vals::text AS most_common_vals, + n.most_common_freqs, + n.histogram_bounds::text AS histogram_bounds, + n.correlation, + n.most_common_elems::text AS most_common_elems, + n.most_common_elem_freqs, n.elem_count_histogram +FROM pg_stats_ext_exprs AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone'; + +SELECT n.inherited, + n.null_frac, n.avg_width, n.n_distinct, + n.most_common_vals::text AS most_common_vals, + n.most_common_freqs, + n.histogram_bounds::text AS histogram_bounds, + 
n.correlation, + n.most_common_elems::text AS most_common_elems, + n.most_common_elem_freqs, n.elem_count_histogram +FROM pg_stats_ext_exprs AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone' +EXCEPT +SELECT o.inherited, + o.null_frac, o.avg_width, o.n_distinct, + o.most_common_vals::text AS most_common_vals, + o.most_common_freqs, + o.histogram_bounds::text AS histogram_bounds, + o.correlation, + o.most_common_elems::text AS most_common_elems, + o.most_common_elem_freqs, o.elem_count_histogram +FROM pg_stats_ext_exprs AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat'; + DROP SCHEMA stats_import CASCADE;