Improve readability and error detection of array_in().
author Tom Lane <[email protected]>
Mon, 13 Nov 2023 18:01:47 +0000 (13:01 -0500)
committer Tom Lane <[email protected]>
Mon, 13 Nov 2023 18:01:51 +0000 (13:01 -0500)
Rewrite array_in() and its subroutines so that we make only one
pass over the input text, rather than two.  This requires
potentially re-pallocing the working arrays values[] and nulls[]
larger than our initial guess, but that cost will hopefully be made
up by avoiding duplicate parsing.  In any case this coding seems
much clearer and more straightforward than what we had before.

This also fixes array_in() to reject non-rectangular input (that is,
different brace depths in different parts of the input) more reliably
than before, and to give a better error message when it does so.
This is analogous to the plpython and plperl fixes in 0553528e7 and
f47004add.  Like those PLs, we now accept input such as '{{},{}}'
as a valid representation of an empty array, which we did not before.

Additionally, reject explicit array subscripts that are outside the
integer range (previously you just got whatever atoi() converted
them to), and make some other minor improvements in error reporting.

Although this is arguably a bug fix, it's also a behavioral change
that might trip somebody up, so no back-patch.

Tom Lane, Heikki Linnakangas, and Jian He.  Thanks to Alexander Lakhin
for the initial report and for review/testing.

Discussion: https://postgr.es/m/2794005.1683042087@sss.pgh.pa.us

doc/src/sgml/array.sgml
src/backend/utils/adt/arrayfuncs.c
src/backend/utils/adt/arrayutils.c
src/include/utils/array.h
src/test/regress/expected/arrays.out
src/test/regress/sql/arrays.sql
src/tools/pgindent/typedefs.list

index 56185b9b0387dba4ed615658427c5b957d4a403b..ce338c770c95d2affdb2a9f489425f6d715e30e3 100644 (file)
@@ -171,7 +171,8 @@ INSERT INTO sal_emp
     VALUES ('Bill',
     '{10000, 10000, 10000, 10000}',
     '{{"meeting", "lunch"}, {"meeting"}}');
-ERROR:  multidimensional arrays must have array expressions with matching dimensions
+ERROR:  malformed array literal: "{{"meeting", "lunch"}, {"meeting"}}"
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
 </programlisting>
  </para>
 
index 3ff13eb4198a9862a7327d6df5150b732bcc9747..d71967de01f0931563db42fe5b9f06cb12880760 100644 (file)
@@ -54,18 +54,16 @@ bool                Array_nulls = true;
                        PG_FREE_IF_COPY(array, n); \
        } while (0)
 
+/* ReadArrayToken return type */
 typedef enum
 {
-       ARRAY_NO_LEVEL,
-       ARRAY_LEVEL_STARTED,
-       ARRAY_ELEM_STARTED,
-       ARRAY_ELEM_COMPLETED,
-       ARRAY_QUOTED_ELEM_STARTED,
-       ARRAY_QUOTED_ELEM_COMPLETED,
-       ARRAY_ELEM_DELIMITED,
-       ARRAY_LEVEL_COMPLETED,
-       ARRAY_LEVEL_DELIMITED,
-} ArrayParseState;
+       ATOK_LEVEL_START,
+       ATOK_LEVEL_END,
+       ATOK_DELIM,
+       ATOK_ELEM,
+       ATOK_ELEM_NULL,
+       ATOK_ERROR,
+} ArrayToken;
 
 /* Working state for array_iterate() */
 typedef struct ArrayIteratorData
@@ -91,15 +89,21 @@ typedef struct ArrayIteratorData
        int                     current_item;   /* the item # we're at in the array */
 }                      ArrayIteratorData;
 
-static int     ArrayCount(const char *str, int *dim, char typdelim,
-                                          Node *escontext);
-static bool ReadArrayStr(char *arrayStr, const char *origStr,
-                                                int nitems, int ndim, int *dim,
+static bool ReadArrayDimensions(char **srcptr, int *ndim_p,
+                                                               int *dim, int *lBound,
+                                                               const char *origStr, Node *escontext);
+static bool ReadDimensionInt(char **srcptr, int *result,
+                                                        const char *origStr, Node *escontext);
+static bool ReadArrayStr(char **srcptr,
                                                 FmgrInfo *inputproc, Oid typioparam, int32 typmod,
                                                 char typdelim,
                                                 int typlen, bool typbyval, char typalign,
-                                                Datum *values, bool *nulls,
-                                                bool *hasnulls, int32 *nbytes, Node *escontext);
+                                                int *ndim_p, int *dim,
+                                                int *nitems_p,
+                                                Datum **values_p, bool **nulls_p,
+                                                const char *origStr, Node *escontext);
+static ArrayToken ReadArrayToken(char **srcptr, StringInfo elembuf, char typdelim,
+                                                                const char *origStr, Node *escontext);
 static void ReadArrayBinary(StringInfo buf, int nitems,
                                                        FmgrInfo *receiveproc, Oid typioparam, int32 typmod,
                                                        int typlen, bool typbyval, char typalign,
@@ -185,12 +189,10 @@ array_in(PG_FUNCTION_ARGS)
        char            typalign;
        char            typdelim;
        Oid                     typioparam;
-       char       *string_save,
-                          *p;
-       int                     i,
-                               nitems;
-       Datum      *dataPtr;
-       bool       *nullsPtr;
+       char       *p;
+       int                     nitems;
+       Datum      *values;
+       bool       *nulls;
        bool            hasnulls;
        int32           nbytes;
        int32           dataoffset;
@@ -233,104 +235,38 @@ array_in(PG_FUNCTION_ARGS)
        typdelim = my_extra->typdelim;
        typioparam = my_extra->typioparam;
 
-       /* Make a modifiable copy of the input */
-       string_save = pstrdup(string);
-
        /*
-        * If the input string starts with dimension info, read and use that.
-        * Otherwise, we require the input to be in curly-brace style, and we
-        * prescan the input to determine dimensions.
-        *
-        * Dimension info takes the form of one or more [n] or [m:n] items. The
-        * outer loop iterates once per dimension item.
+        * Initialize dim[] and lBound[] for ReadArrayStr, in case there is no
+        * explicit dimension info.  (If there is, ReadArrayDimensions will
+        * overwrite this.)
         */
-       p = string_save;
-       ndim = 0;
-       for (;;)
+       for (int i = 0; i < MAXDIM; i++)
        {
-               char       *q;
-               int                     ub;
-
-               /*
-                * Note: we currently allow whitespace between, but not within,
-                * dimension items.
-                */
-               while (scanner_isspace(*p))
-                       p++;
-               if (*p != '[')
-                       break;                          /* no more dimension items */
-               p++;
-               if (ndim >= MAXDIM)
-                       ereturn(escontext, (Datum) 0,
-                                       (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                                        errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-                                                       ndim + 1, MAXDIM)));
-
-               for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++)
-                        /* skip */ ;
-               if (q == p)                             /* no digits? */
-                       ereturn(escontext, (Datum) 0,
-                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                        errmsg("malformed array literal: \"%s\"", string),
-                                        errdetail("\"[\" must introduce explicitly-specified array dimensions.")));
-
-               if (*q == ':')
-               {
-                       /* [m:n] format */
-                       *q = '\0';
-                       lBound[ndim] = atoi(p);
-                       p = q + 1;
-                       for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++)
-                                /* skip */ ;
-                       if (q == p)                     /* no digits? */
-                               ereturn(escontext, (Datum) 0,
-                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                errmsg("malformed array literal: \"%s\"", string),
-                                                errdetail("Missing array dimension value.")));
-               }
-               else
-               {
-                       /* [n] format */
-                       lBound[ndim] = 1;
-               }
-               if (*q != ']')
-                       ereturn(escontext, (Datum) 0,
-                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                        errmsg("malformed array literal: \"%s\"", string),
-                                        errdetail("Missing \"%s\" after array dimensions.",
-                                                          "]")));
-
-               *q = '\0';
-               ub = atoi(p);
-               p = q + 1;
-               if (ub < lBound[ndim])
-                       ereturn(escontext, (Datum) 0,
-                                       (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
-                                        errmsg("upper bound cannot be less than lower bound")));
-
-               dim[ndim] = ub - lBound[ndim] + 1;
-               ndim++;
+               dim[i] = -1;                    /* indicates "not yet known" */
+               lBound[i] = 1;                  /* default lower bound */
        }
 
+       /*
+        * Start processing the input string.
+        *
+        * If the input string starts with dimension info, read and use that.
+        * Otherwise, we'll determine the dimensions during ReadArrayStr.
+        */
+       p = string;
+       if (!ReadArrayDimensions(&p, &ndim, dim, lBound, string, escontext))
+               return (Datum) 0;
+
        if (ndim == 0)
        {
-               /* No array dimensions, so intuit dimensions from brace structure */
+               /* No array dimensions, so next character should be a left brace */
                if (*p != '{')
                        ereturn(escontext, (Datum) 0,
                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                         errmsg("malformed array literal: \"%s\"", string),
                                         errdetail("Array value must start with \"{\" or dimension information.")));
-               ndim = ArrayCount(p, dim, typdelim, escontext);
-               if (ndim < 0)
-                       PG_RETURN_NULL();
-               for (i = 0; i < ndim; i++)
-                       lBound[i] = 1;
        }
        else
        {
-               int                     ndim_braces,
-                                       dim_braces[MAXDIM];
-
                /* If array dimensions are given, expect '=' operator */
                if (strncmp(p, ASSGN, strlen(ASSGN)) != 0)
                        ereturn(escontext, (Datum) 0,
@@ -339,66 +275,68 @@ array_in(PG_FUNCTION_ARGS)
                                         errdetail("Missing \"%s\" after array dimensions.",
                                                           ASSGN)));
                p += strlen(ASSGN);
+               /* Allow whitespace after it */
                while (scanner_isspace(*p))
                        p++;
 
-               /*
-                * intuit dimensions from brace structure -- it better match what we
-                * were given
-                */
                if (*p != '{')
                        ereturn(escontext, (Datum) 0,
                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                         errmsg("malformed array literal: \"%s\"", string),
                                         errdetail("Array contents must start with \"{\".")));
-               ndim_braces = ArrayCount(p, dim_braces, typdelim, escontext);
-               if (ndim_braces < 0)
-                       PG_RETURN_NULL();
-               if (ndim_braces != ndim)
+       }
+
+       /* Parse the value part, in the curly braces: { ... } */
+       if (!ReadArrayStr(&p,
+                                         &my_extra->proc, typioparam, typmod,
+                                         typdelim,
+                                         typlen, typbyval, typalign,
+                                         &ndim,
+                                         dim,
+                                         &nitems,
+                                         &values, &nulls,
+                                         string,
+                                         escontext))
+               return (Datum) 0;
+
+       /* only whitespace is allowed after the closing brace */
+       while (*p)
+       {
+               if (!scanner_isspace(*p++))
                        ereturn(escontext, (Datum) 0,
                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                         errmsg("malformed array literal: \"%s\"", string),
-                                        errdetail("Specified array dimensions do not match array contents.")));
-               for (i = 0; i < ndim; ++i)
-               {
-                       if (dim[i] != dim_braces[i])
-                               ereturn(escontext, (Datum) 0,
-                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                errmsg("malformed array literal: \"%s\"", string),
-                                                errdetail("Specified array dimensions do not match array contents.")));
-               }
+                                        errdetail("Junk after closing right brace.")));
        }
 
-#ifdef ARRAYDEBUG
-       printf("array_in- ndim %d (", ndim);
-       for (i = 0; i < ndim; i++)
-       {
-               printf(" %d", dim[i]);
-       };
-       printf(") for %s\n", string);
-#endif
-
-       /* This checks for overflow of the array dimensions */
-       nitems = ArrayGetNItemsSafe(ndim, dim, escontext);
-       if (nitems < 0)
-               PG_RETURN_NULL();
-       if (!ArrayCheckBoundsSafe(ndim, dim, lBound, escontext))
-               PG_RETURN_NULL();
-
        /* Empty array? */
        if (nitems == 0)
                PG_RETURN_ARRAYTYPE_P(construct_empty_array(element_type));
 
-       dataPtr = (Datum *) palloc(nitems * sizeof(Datum));
-       nullsPtr = (bool *) palloc(nitems * sizeof(bool));
-       if (!ReadArrayStr(p, string,
-                                         nitems, ndim, dim,
-                                         &my_extra->proc, typioparam, typmod,
-                                         typdelim,
-                                         typlen, typbyval, typalign,
-                                         dataPtr, nullsPtr,
-                                         &hasnulls, &nbytes, escontext))
-               PG_RETURN_NULL();
+       /*
+        * Check for nulls, compute total data space needed
+        */
+       hasnulls = false;
+       nbytes = 0;
+       for (int i = 0; i < nitems; i++)
+       {
+               if (nulls[i])
+                       hasnulls = true;
+               else
+               {
+                       /* let's just make sure data is not toasted */
+                       if (typlen == -1)
+                               values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
+                       nbytes = att_addlength_datum(nbytes, typlen, values[i]);
+                       nbytes = att_align_nominal(nbytes, typalign);
+                       /* check for overflow of total request */
+                       if (!AllocSizeIsValid(nbytes))
+                               ereturn(escontext, (Datum) 0,
+                                               (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                                errmsg("array size exceeds the maximum allowed (%d)",
+                                                               (int) MaxAllocSize)));
+               }
+       }
        if (hasnulls)
        {
                dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nitems);
@@ -409,6 +347,10 @@ array_in(PG_FUNCTION_ARGS)
                dataoffset = 0;                 /* marker for no null bitmap */
                nbytes += ARR_OVERHEAD_NONULLS(ndim);
        }
+
+       /*
+        * Construct the final array datum
+        */
        retval = (ArrayType *) palloc0(nbytes);
        SET_VARSIZE(retval, nbytes);
        retval->ndim = ndim;
@@ -424,302 +366,218 @@ array_in(PG_FUNCTION_ARGS)
        memcpy(ARR_LBOUND(retval), lBound, ndim * sizeof(int));
 
        CopyArrayEls(retval,
-                                dataPtr, nullsPtr, nitems,
+                                values, nulls, nitems,
                                 typlen, typbyval, typalign,
                                 true);
 
-       pfree(dataPtr);
-       pfree(nullsPtr);
-       pfree(string_save);
+       pfree(values);
+       pfree(nulls);
 
        PG_RETURN_ARRAYTYPE_P(retval);
 }
 
 /*
- * ArrayCount
- *      Determines the dimensions for an array string.
+ * ReadArrayDimensions
+ *      parses the array dimensions part of the input and converts the values
+ *      to internal format.
+ *
+ * On entry, *srcptr points to the string to parse. It is advanced to point
+ * after whitespace (if any) and dimension info (if any).
+ *
+ * *ndim_p, dim[], and lBound[] are output variables. They are filled with the
+ * number of dimensions (<= MAXDIM), the lengths of each dimension, and the
+ * lower subscript bounds, respectively.  If no dimension info appears,
+ * *ndim_p will be set to zero, and dim[] and lBound[] are unchanged.
  *
- * Returns number of dimensions as function result.  The axis lengths are
- * returned in dim[], which must be of size MAXDIM.
+ * 'origStr' is the original input string, used only in error messages.
+ * If *escontext points to an ErrorSaveContext, details of any error are
+ * reported there.
+ *
+ * Result:
+ *     true for success, false for failure (if escontext is provided).
  *
- * If we detect an error, fill *escontext with error details and return -1
- * (unless escontext isn't provided, in which case errors will be thrown).
+ * Note that dim[] and lBound[] are allocated by the caller, and must have
+ * MAXDIM elements.
  */
-static int
-ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
+static bool
+ReadArrayDimensions(char **srcptr, int *ndim_p, int *dim, int *lBound,
+                                       const char *origStr, Node *escontext)
 {
-       int                     nest_level = 0,
-                               i;
-       int                     ndim = 1,
-                               temp[MAXDIM],
-                               nelems[MAXDIM],
-                               nelems_last[MAXDIM];
-       bool            in_quotes = false;
-       bool            eoArray = false;
-       bool            empty_array = true;
-       const char *ptr;
-       ArrayParseState parse_state = ARRAY_NO_LEVEL;
+       char       *p = *srcptr;
+       int                     ndim;
 
-       for (i = 0; i < MAXDIM; ++i)
+       /*
+        * Dimension info takes the form of one or more [n] or [m:n] items.  This
+        * loop iterates once per dimension item.
+        */
+       ndim = 0;
+       for (;;)
        {
-               temp[i] = dim[i] = nelems_last[i] = 0;
-               nelems[i] = 1;
-       }
+               char       *q;
+               int                     ub;
+               int                     i;
 
-       ptr = str;
-       while (!eoArray)
-       {
-               bool            itemdone = false;
+               /*
+                * Note: we currently allow whitespace between, but not within,
+                * dimension items.
+                */
+               while (scanner_isspace(*p))
+                       p++;
+               if (*p != '[')
+                       break;                          /* no more dimension items */
+               p++;
+               if (ndim >= MAXDIM)
+                       ereturn(escontext, false,
+                                       (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                        errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+                                                       ndim + 1, MAXDIM)));
 
-               while (!itemdone)
-               {
-                       if (parse_state == ARRAY_ELEM_STARTED ||
-                               parse_state == ARRAY_QUOTED_ELEM_STARTED)
-                               empty_array = false;
+               q = p;
+               if (!ReadDimensionInt(&p, &i, origStr, escontext))
+                       return false;
+               if (p == q)                             /* no digits? */
+                       ereturn(escontext, false,
+                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                        errmsg("malformed array literal: \"%s\"", origStr),
+                                        errdetail("\"[\" must introduce explicitly-specified array dimensions.")));
 
-                       switch (*ptr)
-                       {
-                               case '\0':
-                                       /* Signal a premature end of the string */
-                                       ereturn(escontext, -1,
-                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                        errmsg("malformed array literal: \"%s\"", str),
-                                                        errdetail("Unexpected end of input.")));
-                               case '\\':
-
-                                       /*
-                                        * An escape must be after a level start, after an element
-                                        * start, or after an element delimiter. In any case we
-                                        * now must be past an element start.
-                                        */
-                                       if (parse_state != ARRAY_LEVEL_STARTED &&
-                                               parse_state != ARRAY_ELEM_STARTED &&
-                                               parse_state != ARRAY_QUOTED_ELEM_STARTED &&
-                                               parse_state != ARRAY_ELEM_DELIMITED)
-                                               ereturn(escontext, -1,
-                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                errmsg("malformed array literal: \"%s\"", str),
-                                                                errdetail("Unexpected \"%c\" character.",
-                                                                                  '\\')));
-                                       if (parse_state != ARRAY_QUOTED_ELEM_STARTED)
-                                               parse_state = ARRAY_ELEM_STARTED;
-                                       /* skip the escaped character */
-                                       if (*(ptr + 1))
-                                               ptr++;
-                                       else
-                                               ereturn(escontext, -1,
-                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                errmsg("malformed array literal: \"%s\"", str),
-                                                                errdetail("Unexpected end of input.")));
-                                       break;
-                               case '"':
-
-                                       /*
-                                        * A quote must be after a level start, after a quoted
-                                        * element start, or after an element delimiter. In any
-                                        * case we now must be past an element start.
-                                        */
-                                       if (parse_state != ARRAY_LEVEL_STARTED &&
-                                               parse_state != ARRAY_QUOTED_ELEM_STARTED &&
-                                               parse_state != ARRAY_ELEM_DELIMITED)
-                                               ereturn(escontext, -1,
-                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                errmsg("malformed array literal: \"%s\"", str),
-                                                                errdetail("Unexpected array element.")));
-                                       in_quotes = !in_quotes;
-                                       if (in_quotes)
-                                               parse_state = ARRAY_QUOTED_ELEM_STARTED;
-                                       else
-                                               parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
-                                       break;
-                               case '{':
-                                       if (!in_quotes)
-                                       {
-                                               /*
-                                                * A left brace can occur if no nesting has occurred
-                                                * yet, after a level start, or after a level
-                                                * delimiter.
-                                                */
-                                               if (parse_state != ARRAY_NO_LEVEL &&
-                                                       parse_state != ARRAY_LEVEL_STARTED &&
-                                                       parse_state != ARRAY_LEVEL_DELIMITED)
-                                                       ereturn(escontext, -1,
-                                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                        errmsg("malformed array literal: \"%s\"", str),
-                                                                        errdetail("Unexpected \"%c\" character.",
-                                                                                          '{')));
-                                               parse_state = ARRAY_LEVEL_STARTED;
-                                               if (nest_level >= MAXDIM)
-                                                       ereturn(escontext, -1,
-                                                                       (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                                                                        errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-                                                                                       nest_level + 1, MAXDIM)));
-                                               temp[nest_level] = 0;
-                                               nest_level++;
-                                               if (ndim < nest_level)
-                                                       ndim = nest_level;
-                                       }
-                                       break;
-                               case '}':
-                                       if (!in_quotes)
-                                       {
-                                               /*
-                                                * A right brace can occur after an element start, an
-                                                * element completion, a quoted element completion, or
-                                                * a level completion.
-                                                */
-                                               if (parse_state != ARRAY_ELEM_STARTED &&
-                                                       parse_state != ARRAY_ELEM_COMPLETED &&
-                                                       parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-                                                       parse_state != ARRAY_LEVEL_COMPLETED &&
-                                                       !(nest_level == 1 && parse_state == ARRAY_LEVEL_STARTED))
-                                                       ereturn(escontext, -1,
-                                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                        errmsg("malformed array literal: \"%s\"", str),
-                                                                        errdetail("Unexpected \"%c\" character.",
-                                                                                          '}')));
-                                               parse_state = ARRAY_LEVEL_COMPLETED;
-                                               if (nest_level == 0)
-                                                       ereturn(escontext, -1,
-                                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                        errmsg("malformed array literal: \"%s\"", str),
-                                                                        errdetail("Unmatched \"%c\" character.", '}')));
-                                               nest_level--;
-
-                                               if (nelems_last[nest_level] != 0 &&
-                                                       nelems[nest_level] != nelems_last[nest_level])
-                                                       ereturn(escontext, -1,
-                                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                        errmsg("malformed array literal: \"%s\"", str),
-                                                                        errdetail("Multidimensional arrays must have "
-                                                                                          "sub-arrays with matching "
-                                                                                          "dimensions.")));
-                                               nelems_last[nest_level] = nelems[nest_level];
-                                               nelems[nest_level] = 1;
-                                               if (nest_level == 0)
-                                                       eoArray = itemdone = true;
-                                               else
-                                               {
-                                                       /*
-                                                        * We don't set itemdone here; see comments in
-                                                        * ReadArrayStr
-                                                        */
-                                                       temp[nest_level - 1]++;
-                                               }
-                                       }
-                                       break;
-                               default:
-                                       if (!in_quotes)
-                                       {
-                                               if (*ptr == typdelim)
-                                               {
-                                                       /*
-                                                        * Delimiters can occur after an element start, an
-                                                        * element completion, a quoted element
-                                                        * completion, or a level completion.
-                                                        */
-                                                       if (parse_state != ARRAY_ELEM_STARTED &&
-                                                               parse_state != ARRAY_ELEM_COMPLETED &&
-                                                               parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-                                                               parse_state != ARRAY_LEVEL_COMPLETED)
-                                                               ereturn(escontext, -1,
-                                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                                errmsg("malformed array literal: \"%s\"", str),
-                                                                                errdetail("Unexpected \"%c\" character.",
-                                                                                                  typdelim)));
-                                                       if (parse_state == ARRAY_LEVEL_COMPLETED)
-                                                               parse_state = ARRAY_LEVEL_DELIMITED;
-                                                       else
-                                                               parse_state = ARRAY_ELEM_DELIMITED;
-                                                       itemdone = true;
-                                                       nelems[nest_level - 1]++;
-                                               }
-                                               else if (!scanner_isspace(*ptr))
-                                               {
-                                                       /*
-                                                        * Other non-space characters must be after a
-                                                        * level start, after an element start, or after
-                                                        * an element delimiter. In any case we now must
-                                                        * be past an element start.
-                                                        */
-                                                       if (parse_state != ARRAY_LEVEL_STARTED &&
-                                                               parse_state != ARRAY_ELEM_STARTED &&
-                                                               parse_state != ARRAY_ELEM_DELIMITED)
-                                                               ereturn(escontext, -1,
-                                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                                errmsg("malformed array literal: \"%s\"", str),
-                                                                                errdetail("Unexpected array element.")));
-                                                       parse_state = ARRAY_ELEM_STARTED;
-                                               }
-                                       }
-                                       break;
-                       }
-                       if (!itemdone)
-                               ptr++;
+               if (*p == ':')
+               {
+                       /* [m:n] format */
+                       lBound[ndim] = i;
+                       p++;
+                       q = p;
+                       if (!ReadDimensionInt(&p, &ub, origStr, escontext))
+                               return false;
+                       if (p == q)                     /* no digits? */
+                               ereturn(escontext, false,
+                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                errmsg("malformed array literal: \"%s\"", origStr),
+                                                errdetail("Missing array dimension value.")));
+               }
+               else
+               {
+                       /* [n] format */
+                       lBound[ndim] = 1;
+                       ub = i;
                }
-               temp[ndim - 1]++;
-               ptr++;
+               if (*p != ']')
+                       ereturn(escontext, false,
+                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                        errmsg("malformed array literal: \"%s\"", origStr),
+                                        errdetail("Missing \"%s\" after array dimensions.",
+                                                          "]")));
+               p++;
+
+               /*
+                * Note: we could accept ub = lb-1 to represent a zero-length
+                * dimension.  However, that would result in an empty array, for which
+                * we don't keep any dimension data, so that e.g. [1:0] and [101:100]
+                * would be equivalent.  Given the lack of field demand, there seems
+                * little point in allowing such cases.
+                */
+               if (ub < lBound[ndim])
+                       ereturn(escontext, false,
+                                       (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+                                        errmsg("upper bound cannot be less than lower bound")));
+
+               /* Upper bound of INT_MAX must be disallowed, cf ArrayCheckBounds() */
+               if (ub == INT_MAX)
+                       ereturn(escontext, false,
+                                       (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                        errmsg("array upper bound is too large: %d", ub)));
+
+               /* Compute "ub - lBound[ndim] + 1", detecting overflow */
+               if (pg_sub_s32_overflow(ub, lBound[ndim], &ub) ||
+                       pg_add_s32_overflow(ub, 1, &ub))
+                       ereturn(escontext, false,
+                                       (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                        errmsg("array size exceeds the maximum allowed (%d)",
+                                                       (int) MaxArraySize)));
+
+               dim[ndim] = ub;
+               ndim++;
        }
 
-       /* only whitespace is allowed after the closing brace */
-       while (*ptr)
+       *srcptr = p;
+       *ndim_p = ndim;
+       return true;
+}
+
+/*
+ * ReadDimensionInt
+ *      parse an integer, for the array dimensions
+ *
+ * On entry, *srcptr points to the string to parse. It is advanced past the
+ * digits of the integer. If there are no digits, returns true, sets *result
+ * to 0, and leaves *srcptr unchanged.
+ *
+ * Result:
+ *     true for success, false for failure (if escontext is provided).
+ *  On success, the parsed integer is returned in *result.
+ */
+static bool
+ReadDimensionInt(char **srcptr, int *result,
+                                const char *origStr, Node *escontext)
+{
+       char       *p = *srcptr;
+       long            l;
+
+       /* don't accept leading whitespace */
+       if (!isdigit((unsigned char) *p) && *p != '-' && *p != '+')
        {
-               if (!scanner_isspace(*ptr++))
-                       ereturn(escontext, -1,
-                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                        errmsg("malformed array literal: \"%s\"", str),
-                                        errdetail("Junk after closing right brace.")));
+               *result = 0;
+               return true;
        }
 
-       /* special case for an empty array */
-       if (empty_array)
-               return 0;
+       errno = 0;
+       l = strtol(p, srcptr, 10);
 
-       for (i = 0; i < ndim; ++i)
-               dim[i] = temp[i];
+       if (errno == ERANGE || l > PG_INT32_MAX || l < PG_INT32_MIN)
+               ereturn(escontext, false,
+                               (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                errmsg("array bound is out of integer range")));
 
-       return ndim;
+       *result = (int) l;
+       return true;
 }
 
 /*
  * ReadArrayStr :
- *      parses the array string pointed to by "arrayStr" and converts the values
- *      to internal format.  Unspecified elements are initialized to nulls.
- *      The array dimensions must already have been determined.
+ *      parses the array string pointed to by *srcptr and converts the values
+ *      to internal format.  Determines the array dimensions as it goes.
  *
- * Inputs:
- *     arrayStr: the string to parse.
- *                       CAUTION: the contents of "arrayStr" will be modified!
- *     origStr: the unmodified input string, used only in error messages.
- *     nitems: total number of array elements, as already determined.
- *     ndim: number of array dimensions
- *     dim[]: array axis lengths
+ * On entry, *srcptr points to the string to parse (it must point to a '{').
+ * On successful return, it is advanced to point past the closing '}'.
+ *
+ * If dimensions were specified explicitly, they are passed in *ndim_p and
+ * dim[].  This function will check that the array values match the specified
+ * dimensions.  If dimensions were not given, caller must pass *ndim_p == 0
+ * and initialize all elements of dim[] to -1.  Then this function will
+ * deduce the dimensions from the structure of the input and store them in
+ * *ndim_p and the dim[] array.
+ *
+ * Element type information:
  *     inputproc: type-specific input procedure for element datatype.
  *     typioparam, typmod: auxiliary values to pass to inputproc.
  *     typdelim: the value delimiter (type-specific).
  *     typlen, typbyval, typalign: storage parameters of element datatype.
  *
  * Outputs:
- *     values[]: filled with converted data values.
- *     nulls[]: filled with is-null markers.
- *     *hasnulls: set true iff there are any null elements.
- *     *nbytes: set to total size of data area needed (including alignment
- *             padding but not including array header overhead).
- *     *escontext: if this points to an ErrorSaveContext, details of
- *             any error are reported there.
+ *  *ndim_p, dim: dimensions deduced from the input structure.
+ *  *nitems_p: total number of elements.
+ *     *values_p[]: palloc'd array, filled with converted data values.
+ *     *nulls_p[]: palloc'd array, filled with is-null markers.
+ *
+ * 'origStr' is the original input string, used only in error messages.
+ * If escontext points to an ErrorSaveContext, details of any error are
+ * reported there.
  *
  * Result:
  *     true for success, false for failure (if escontext is provided).
- *
- * Note that values[] and nulls[] are allocated by the caller, and must have
- * nitems elements.
  */
 static bool
-ReadArrayStr(char *arrayStr,
-                        const char *origStr,
-                        int nitems,
-                        int ndim,
-                        int *dim,
+ReadArrayStr(char **srcptr,
                         FmgrInfo *inputproc,
                         Oid typioparam,
                         int32 typmod,
@@ -727,224 +585,363 @@ ReadArrayStr(char *arrayStr,
                         int typlen,
                         bool typbyval,
                         char typalign,
-                        Datum *values,
-                        bool *nulls,
-                        bool *hasnulls,
-                        int32 *nbytes,
+                        int *ndim_p,
+                        int *dim,
+                        int *nitems_p,
+                        Datum **values_p,
+                        bool **nulls_p,
+                        const char *origStr,
                         Node *escontext)
 {
-       int                     i,
-                               nest_level = 0;
-       char       *srcptr;
-       bool            in_quotes = false;
-       bool            eoArray = false;
-       bool            hasnull;
-       int32           totbytes;
-       int                     indx[MAXDIM] = {0},
-                               prod[MAXDIM];
+       int                     ndim = *ndim_p;
+       bool            dimensions_specified = (ndim != 0);
+       int                     maxitems;
+       Datum      *values;
+       bool       *nulls;
+       StringInfoData elembuf;
+       int                     nest_level;
+       int                     nitems;
+       bool            ndim_frozen;
+       bool            expect_delim;
+       int                     nelems[MAXDIM];
+
+       /* Allocate some starting output workspace; we'll enlarge as needed */
+       maxitems = 16;
+       values = palloc_array(Datum, maxitems);
+       nulls = palloc_array(bool, maxitems);
+
+       /* Allocate workspace to hold (string representation of) one element */
+       initStringInfo(&elembuf);
+
+       /* Loop below assumes first token is ATOK_LEVEL_START */
+       Assert(**srcptr == '{');
+
+       /* Parse tokens until we reach the matching right brace */
+       nest_level = 0;
+       nitems = 0;
+       ndim_frozen = dimensions_specified;
+       expect_delim = false;
+       do
+       {
+               ArrayToken      tok;
 
-       mda_get_prod(ndim, dim, prod);
+               tok = ReadArrayToken(srcptr, &elembuf, typdelim, origStr, escontext);
 
-       /* Initialize is-null markers to true */
-       memset(nulls, true, nitems * sizeof(bool));
+               switch (tok)
+               {
+                       case ATOK_LEVEL_START:
+                               /* Can't write left brace where delim is expected */
+                               if (expect_delim)
+                                       ereturn(escontext, false,
+                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                        errmsg("malformed array literal: \"%s\"", origStr),
+                                                        errdetail("Unexpected \"%c\" character.", '{')));
 
-       /*
-        * We have to remove " and \ characters to create a clean item value to
-        * pass to the datatype input routine.  We overwrite each item value
-        * in-place within arrayStr to do this.  srcptr is the current scan point,
-        * and dstptr is where we are copying to.
-        *
-        * We also want to suppress leading and trailing unquoted whitespace. We
-        * use the leadingspace flag to suppress leading space.  Trailing space is
-        * tracked by using dstendptr to point to the last significant output
-        * character.
-        *
-        * The error checking in this routine is mostly pro-forma, since we expect
-        * that ArrayCount() already validated the string.  So we don't bother
-        * with errdetail messages.
-        */
-       srcptr = arrayStr;
-       while (!eoArray)
-       {
-               bool            itemdone = false;
-               bool            leadingspace = true;
-               bool            hasquoting = false;
-               char       *itemstart;
-               char       *dstptr;
-               char       *dstendptr;
+                               /* Initialize element counting in the new level */
+                               if (nest_level >= MAXDIM)
+                                       ereturn(escontext, false,
+                                                       (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                                        errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+                                                                       nest_level + 1, MAXDIM)));
 
-               i = -1;
-               itemstart = dstptr = dstendptr = srcptr;
+                               nelems[nest_level] = 0;
+                               nest_level++;
+                               if (nest_level > ndim)
+                               {
+                                       /* Can't increase ndim once it's frozen */
+                                       if (ndim_frozen)
+                                               goto dimension_error;
+                                       ndim = nest_level;
+                               }
+                               break;
 
-               while (!itemdone)
-               {
-                       switch (*srcptr)
-                       {
-                               case '\0':
-                                       /* Signal a premature end of the string */
+                       case ATOK_LEVEL_END:
+                               /* Can't get here with nest_level == 0 */
+                               Assert(nest_level > 0);
+
+                               /*
+                                * We allow a right brace to terminate an empty sub-array,
+                                * otherwise it must occur where we expect a delimiter.
+                                */
+                               if (nelems[nest_level - 1] > 0 && !expect_delim)
                                        ereturn(escontext, false,
                                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                        errmsg("malformed array literal: \"%s\"",
-                                                                       origStr)));
-                                       break;
-                               case '\\':
-                                       /* Skip backslash, copy next character as-is. */
-                                       srcptr++;
-                                       if (*srcptr == '\0')
+                                                        errmsg("malformed array literal: \"%s\"", origStr),
+                                                        errdetail("Unexpected \"%c\" character.",
+                                                                          '}')));
+                               nest_level--;
+                               /* Nested sub-arrays count as elements of outer level */
+                               if (nest_level > 0)
+                                       nelems[nest_level - 1]++;
+
+                               /*
+                                * Note: if we had dimensionality info, then dim[nest_level]
+                                * is initially non-negative, and we'll check each sub-array's
+                                * length against that.
+                                */
+                               if (dim[nest_level] < 0)
+                               {
+                                       /* Save length of first sub-array of this level */
+                                       dim[nest_level] = nelems[nest_level];
+                               }
+                               else if (nelems[nest_level] != dim[nest_level])
+                               {
+                                       /* Subsequent sub-arrays must have same length */
+                                       goto dimension_error;
+                               }
+
+                               /*
+                                * Must have a delim or another right brace following, unless
+                                * we have reached nest_level 0, where this won't matter.
+                                */
+                               expect_delim = true;
+                               break;
+
+                       case ATOK_DELIM:
+                               if (!expect_delim)
+                                       ereturn(escontext, false,
+                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                        errmsg("malformed array literal: \"%s\"", origStr),
+                                                        errdetail("Unexpected \"%c\" character.",
+                                                                          typdelim)));
+                               expect_delim = false;
+                               break;
+
+                       case ATOK_ELEM:
+                       case ATOK_ELEM_NULL:
+                               /* Can't get here with nest_level == 0 */
+                               Assert(nest_level > 0);
+
+                               /* Disallow consecutive ELEM tokens */
+                               if (expect_delim)
+                                       ereturn(escontext, false,
+                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                        errmsg("malformed array literal: \"%s\"", origStr),
+                                                        errdetail("Unexpected array element.")));
+
+                               /* Enlarge the values/nulls arrays if needed */
+                               if (nitems >= maxitems)
+                               {
+                                       if (maxitems >= MaxArraySize)
                                                ereturn(escontext, false,
-                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                errmsg("malformed array literal: \"%s\"",
-                                                                               origStr)));
-                                       *dstptr++ = *srcptr++;
-                                       /* Treat the escaped character as non-whitespace */
-                                       leadingspace = false;
-                                       dstendptr = dstptr;
-                                       hasquoting = true;      /* can't be a NULL marker */
-                                       break;
-                               case '"':
-                                       in_quotes = !in_quotes;
-                                       if (in_quotes)
-                                               leadingspace = false;
-                                       else
-                                       {
-                                               /*
-                                                * Advance dstendptr when we exit in_quotes; this
-                                                * saves having to do it in all the other in_quotes
-                                                * cases.
-                                                */
-                                               dstendptr = dstptr;
-                                       }
-                                       hasquoting = true;      /* can't be a NULL marker */
-                                       srcptr++;
-                                       break;
-                               case '{':
-                                       if (!in_quotes)
-                                       {
-                                               if (nest_level >= ndim)
-                                                       ereturn(escontext, false,
-                                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                        errmsg("malformed array literal: \"%s\"",
-                                                                                       origStr)));
-                                               nest_level++;
-                                               indx[nest_level - 1] = 0;
-                                               srcptr++;
-                                       }
-                                       else
-                                               *dstptr++ = *srcptr++;
-                                       break;
-                               case '}':
-                                       if (!in_quotes)
-                                       {
-                                               if (nest_level == 0)
-                                                       ereturn(escontext, false,
-                                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                        errmsg("malformed array literal: \"%s\"",
-                                                                                       origStr)));
-                                               if (i == -1)
-                                                       i = ArrayGetOffset0(ndim, indx, prod);
-                                               indx[nest_level - 1] = 0;
-                                               nest_level--;
-                                               if (nest_level == 0)
-                                                       eoArray = itemdone = true;
-                                               else
-                                                       indx[nest_level - 1]++;
-                                               srcptr++;
-                                       }
-                                       else
-                                               *dstptr++ = *srcptr++;
-                                       break;
-                               default:
-                                       if (in_quotes)
-                                               *dstptr++ = *srcptr++;
-                                       else if (*srcptr == typdelim)
-                                       {
-                                               if (i == -1)
-                                                       i = ArrayGetOffset0(ndim, indx, prod);
-                                               itemdone = true;
-                                               indx[ndim - 1]++;
-                                               srcptr++;
-                                       }
-                                       else if (scanner_isspace(*srcptr))
-                                       {
-                                               /*
-                                                * If leading space, drop it immediately.  Else, copy
-                                                * but don't advance dstendptr.
-                                                */
-                                               if (leadingspace)
-                                                       srcptr++;
-                                               else
-                                                       *dstptr++ = *srcptr++;
-                                       }
-                                       else
-                                       {
-                                               *dstptr++ = *srcptr++;
-                                               leadingspace = false;
-                                               dstendptr = dstptr;
-                                       }
-                                       break;
-                       }
+                                                               (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                                                errmsg("array size exceeds the maximum allowed (%d)",
+                                                                               (int) MaxArraySize)));
+                                       maxitems = Min(maxitems * 2, MaxArraySize);
+                                       values = repalloc_array(values, Datum, maxitems);
+                                       nulls = repalloc_array(nulls, bool, maxitems);
+                               }
+
+                               /* Read the element's value, or check that NULL is allowed */
+                               if (!InputFunctionCallSafe(inputproc,
+                                                                                  (tok == ATOK_ELEM_NULL) ? NULL : elembuf.data,
+                                                                                  typioparam, typmod,
+                                                                                  escontext,
+                                                                                  &values[nitems]))
+                                       return false;
+                               nulls[nitems] = (tok == ATOK_ELEM_NULL);
+                               nitems++;
+
+                               /*
+                                * Once we have found an element, the number of dimensions can
+                                * no longer increase, and subsequent elements must all be at
+                                * the same nesting depth.
+                                */
+                               ndim_frozen = true;
+                               if (nest_level != ndim)
+                                       goto dimension_error;
+                               /* Count the new element */
+                               nelems[nest_level - 1]++;
+
+                               /* Must have a delim or a right brace following */
+                               expect_delim = true;
+                               break;
+
+                       case ATOK_ERROR:
+                               return false;
                }
+       } while (nest_level > 0);
 
-               Assert(dstptr < srcptr);
-               *dstendptr = '\0';
+       /* Clean up and return results */
+       pfree(elembuf.data);
 
-               if (i < 0 || i >= nitems)
-                       ereturn(escontext, false,
-                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                        errmsg("malformed array literal: \"%s\"",
-                                                       origStr)));
+       *ndim_p = ndim;
+       *nitems_p = nitems;
+       *values_p = values;
+       *nulls_p = nulls;
+       return true;
+
+dimension_error:
+       if (dimensions_specified)
+               ereturn(escontext, false,
+                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                errmsg("malformed array literal: \"%s\"", origStr),
+                                errdetail("Specified array dimensions do not match array contents.")));
+       else
+               ereturn(escontext, false,
+                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                errmsg("malformed array literal: \"%s\"", origStr),
+                                errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
+}
 
-               if (Array_nulls && !hasquoting &&
-                       pg_strcasecmp(itemstart, "NULL") == 0)
+/*
+ * ReadArrayToken
+ *      read one token from an array value string
+ *
+ * Starts scanning from *srcptr, skipping leading whitespace.  On non-error
+ * return, *srcptr is advanced past the token; ATOK_ERROR signals failure.
+ *
+ * If the token is ATOK_ELEM, the de-escaped string is returned in elembuf.
+ */
+static ArrayToken
+ReadArrayToken(char **srcptr, StringInfo elembuf, char typdelim,
+                          const char *origStr, Node *escontext)
+{
+       char       *p = *srcptr;
+       int                     dstlen;
+       bool            has_escapes;
+
+       resetStringInfo(elembuf);
+
+       /* Identify token type.  Loop advances over leading whitespace. */
+       for (;;)
+       {
+               switch (*p)
                {
-                       /* it's a NULL item */
-                       if (!InputFunctionCallSafe(inputproc, NULL,
-                                                                          typioparam, typmod,
-                                                                          escontext,
-                                                                          &values[i]))
-                               return false;
-                       nulls[i] = true;
+                       case '\0':
+                               goto ending_error;
+                       case '{':
+                               *srcptr = p + 1;
+                               return ATOK_LEVEL_START;
+                       case '}':
+                               *srcptr = p + 1;
+                               return ATOK_LEVEL_END;
+                       case '"':
+                               p++;
+                               goto quoted_element;
+                       default:
+                               if (*p == typdelim)
+                               {
+                                       *srcptr = p + 1;
+                                       return ATOK_DELIM;
+                               }
+                               if (scanner_isspace(*p))
+                               {
+                                       p++;
+                                       continue;
+                               }
+                               goto unquoted_element;
                }
-               else
+       }
+
+quoted_element:
+       for (;;)
+       {
+               switch (*p)
                {
-                       if (!InputFunctionCallSafe(inputproc, itemstart,
-                                                                          typioparam, typmod,
-                                                                          escontext,
-                                                                          &values[i]))
-                               return false;
-                       nulls[i] = false;
+                       case '\0':
+                               goto ending_error;
+                       case '\\':
+                               /* Skip backslash, copy next character as-is. */
+                               p++;
+                               if (*p == '\0')
+                                       goto ending_error;
+                               appendStringInfoChar(elembuf, *p++);
+                               break;
+                       case '"':
+
+                               /*
+                                * If next non-whitespace isn't typdelim or a brace, complain
+                                * about incorrect quoting.  While we could leave such cases
+                                * to be detected as incorrect token sequences, the resulting
+                                * message wouldn't be as helpful.  (We could also give the
+                                * incorrect-quoting error when next is '{', but treating that
+                                * as a token sequence error seems better.)
+                                */
+                               while (*(++p) != '\0')
+                               {
+                                       if (*p == typdelim || *p == '}' || *p == '{')
+                                       {
+                                               *srcptr = p;
+                                               return ATOK_ELEM;
+                                       }
+                                       if (!scanner_isspace(*p))
+                                               ereturn(escontext, ATOK_ERROR,
+                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                                errmsg("malformed array literal: \"%s\"", origStr),
+                                                                errdetail("Incorrectly quoted array element.")));
+                               }
+                               goto ending_error;
+                       default:
+                               appendStringInfoChar(elembuf, *p++);
+                               break;
                }
        }
 
+unquoted_element:
+
        /*
-        * Check for nulls, compute total data space needed
+        * We don't include trailing whitespace in the result.  dstlen tracks how
+        * much of the output string is known to not be trailing whitespace.
         */
-       hasnull = false;
-       totbytes = 0;
-       for (i = 0; i < nitems; i++)
+       dstlen = 0;
+       has_escapes = false;
+       for (;;)
        {
-               if (nulls[i])
-                       hasnull = true;
-               else
+               switch (*p)
                {
-                       /* let's just make sure data is not toasted */
-                       if (typlen == -1)
-                               values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
-                       totbytes = att_addlength_datum(totbytes, typlen, values[i]);
-                       totbytes = att_align_nominal(totbytes, typalign);
-                       /* check for overflow of total request */
-                       if (!AllocSizeIsValid(totbytes))
-                               ereturn(escontext, false,
-                                               (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                                                errmsg("array size exceeds the maximum allowed (%d)",
-                                                               (int) MaxAllocSize)));
+                       case '\0':
+                               goto ending_error;
+                       case '{':
+                               ereturn(escontext, ATOK_ERROR,
+                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                errmsg("malformed array literal: \"%s\"", origStr),
+                                                errdetail("Unexpected \"%c\" character.",
+                                                                  '{')));
+                       case '"':
+                               /* Must double-quote all or none of an element. */
+                               ereturn(escontext, ATOK_ERROR,
+                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                errmsg("malformed array literal: \"%s\"", origStr),
+                                                errdetail("Incorrectly quoted array element.")));
+                       case '\\':
+                               /* Skip backslash, copy next character as-is. */
+                               p++;
+                               if (*p == '\0')
+                                       goto ending_error;
+                               appendStringInfoChar(elembuf, *p++);
+                               dstlen = elembuf->len;  /* treat it as non-whitespace */
+                               has_escapes = true;
+                               break;
+                       default:
+                               /* End of elem? */
+                               if (*p == typdelim || *p == '}')
+                               {
+                                       /* hack: truncate the output string to dstlen */
+                                       elembuf->data[dstlen] = '\0';
+                                       elembuf->len = dstlen;
+                                       *srcptr = p;
+                                       /* Check if it's unquoted, unescaped "NULL" (case-insensitive) */
+                                       if (Array_nulls && !has_escapes &&
+                                               pg_strcasecmp(elembuf->data, "NULL") == 0)
+                                               return ATOK_ELEM_NULL;
+                                       else
+                                               return ATOK_ELEM;
+                               }
+                               appendStringInfoChar(elembuf, *p);
+                               if (!scanner_isspace(*p))
+                                       dstlen = elembuf->len;
+                               p++;
+                               break;
                }
        }
-       *hasnulls = hasnull;
-       *nbytes = totbytes;
-       return true;
-}
 
+ending_error:
+       ereturn(escontext, ATOK_ERROR,
+                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                        errmsg("malformed array literal: \"%s\"", origStr),
+                        errdetail("Unexpected end of input.")));
+}
 
 /*
  * Copy data into an array object from a temporary array of Datums.
index aed799234cdee3c096a10b57a977c0c90cd631d8..62439715433361a190c455739d16da4299ed2af4 100644 (file)
@@ -43,21 +43,6 @@ ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx)
        return offset;
 }
 
-/*
- * Same, but subscripts are assumed 0-based, and use a scale array
- * instead of raw dimension data (see mda_get_prod to create scale array)
- */
-int
-ArrayGetOffset0(int n, const int *tup, const int *scale)
-{
-       int                     i,
-                               lin = 0;
-
-       for (i = 0; i < n; i++)
-               lin += tup[i] * scale[i];
-       return lin;
-}
-
 /*
  * Convert array dimensions into number of elements
  *
index e6c8d88d9f20c5ac7253b52e219fa61ab8fe5e06..7308007185e04ab9ff9cd43aa60c5a711f7ef6c7 100644 (file)
@@ -455,7 +455,6 @@ extern void array_free_iterator(ArrayIterator iterator);
  */
 
 extern int     ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx);
-extern int     ArrayGetOffset0(int n, const int *tup, const int *scale);
 extern int     ArrayGetNItems(int ndim, const int *dims);
 extern int     ArrayGetNItemsSafe(int ndim, const int *dims,
                                                           struct Node *escontext);
index 957498432d632ddef615218e5fb76a0763887c35..23404982f71739c258e516853e247430ac998fba 100644 (file)
@@ -1492,21 +1492,36 @@ select '{{1,{2}},{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,{2}},{2,3}}"
 LINE 1: select '{{1,{2}},{2,3}}'::text[];
                ^
-DETAIL:  Unexpected "{" character.
-select '{{},{}}'::text[];
-ERROR:  malformed array literal: "{{},{}}"
-LINE 1: select '{{},{}}'::text[];
-               ^
-DETAIL:  Unexpected "}" character.
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
 select E'{{1,2},\\{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,2},\{2,3}}"
 LINE 1: select E'{{1,2},\\{2,3}}'::text[];
                ^
-DETAIL:  Unexpected "\" character.
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{"a"b}'::text[];
+ERROR:  malformed array literal: "{"a"b}"
+LINE 1: select '{"a"b}'::text[];
+               ^
+DETAIL:  Incorrectly quoted array element.
+select '{a"b"}'::text[];
+ERROR:  malformed array literal: "{a"b"}"
+LINE 1: select '{a"b"}'::text[];
+               ^
+DETAIL:  Incorrectly quoted array element.
+select '{"a""b"}'::text[];
+ERROR:  malformed array literal: "{"a""b"}"
+LINE 1: select '{"a""b"}'::text[];
+               ^
+DETAIL:  Incorrectly quoted array element.
 select '{{"1 2" x},{3}}'::text[];
 ERROR:  malformed array literal: "{{"1 2" x},{3}}"
 LINE 1: select '{{"1 2" x},{3}}'::text[];
                ^
+DETAIL:  Incorrectly quoted array element.
+select '{{"1 2"} x,{3}}'::text[];
+ERROR:  malformed array literal: "{{"1 2"} x,{3}}"
+LINE 1: select '{{"1 2"} x,{3}}'::text[];
+               ^
 DETAIL:  Unexpected array element.
 select '{}}'::text[];
 ERROR:  malformed array literal: "{}}"
@@ -1518,11 +1533,116 @@ ERROR:  malformed array literal: "{ }}"
 LINE 1: select '{ }}'::text[];
                ^
 DETAIL:  Junk after closing right brace.
+select '}{'::text[];
+ERROR:  malformed array literal: "}{"
+LINE 1: select '}{'::text[];
+               ^
+DETAIL:  Array value must start with "{" or dimension information.
+select '{foo{}}'::text[];
+ERROR:  malformed array literal: "{foo{}}"
+LINE 1: select '{foo{}}'::text[];
+               ^
+DETAIL:  Unexpected "{" character.
+select '{"foo"{}}'::text[];
+ERROR:  malformed array literal: "{"foo"{}}"
+LINE 1: select '{"foo"{}}'::text[];
+               ^
+DETAIL:  Unexpected "{" character.
+select '{foo,,bar}'::text[];
+ERROR:  malformed array literal: "{foo,,bar}"
+LINE 1: select '{foo,,bar}'::text[];
+               ^
+DETAIL:  Unexpected "," character.
+select '{{1},{{2}}}'::text[];
+ERROR:  malformed array literal: "{{1},{{2}}}"
+LINE 1: select '{{1},{{2}}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{{1}},{2}}'::text[];
+ERROR:  malformed array literal: "{{{1}},{2}}"
+LINE 1: select '{{{1}},{2}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{},{{}}}'::text[];
+ERROR:  malformed array literal: "{{},{{}}}"
+LINE 1: select '{{},{{}}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{{}},{}}'::text[];
+ERROR:  malformed array literal: "{{{}},{}}"
+LINE 1: select '{{{}},{}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{1},{}}'::text[];
+ERROR:  malformed array literal: "{{1},{}}"
+LINE 1: select '{{1},{}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{},{1}}'::text[];
+ERROR:  malformed array literal: "{{},{1}}"
+LINE 1: select '{{},{1}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '[1:0]={}'::int[];
+ERROR:  upper bound cannot be less than lower bound
+LINE 1: select '[1:0]={}'::int[];
+               ^
+select '[2147483646:2147483647]={1,2}'::int[];
+ERROR:  array upper bound is too large: 2147483647
+LINE 1: select '[2147483646:2147483647]={1,2}'::int[];
+               ^
+select '[1:-1]={}'::int[];
+ERROR:  upper bound cannot be less than lower bound
+LINE 1: select '[1:-1]={}'::int[];
+               ^
+select '[2]={1}'::int[];
+ERROR:  malformed array literal: "[2]={1}"
+LINE 1: select '[2]={1}'::int[];
+               ^
+DETAIL:  Specified array dimensions do not match array contents.
+select '[1:]={1}'::int[];
+ERROR:  malformed array literal: "[1:]={1}"
+LINE 1: select '[1:]={1}'::int[];
+               ^
+DETAIL:  Missing array dimension value.
+select '[:1]={1}'::int[];
+ERROR:  malformed array literal: "[:1]={1}"
+LINE 1: select '[:1]={1}'::int[];
+               ^
+DETAIL:  "[" must introduce explicitly-specified array dimensions.
 select array[];
 ERROR:  cannot determine type of empty array
 LINE 1: select array[];
                ^
 HINT:  Explicitly cast to the desired type, for example ARRAY[]::integer[].
+select '{{1,},{1},}'::text[];
+ERROR:  malformed array literal: "{{1,},{1},}"
+LINE 1: select '{{1,},{1},}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '{{1,},{1}}'::text[];
+ERROR:  malformed array literal: "{{1,},{1}}"
+LINE 1: select '{{1,},{1}}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '{{1,}}'::text[];
+ERROR:  malformed array literal: "{{1,}}"
+LINE 1: select '{{1,}}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '{1,}'::text[];
+ERROR:  malformed array literal: "{1,}"
+LINE 1: select '{1,}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '[21474836488:21474836489]={1,2}'::int[];
+ERROR:  array bound is out of integer range
+LINE 1: select '[21474836488:21474836489]={1,2}'::int[];
+               ^
+select '[-2147483649:-2147483648]={1,2}'::int[];
+ERROR:  array bound is out of integer range
+LINE 1: select '[-2147483649:-2147483648]={1,2}'::int[];
+               ^
 -- none of the above should be accepted
 -- all of the following should be accepted
 select '{}'::text[];
@@ -1531,12 +1651,30 @@ select '{}'::text[];
  {}
 (1 row)
 
+select '{{},{}}'::text[];
+ text 
+------
+ {}
+(1 row)
+
 select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
                      text                      
 -----------------------------------------------
  {{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}
 (1 row)
 
+select '{null,n\ull,"null"}'::text[];
+         text         
+----------------------
+ {NULL,"null","null"}
+(1 row)
+
+select '{ ab\c , "ab\"c" }'::text[];
+     text      
+---------------
+ {abc,"ab\"c"}
+(1 row)
+
 select '{0 second  ,0 second}'::interval[];
    interval    
 ---------------
@@ -1570,12 +1708,30 @@ select array[]::text[];
  {}
 (1 row)
 
+select '[2]={1,7}'::int[];
+ int4  
+-------
+ {1,7}
+(1 row)
+
 select '[0:1]={1.1,2.2}'::float8[];
      float8      
 -----------------
  [0:1]={1.1,2.2}
 (1 row)
 
+select '[2147483646:2147483646]={1}'::int[];
+            int4             
+-----------------------------
+ [2147483646:2147483646]={1}
+(1 row)
+
+select '[-2147483648:-2147483647]={1,2}'::int[];
+              int4               
+---------------------------------
+ [-2147483648:-2147483647]={1,2}
+(1 row)
+
 -- all of the above should be accepted
 -- tests for array aggregates
 CREATE TEMP TABLE arraggtest ( f1 INT[], f2 TEXT[][], f3 FLOAT[]);
index daf805c38271d60cef3bec3878f24c27382304ca..50aa539fdc1e252e3edcc2cad496921e4125fd8d 100644 (file)
@@ -473,17 +473,45 @@ select 'foo' ilike all (array['F%', '%O']); -- t
 
 -- none of the following should be accepted
 select '{{1,{2}},{2,3}}'::text[];
-select '{{},{}}'::text[];
 select E'{{1,2},\\{2,3}}'::text[];
+select '{"a"b}'::text[];
+select '{a"b"}'::text[];
+select '{"a""b"}'::text[];
 select '{{"1 2" x},{3}}'::text[];
+select '{{"1 2"} x,{3}}'::text[];
 select '{}}'::text[];
 select '{ }}'::text[];
+select '}{'::text[];
+select '{foo{}}'::text[];
+select '{"foo"{}}'::text[];
+select '{foo,,bar}'::text[];
+select '{{1},{{2}}}'::text[];
+select '{{{1}},{2}}'::text[];
+select '{{},{{}}}'::text[];
+select '{{{}},{}}'::text[];
+select '{{1},{}}'::text[];
+select '{{},{1}}'::text[];
+select '[1:0]={}'::int[];
+select '[2147483646:2147483647]={1,2}'::int[];
+select '[1:-1]={}'::int[];
+select '[2]={1}'::int[];
+select '[1:]={1}'::int[];
+select '[:1]={1}'::int[];
 select array[];
+select '{{1,},{1},}'::text[];
+select '{{1,},{1}}'::text[];
+select '{{1,}}'::text[];
+select '{1,}'::text[];
+select '[21474836488:21474836489]={1,2}'::int[];
+select '[-2147483649:-2147483648]={1,2}'::int[];
 -- none of the above should be accepted
 
 -- all of the following should be accepted
 select '{}'::text[];
+select '{{},{}}'::text[];
 select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
+select '{null,n\ull,"null"}'::text[];
+select '{ ab\c , "ab\"c" }'::text[];
 select '{0 second  ,0 second}'::interval[];
 select '{ { "," } , { 3 } }'::text[];
 select '  {   {  "  0 second  "   ,  0 second  }   }'::text[];
@@ -492,7 +520,10 @@ select '{
            @ 1 hour @ 42 minutes @ 20 seconds
          }'::interval[];
 select array[]::text[];
+select '[2]={1,7}'::int[];
 select '[0:1]={1.1,2.2}'::float8[];
+select '[2147483646:2147483646]={1}'::int[];
+select '[-2147483648:-2147483647]={1,2}'::int[];
 -- all of the above should be accepted
 
 -- tests for array aggregates
index bf50a321198716810756819b300d24af82c08c91..92c0003ab19e15924af19e05069fb40b00d7ef7c 100644 (file)
@@ -148,8 +148,8 @@ ArrayIOData
 ArrayIterator
 ArrayMapState
 ArrayMetaState
-ArrayParseState
 ArraySubWorkspace
+ArrayToken
 ArrayType
 AsyncQueueControl
 AsyncQueueEntry