Collations with nondeterministic comparison

author Peter Eisentraut <[email protected]>
Fri, 22 Mar 2019 11:09:32 +0000 (12:09 +0100)
committer Peter Eisentraut <[email protected]>
Fri, 22 Mar 2019 11:12:43 +0000 (12:12 +0100)
diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h
index d641361aef1f1003c2c37485665a8aae6d2b22d0..7c18eaa50866193f4b882e27169d3f1e7ecca459 100644 (file)
--- a/contrib/bloom/bloom.h
+++ b/contrib/bloom/bloom.h
@@ -137,6 +137,7 @@ typedef struct BloomMetaPageData
  typedef struct BloomState
  {
     FmgrInfo    hashFn[INDEX_MAX_KEYS];
+   Oid         collations[INDEX_MAX_KEYS];
     BloomOptions opts;          /* copy of options on index's metapage */
     int32       nColumns;
  
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
index 645837657871575628d92e1a83927b8341478a57..d078dfbd469bc470ba34179e1477efccf3084bad 100644 (file)
--- a/contrib/bloom/blutils.c
+++ b/contrib/bloom/blutils.c
@@ -163,6 +163,7 @@ initBloomState(BloomState *state, Relation index)
         fmgr_info_copy(&(state->hashFn[i]),
                        index_getprocinfo(index, i + 1, BLOOM_HASH_PROC),
                        CurrentMemoryContext);
+       state->collations[i] = index->rd_indcollation[i];
     }
  
     /* Initialize amcache if needed with options from metapage */
@@ -267,7 +268,7 @@ signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
      * different columns will be mapped into different bits because of step
      * above
      */
-   hashVal = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value));
+   hashVal = DatumGetInt32(FunctionCall1Coll(&state->hashFn[attno], state->collations[attno], value));
     mySrand(hashVal ^ myRand());
  
     for (j = 0; j < state->opts.bitSize[attno]; j++)
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 0fd792ff1a2f92adc451be4329df3a56a9c1b90d..45ed077654e3086a0587854ad2f50563198f6ebc 100644 (file)
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -2077,6 +2077,13 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
         default, <literal>c</literal> = libc, <literal>i</literal> = icu</entry>
       </row>
  
+     <row>
+      <entry><structfield>collisdeterministic</structfield></entry>
+      <entry><type>bool</type></entry>
+      <entry></entry>
+      <entry>Is the collation deterministic?</entry>
+     </row>
+
       <row>
        <entry><structfield>collencoding</structfield></entry>
        <entry><type>int4</type></entry>
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index a6143ef8a744e8db8c9024e07902a14a47a3da59..555d1b4ac6316a9d06f1039340a552b9e0116b2b 100644 (file)
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -847,11 +847,13 @@ CREATE COLLATION german (provider = libc, locale = 'de_DE');
  
     <para>
      Note that while this system allows creating collations that <quote>ignore
-    case</quote> or <quote>ignore accents</quote> or similar (using
-    the <literal>ks</literal> key), PostgreSQL does not at the moment allow
-    such collations to act in a truly case- or accent-insensitive manner.  Any
-    strings that compare equal according to the collation but are not
-    byte-wise equal will be sorted according to their byte values.
+    case</quote> or <quote>ignore accents</quote> or similar (using the
+    <literal>ks</literal> key), in order for such collations to act in a
+    truly case- or accent-insensitive manner, they also need to be declared as not
+    <firstterm>deterministic</firstterm> in <command>CREATE COLLATION</command>;
+    see <xref linkend="collation-nondeterministic"/>.
+    Otherwise, any strings that compare equal according to the collation but
+    are not byte-wise equal will be sorted according to their byte values.
     </para>
  
     <note>
@@ -883,6 +885,55 @@ CREATE COLLATION french FROM "fr-x-icu";
     </para>
     </sect4>
     </sect3>
+
+   <sect3 id="collation-nondeterministic">
+    <title>Nondeterministic Collations</title>
+
+    <para>
+     A collation is either <firstterm>deterministic</firstterm> or
+     <firstterm>nondeterministic</firstterm>.  A deterministic collation uses
+     deterministic comparisons, which means that it considers strings to be
+     equal only if they consist of the same byte sequence.  Nondeterministic
+     comparison may determine strings to be equal even if they consist of
+     different bytes.  Typical situations include case-insensitive comparison,
+     accent-insensitive comparison, as well as comparison of strings in
+     different Unicode normal forms.  It is up to the collation provider to
+     actually implement such insensitive comparisons; the deterministic flag
+     only determines whether ties are to be broken using bytewise comparison.
+     See also <ulink url="https://unicode.org/reports/tr10">Unicode Technical
+     Standard 10</ulink> for more information on the terminology.
+    </para>
+
+    <para>
+     To create a nondeterministic collation, specify the property
+     <literal>deterministic = false</literal> to <command>CREATE
+     COLLATION</command>, for example:
+<programlisting>
+CREATE COLLATION ndcoll (provider = icu, locale = 'und', deterministic = false);
+</programlisting>
+     This example would use the standard Unicode collation in a
+     nondeterministic way.  In particular, this would allow strings in
+     different normal forms to be compared correctly.  More interesting
+     examples make use of the ICU customization facilities explained above.
+     For example:
+<programlisting>
+CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false);
+CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false);
+</programlisting>
+    </para>
+
+    <para>
+     All standard and predefined collations are deterministic; all
+     user-defined collations are deterministic by default.  While
+     nondeterministic collations give a more <quote>correct</quote> behavior,
+     especially when considering the full power of Unicode and its many
+     special cases, they also have some drawbacks.  Foremost, their use leads
+     to a performance penalty.  Also, certain operations are not possible with
+     nondeterministic collations, such as pattern matching operations.
+     Therefore, they should be used only in cases where they are specifically
+     wanted.
+    </para>
+   </sect3>
    </sect2>
   </sect1>
  
diff --git a/doc/src/sgml/citext.sgml b/doc/src/sgml/citext.sgml
index b1fe7101b20907cc315b3d1f8d352eaffae44df4..85aa339d8bafa742b2a6665937d498c4ab1d85df 100644 (file)
--- a/doc/src/sgml/citext.sgml
+++ b/doc/src/sgml/citext.sgml
@@ -14,6 +14,16 @@
    exactly like <type>text</type>.
   </para>
  
+ <tip>
+  <para>
+   Consider using <firstterm>nondeterministic collations</firstterm> (see
+   <xref linkend="collation-nondeterministic"/>) instead of this module.  They
+   can be used for case-insensitive comparisons, accent-insensitive
+   comparisons, and other combinations, and they handle more Unicode special
+   cases correctly.
+  </para>
+ </tip>
+
   <sect2>
    <title>Rationale</title>
  
@@ -246,6 +256,17 @@ SELECT * FROM users WHERE nick = 'Larry';
        will be invoked instead.
      </para>
      </listitem>
+
+    <listitem>
+     <para>
+      The approach of lower-casing strings for comparison does not handle some
+      Unicode special cases correctly, for example when one upper-case letter
+      has two lower-case letter equivalents.  Unicode distinguishes between
+      <firstterm>case mapping</firstterm> and <firstterm>case
+      folding</firstterm> for this reason.  Use nondeterministic collations
+      instead of <type>citext</type> to handle that correctly.
+     </para>
+    </listitem>
     </itemizedlist>
   </sect2>
  
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 3a99e209a2b0b5df1679e435cf44a4f4b7630771..1a014732919b6ef03c73784424d43b036f86c325 100644 (file)
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -4065,6 +4065,12 @@ cast(-44 as bit(12))           <lineannotation>111111010100</lineannotation>
      </para>
     </caution>
  
+   <para>
+    The pattern matching operators of all three kinds do not support
+    nondeterministic collations.  If required, apply a different collation to
+    the expression to work around this limitation.
+   </para>
+
    <sect2 id="functions-like">
     <title><function>LIKE</function></title>
  
diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml
index 038797fce116520361337c18ec17a1623047f0a6..def4dda6e8899be946b6b88e044871304262f259 100644 (file)
--- a/doc/src/sgml/ref/create_collation.sgml
+++ b/doc/src/sgml/ref/create_collation.sgml
@@ -23,6 +23,7 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> (
      [ LC_COLLATE = <replaceable>lc_collate</replaceable>, ]
      [ LC_CTYPE = <replaceable>lc_ctype</replaceable>, ]
      [ PROVIDER = <replaceable>provider</replaceable>, ]
+    [ DETERMINISTIC = <replaceable>boolean</replaceable>, ]
      [ VERSION = <replaceable>version</replaceable> ]
  )
  CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replaceable>existing_collation</replaceable>
@@ -124,6 +125,27 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replace
       </listitem>
      </varlistentry>
  
+    <varlistentry>
+     <term><literal>DETERMINISTIC</literal></term>
+
+     <listitem>
+      <para>
+       Specifies whether the collation should use deterministic comparisons.
+       The default is true.  A deterministic comparison considers strings that
+       are not byte-wise equal to be unequal even if they are considered
+       logically equal by the comparison.  PostgreSQL breaks ties using a
+       byte-wise comparison.  Comparison that is not deterministic can make the
+       collation be, say, case- or accent-insensitive.  For that, you need to
+       choose an appropriate <literal>LC_COLLATE</literal> setting
+       <emphasis>and</emphasis> set the collation to not deterministic here.
+      </para>
+
+      <para>
+       Nondeterministic collations are only supported with the ICU provider.
+      </para>
+     </listitem>
+    </varlistentry>
+
      <varlistentry>
       <term><replaceable>version</replaceable></term>
  
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index e5f3d42e0454859fff2c31cb52a6d5cc8b591f54..0bf15ae7236c559363ed9805248e0cf8d4f1275b 100644 (file)
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -27,8 +27,10 @@
  #include "postgres.h"
  
  #include "access/hash.h"
+#include "catalog/pg_collation.h"
  #include "utils/builtins.h"
  #include "utils/hashutils.h"
+#include "utils/pg_locale.h"
  
  /*
   * Datatype-specific hash functions.
@@ -243,15 +245,51 @@ Datum
  hashtext(PG_FUNCTION_ARGS)
  {
     text       *key = PG_GETARG_TEXT_PP(0);
+   Oid         collid = PG_GET_COLLATION();
+   pg_locale_t mylocale = 0;
     Datum       result;
  
-   /*
-    * Note: this is currently identical in behavior to hashvarlena, but keep
-    * it as a separate function in case we someday want to do something
-    * different in non-C locales.  (See also hashbpchar, if so.)
-    */
-   result = hash_any((unsigned char *) VARDATA_ANY(key),
-                     VARSIZE_ANY_EXHDR(key));
+   if (!collid)
+       ereport(ERROR,
+               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                errmsg("could not determine which collation to use for string hashing"),
+                errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+   if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       mylocale = pg_newlocale_from_collation(collid);
+
+   if (!mylocale || mylocale->deterministic)
+   {
+       result = hash_any((unsigned char *) VARDATA_ANY(key),
+                         VARSIZE_ANY_EXHDR(key));
+   }
+   else
+   {
+#ifdef USE_ICU
+       if (mylocale->provider == COLLPROVIDER_ICU)
+       {
+           int32_t     ulen = -1;
+           UChar      *uchar = NULL;
+           Size        bsize;
+           uint8_t    *buf;
+
+           ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
+
+           bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+                                   uchar, ulen, NULL, 0);
+           buf = palloc(bsize);
+           ucol_getSortKey(mylocale->info.icu.ucol,
+                           uchar, ulen, buf, bsize);
+
+           result = hash_any(buf, bsize);
+
+           pfree(buf);
+       }
+       else
+#endif
+           /* shouldn't happen */
+           elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+   }
  
     /* Avoid leaking memory for toasted inputs */
     PG_FREE_IF_COPY(key, 0);
@@ -263,12 +301,52 @@ Datum
  hashtextextended(PG_FUNCTION_ARGS)
  {
     text       *key = PG_GETARG_TEXT_PP(0);
+   Oid         collid = PG_GET_COLLATION();
+   pg_locale_t mylocale = 0;
     Datum       result;
  
-   /* Same approach as hashtext */
-   result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
-                              VARSIZE_ANY_EXHDR(key),
-                              PG_GETARG_INT64(1));
+   if (!collid)
+       ereport(ERROR,
+               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                errmsg("could not determine which collation to use for string hashing"),
+                errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+   if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       mylocale = pg_newlocale_from_collation(collid);
+
+   if (!mylocale || mylocale->deterministic)
+   {
+       result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
+                                  VARSIZE_ANY_EXHDR(key),
+                                  PG_GETARG_INT64(1));
+   }
+   else
+   {
+#ifdef USE_ICU
+       if (mylocale->provider == COLLPROVIDER_ICU)
+       {
+           int32_t     ulen = -1;
+           UChar      *uchar = NULL;
+           Size        bsize;
+           uint8_t    *buf;
+
+           ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
+
+           bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+                                   uchar, ulen, NULL, 0);
+           buf = palloc(bsize);
+           ucol_getSortKey(mylocale->info.icu.ucol,
+                           uchar, ulen, buf, bsize);
+
+           result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
+
+           pfree(buf);
+       }
+       else
+#endif
+           /* shouldn't happen */
+           elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+   }
  
     PG_FREE_IF_COPY(key, 0);
  
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c
index 39cd391529c2194fda1c055aefbd9faad556b694..d22998c54bf4424d991bf0cfa2f5cea4deec5f68 100644 (file)
--- a/src/backend/access/spgist/spgtextproc.c
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -630,7 +630,8 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
              * query (prefix) string, so we don't need to check it again.
              */
             res = (level >= queryLen) ||
-               DatumGetBool(DirectFunctionCall2(text_starts_with,
+               DatumGetBool(DirectFunctionCall2Coll(text_starts_with,
+                                                    PG_GET_COLLATION(),
                                                  out->leafValue,
                                                  PointerGetDatum(query)));
  
diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c
index 74e1e82cb9cac03ebc06414e196b62a059f6fdec..dd99d53547f3563e71a123bdff50ce450e6f77a0 100644 (file)
--- a/src/backend/catalog/pg_collation.c
+++ b/src/backend/catalog/pg_collation.c
@@ -46,6 +46,7 @@ Oid
  CollationCreate(const char *collname, Oid collnamespace,
                 Oid collowner,
                 char collprovider,
+               bool collisdeterministic,
                 int32 collencoding,
                 const char *collcollate, const char *collctype,
                 const char *collversion,
@@ -160,6 +161,7 @@ CollationCreate(const char *collname, Oid collnamespace,
     values[Anum_pg_collation_collnamespace - 1] = ObjectIdGetDatum(collnamespace);
     values[Anum_pg_collation_collowner - 1] = ObjectIdGetDatum(collowner);
     values[Anum_pg_collation_collprovider - 1] = CharGetDatum(collprovider);
+   values[Anum_pg_collation_collisdeterministic - 1] = BoolGetDatum(collisdeterministic);
     values[Anum_pg_collation_collencoding - 1] = Int32GetDatum(collencoding);
     namestrcpy(&name_collate, collcollate);
     values[Anum_pg_collation_collcollate - 1] = NameGetDatum(&name_collate);
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index ed3f1c12e57e1189f84abc6314bf2c3d62bf2402..919e092483ae7caf99a81549bc9a88140eb3666e 100644 (file)
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -59,10 +59,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
     DefElem    *lccollateEl = NULL;
     DefElem    *lcctypeEl = NULL;
     DefElem    *providerEl = NULL;
+   DefElem    *deterministicEl = NULL;
     DefElem    *versionEl = NULL;
     char       *collcollate = NULL;
     char       *collctype = NULL;
     char       *collproviderstr = NULL;
+   bool        collisdeterministic = true;
     int         collencoding = 0;
     char        collprovider = 0;
     char       *collversion = NULL;
@@ -91,6 +93,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
             defelp = &lcctypeEl;
         else if (strcmp(defel->defname, "provider") == 0)
             defelp = &providerEl;
+       else if (strcmp(defel->defname, "deterministic") == 0)
+           defelp = &deterministicEl;
         else if (strcmp(defel->defname, "version") == 0)
             defelp = &versionEl;
         else
@@ -125,6 +129,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
         collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
         collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype));
         collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
+       collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic;
         collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding;
  
         ReleaseSysCache(tp);
@@ -157,6 +162,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
     if (providerEl)
         collproviderstr = defGetString(providerEl);
  
+   if (deterministicEl)
+       collisdeterministic = defGetBoolean(deterministicEl);
+
     if (versionEl)
         collversion = defGetString(versionEl);
  
@@ -185,6 +193,16 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
                 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                  errmsg("parameter \"lc_ctype\" must be specified")));
  
+   /*
+    * Nondeterministic collations are currently only supported with ICU
+    * because that's the only case where it can actually make a difference.
+    * So we can save writing the code for the other providers.
+    */
+   if (!collisdeterministic && collprovider != COLLPROVIDER_ICU)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("nondeterministic collations not supported with this provider")));
+
     if (!fromEl)
     {
         if (collprovider == COLLPROVIDER_ICU)
@@ -203,6 +221,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
                              collNamespace,
                              GetUserId(),
                              collprovider,
+                            collisdeterministic,
                              collencoding,
                              collcollate,
                              collctype,
@@ -586,7 +605,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
              * about existing ones.
              */
             collid = CollationCreate(localebuf, nspid, GetUserId(),
-                                    COLLPROVIDER_LIBC, enc,
+                                    COLLPROVIDER_LIBC, true, enc,
                                      localebuf, localebuf,
                                      get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
                                      true, true);
@@ -647,7 +666,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
             int         enc = aliases[i].enc;
  
             collid = CollationCreate(alias, nspid, GetUserId(),
-                                    COLLPROVIDER_LIBC, enc,
+                                    COLLPROVIDER_LIBC, true, enc,
                                      locale, locale,
                                      get_collation_actual_version(COLLPROVIDER_LIBC, locale),
                                      true, true);
@@ -709,7 +728,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
  
             collid = CollationCreate(psprintf("%s-x-icu", langtag),
                                      nspid, GetUserId(),
-                                    COLLPROVIDER_ICU, -1,
+                                    COLLPROVIDER_ICU, true, -1,
                                      collcollate, collcollate,
                                      get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
                                      true, true);
diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c
index daf3f516362d323b0e1e13ed3d12843f85e1de10..d4723fced898785d3f371315b457e340bb70749e 100644 (file)
--- a/src/backend/commands/extension.c
+++ b/src/backend/commands/extension.c
@@ -901,7 +901,8 @@ execute_extension_script(Oid extensionOid, ExtensionControlFile *control,
         {
             const char *qSchemaName = quote_identifier(schemaName);
  
-           t_sql = DirectFunctionCall3(replace_text,
+           t_sql = DirectFunctionCall3Coll(replace_text,
+                                           C_COLLATION_OID,
                                         t_sql,
                                         CStringGetTextDatum("@extschema@"),
                                         CStringGetTextDatum(qSchemaName));
@@ -913,7 +914,8 @@ execute_extension_script(Oid extensionOid, ExtensionControlFile *control,
          */
         if (control->module_pathname)
         {
-           t_sql = DirectFunctionCall3(replace_text,
+           t_sql = DirectFunctionCall3Coll(replace_text,
+                                           C_COLLATION_OID,
                                         t_sql,
                                         CStringGetTextDatum("MODULE_PATHNAME"),
                                         CStringGetTextDatum(control->module_pathname));
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 7cbf9d3bc1c786d0ce10fc5e7c9f4486107ac5dc..0fb31f5c3d395be5416d601b15d7256af262212a 100644 (file)
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -3317,6 +3317,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
                        int numCols,
                        const AttrNumber *keyColIdx,
                        const Oid *eqfunctions,
+                      const Oid *collations,
                        PlanState *parent)
  {
     ExprState  *state = makeNode(ExprState);
@@ -3377,6 +3378,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
         Form_pg_attribute latt = TupleDescAttr(ldesc, attno - 1);
         Form_pg_attribute ratt = TupleDescAttr(rdesc, attno - 1);
         Oid         foid = eqfunctions[natt];
+       Oid         collid = collations[natt];
         FmgrInfo   *finfo;
         FunctionCallInfo fcinfo;
         AclResult   aclresult;
@@ -3394,7 +3396,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
         fmgr_info(foid, finfo);
         fmgr_info_set_expr(NULL, finfo);
         InitFunctionCallInfoData(*fcinfo, finfo, 2,
-                                InvalidOid, NULL, NULL);
+                                collid, NULL, NULL);
  
         /* left arg */
         scratch.opcode = EEOP_INNER_VAR;
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 417e971ec88db254254b67e3b6511396d01de1cf..14ee8db3f98b7d58396dbe52b19bf60b6bb1b960 100644 (file)
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -60,6 +60,7 @@ execTuplesMatchPrepare(TupleDesc desc,
                        int numCols,
                        const AttrNumber *keyColIdx,
                        const Oid *eqOperators,
+                      const Oid *collations,
                        PlanState *parent)
  {
     Oid        *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
@@ -75,7 +76,7 @@ execTuplesMatchPrepare(TupleDesc desc,
  
     /* build actual expression */
     expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
-                                 numCols, keyColIdx, eqFunctions,
+                                 numCols, keyColIdx, eqFunctions, collations,
                                   parent);
  
     return expr;
@@ -155,6 +156,7 @@ BuildTupleHashTableExt(PlanState *parent,
                        int numCols, AttrNumber *keyColIdx,
                        const Oid *eqfuncoids,
                        FmgrInfo *hashfunctions,
+                      Oid *collations,
                        long nbuckets, Size additionalsize,
                        MemoryContext metacxt,
                        MemoryContext tablecxt,
@@ -177,6 +179,7 @@ BuildTupleHashTableExt(PlanState *parent,
     hashtable->numCols = numCols;
     hashtable->keyColIdx = keyColIdx;
     hashtable->tab_hash_funcs = hashfunctions;
+   hashtable->tab_collations = collations;
     hashtable->tablecxt = tablecxt;
     hashtable->tempcxt = tempcxt;
     hashtable->entrysize = entrysize;
@@ -212,7 +215,7 @@ BuildTupleHashTableExt(PlanState *parent,
     hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
                                                     &TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
                                                     numCols,
-                                                   keyColIdx, eqfuncoids,
+                                                   keyColIdx, eqfuncoids, collations,
                                                     NULL);
  
     /*
@@ -240,6 +243,7 @@ BuildTupleHashTable(PlanState *parent,
                     int numCols, AttrNumber *keyColIdx,
                     const Oid *eqfuncoids,
                     FmgrInfo *hashfunctions,
+                   Oid *collations,
                     long nbuckets, Size additionalsize,
                     MemoryContext tablecxt,
                     MemoryContext tempcxt,
@@ -250,6 +254,7 @@ BuildTupleHashTable(PlanState *parent,
                                   numCols, keyColIdx,
                                   eqfuncoids,
                                   hashfunctions,
+                                 collations,
                                   nbuckets, additionalsize,
                                   tablecxt,
                                   tablecxt,
@@ -421,8 +426,9 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
         {
             uint32      hkey;
  
-           hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],
-                                               attr));
+           hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
+                                                   hashtable->tab_collations[i],
+                                                   attr));
             hashkey ^= hkey;
         }
     }
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 37e96a6013bed2caa4f1cc65b31dec232db0791a..cfad8a38f0f0701c16543cecb72bbc22f8b30438 100644 (file)
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -1246,6 +1246,7 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
                 greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
                 rowHash = compute_partition_hash_value(key->partnatts,
                                                        key->partsupfunc,
+                                                      key->partcollation,
                                                        values, isnull);
  
                 part_index = boundinfo->indexes[rowHash % greatest_modulus];
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 95dfc4987de753409cb12ead18d1a409209e6f2a..c539bb5a3f65e5cc65b5e0f1cd0bf15f4ebc391a 100644 (file)
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -96,6 +96,8 @@ build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel,
                     regop,
                     searchslot->tts_values[mainattno - 1]);
  
+       skey[attoff].sk_collation = idxrel->rd_indcollation[attoff];
+
         /* Check for null value. */
         if (searchslot->tts_isnull[mainattno - 1])
         {
@@ -262,7 +264,8 @@ tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2)
                      errmsg("could not identify an equality operator for type %s",
                             format_type_be(att->atttypid))));
  
-       if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo,
+       if (!DatumGetBool(FunctionCall2Coll(&typentry->eq_opr_finfo,
+                                           att->attcollation,
                                         slot1->tts_values[attrnum],
                                         slot2->tts_values[attrnum])))
             return false;
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index bae7989a4227a3c3a13d2c4d809c987c1bfb9d3d..47161afbd42da1752cf32761c76be944ee52baec 100644 (file)
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -746,15 +746,14 @@ process_ordered_aggregate_single(AggState *aggstate,
  
         /*
          * If DISTINCT mode, and not distinct from prior, skip it.
-        *
-        * Note: we assume equality functions don't care about collation.
          */
         if (isDistinct &&
             haveOldVal &&
             ((oldIsNull && *isNull) ||
              (!oldIsNull && !*isNull &&
               oldAbbrevVal == newAbbrevVal &&
-             DatumGetBool(FunctionCall2(&pertrans->equalfnOne,
+             DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne,
+                                            pertrans->aggCollation,
                                          oldVal, *newVal)))))
         {
             /* equal to prior, so forget this one */
@@ -1287,6 +1286,7 @@ build_hash_table(AggState *aggstate)
                                                         perhash->hashGrpColIdxHash,
                                                         perhash->eqfuncoids,
                                                         perhash->hashfunctions,
+                                                       perhash->aggnode->grpCollations,
                                                         perhash->aggnode->numGroups,
                                                         additionalsize,
                                                         aggstate->ss.ps.state->es_query_cxt,
@@ -2381,6 +2381,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
                                                length,
                                                aggnode->grpColIdx,
                                                aggnode->grpOperators,
+                                              aggnode->grpCollations,
                                                (PlanState *) aggstate);
                 }
  
@@ -2392,6 +2393,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
                                                aggnode->numCols,
                                                aggnode->grpColIdx,
                                                aggnode->grpOperators,
+                                              aggnode->grpCollations,
                                                (PlanState *) aggstate);
                 }
             }
@@ -3155,6 +3157,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
                                        numDistinctCols,
                                        pertrans->sortColIdx,
                                        ops,
+                                      pertrans->sortCollations,
                                        &aggstate->ss.ps);
         pfree(ops);
     }
diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c
index 655084d7b564858b1a447b79be6905acd9352bf6..05f1d33150f84c6eae53ac9f0a62b1a72089b779 100644 (file)
--- a/src/backend/executor/nodeGroup.c
+++ b/src/backend/executor/nodeGroup.c
@@ -212,6 +212,7 @@ ExecInitGroup(Group *node, EState *estate, int eflags)
                                node->numCols,
                                node->grpColIdx,
                                node->grpOperators,
+                              node->grpCollations,
                                &grpstate->ss.ps);
  
     return grpstate;
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 856daf6a7f31ed0d24d3e3056bf559df19c27a39..64eec91f8b8f970f9a8d3f2950b6832341b58f8f 100644 (file)
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -425,7 +425,7 @@ ExecEndHash(HashState *node)
   * ----------------------------------------------------------------
   */
  HashJoinTable
-ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
+ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls)
  {
     Hash       *node;
     HashJoinTable hashtable;
@@ -439,6 +439,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
     int         nkeys;
     int         i;
     ListCell   *ho;
+   ListCell   *hc;
     MemoryContext oldcxt;
  
     /*
@@ -541,8 +542,9 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
     hashtable->inner_hashfunctions =
         (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
     hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
+   hashtable->collations = (Oid *) palloc(nkeys * sizeof(Oid));
     i = 0;
-   foreach(ho, hashOperators)
+   forboth(ho, hashOperators, hc, hashCollations)
     {
         Oid         hashop = lfirst_oid(ho);
         Oid         left_hashfn;
@@ -554,6 +556,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
         fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
         fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
         hashtable->hashStrict[i] = op_strict(hashop);
+       hashtable->collations[i] = lfirst_oid(hc);
         i++;
     }
  
@@ -1847,7 +1850,7 @@ ExecHashGetHashValue(HashJoinTable hashtable,
             /* Compute the hash function */
             uint32      hkey;
  
-           hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], keyval));
+           hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], hashtable->collations[i], keyval));
             hashkey ^= hkey;
         }
  
@@ -2303,8 +2306,9 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
             uint32      hashvalue;
             int         bucket;
  
-           hashvalue = DatumGetUInt32(FunctionCall1(&hashfunctions[0],
-                                                    sslot.values[i]));
+           hashvalue = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[0],
+                                                        hashtable->collations[0],
+                                                        sslot.values[i]));
  
             /*
              * While we have not hit a hole in the hashtable and have not hit
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 209870886400645312e74c33ae6dbecb5149a3bb..aa43296e26cfe30bee27aa621cae9b44b92fedcb 100644 (file)
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -278,6 +278,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
                  */
                 hashtable = ExecHashTableCreate(hashNode,
                                                 node->hj_HashOperators,
+                                               node->hj_Collations,
                                                 HJ_FILL_INNER(node));
                 node->hj_HashTable = hashtable;
  
@@ -603,6 +604,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
     List       *rclauses;
     List       *rhclauses;
     List       *hoperators;
+   List       *hcollations;
     TupleDesc   outerDesc,
                 innerDesc;
     ListCell   *l;
@@ -738,6 +740,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
     rclauses = NIL;
     rhclauses = NIL;
     hoperators = NIL;
+   hcollations = NIL;
     foreach(l, node->hashclauses)
     {
         OpExpr     *hclause = lfirst_node(OpExpr, l);
@@ -749,10 +752,12 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
         rhclauses = lappend(rhclauses, ExecInitExpr(lsecond(hclause->args),
                                                    innerPlanState(hjstate)));
         hoperators = lappend_oid(hoperators, hclause->opno);
+       hcollations = lappend_oid(hcollations, hclause->inputcollid);
     }
     hjstate->hj_OuterHashKeys = lclauses;
     hjstate->hj_InnerHashKeys = rclauses;
     hjstate->hj_HashOperators = hoperators;
+   hjstate->hj_Collations = hcollations;
     /* child Hash node needs to evaluate inner hash keys, too */
     ((HashState *) innerPlanState(hjstate))->hashkeys = rhclauses;
  
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index 9b74ed3208c5f660132ca663b7f7a225db87f54d..9c5eed7def3971c537d57be575802438ee192cda 100644 (file)
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -43,6 +43,7 @@ build_hash_table(RecursiveUnionState *rustate)
                                                 node->dupColIdx,
                                                 rustate->eqfuncoids,
                                                 rustate->hashfunctions,
+                                               node->dupCollations,
                                                 node->numGroups,
                                                 0,
                                                 rustate->ps.state->es_query_cxt,
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index 26aeaee08389b347aa67a76cd479b86c616e3d82..044246aa09ff34f109192569b922daf3b956094c 100644 (file)
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -132,6 +132,7 @@ build_hash_table(SetOpState *setopstate)
                                                    node->dupColIdx,
                                                    setopstate->eqfuncoids,
                                                    setopstate->hashfunctions,
+                                                  node->dupCollations,
                                                    node->numGroups,
                                                    0,
                                                    setopstate->ps.state->es_query_cxt,
@@ -554,6 +555,7 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
                                    node->numCols,
                                    node->dupColIdx,
                                    node->dupOperators,
+                                  node->dupCollations,
                                    &setopstate->ps);
  
     if (node->strategy == SETOP_HASHED)
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index d7d076758c323a571420077d1f381db4ba02f9a7..749b4eced34c4aea222603b1c0fa8ab51ce1f6f8 100644 (file)
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -514,6 +514,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
                                                  node->keyColIdx,
                                                  node->tab_eq_funcoids,
                                                  node->tab_hash_funcs,
+                                                node->tab_collations,
                                                  nbuckets,
                                                  0,
                                                  node->planstate->state->es_query_cxt,
@@ -541,6 +542,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
                                                      node->keyColIdx,
                                                      node->tab_eq_funcoids,
                                                      node->tab_hash_funcs,
+                                                    node->tab_collations,
                                                      nbuckets,
                                                      0,
                                                      node->planstate->state->es_query_cxt,
@@ -642,6 +644,7 @@ execTuplesUnequal(TupleTableSlot *slot1,
                   int numCols,
                   AttrNumber *matchColIdx,
                   FmgrInfo *eqfunctions,
+                 const Oid *collations,
                   MemoryContext evalContext)
  {
     MemoryContext oldContext;
@@ -679,8 +682,8 @@ execTuplesUnequal(TupleTableSlot *slot1,
             continue;           /* can't prove anything here */
  
         /* Apply the type-specific equality function */
-
-       if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
+       if (!DatumGetBool(FunctionCall2Coll(&eqfunctions[i],
+                                           collations[i],
                                         attr1, attr2)))
         {
             result = true;      /* they are unequal */
@@ -722,6 +725,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot,
         if (!execTuplesUnequal(slot, hashtable->tableslot,
                                numCols, keyColIdx,
                                eqfunctions,
+                              hashtable->tab_collations,
                                hashtable->tempcxt))
         {
             TermTupleHashIterator(&hashiter);
@@ -817,6 +821,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
     sstate->tab_eq_funcoids = NULL;
     sstate->tab_hash_funcs = NULL;
     sstate->tab_eq_funcs = NULL;
+   sstate->tab_collations = NULL;
     sstate->lhs_hash_funcs = NULL;
     sstate->cur_eq_funcs = NULL;
  
@@ -915,6 +920,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
         sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid));
         sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
         sstate->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+       sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid));
         sstate->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
         sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
         i = 1;
@@ -965,6 +971,9 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
             fmgr_info(left_hashfn, &sstate->lhs_hash_funcs[i - 1]);
             fmgr_info(right_hashfn, &sstate->tab_hash_funcs[i - 1]);
  
+           /* Set collation */
+           sstate->tab_collations[i - 1] = opexpr->inputcollid;
+
             i++;
         }
  
@@ -1001,6 +1010,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
                                                      ncols,
                                                      sstate->keyColIdx,
                                                      sstate->tab_eq_funcoids,
+                                                    sstate->tab_collations,
                                                      parent);
  
     }
diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c
index ad7039937d0650370a1790929690ad1e736971a8..c553f150b8d25febdb8d73d362b3e0ad1223aa6f 100644 (file)
--- a/src/backend/executor/nodeUnique.c
+++ b/src/backend/executor/nodeUnique.c
@@ -152,6 +152,7 @@ ExecInitUnique(Unique *node, EState *estate, int eflags)
                                node->numCols,
                                node->uniqColIdx,
                                node->uniqOperators,
+                              node->uniqCollations,
                                &uniquestate->ps);
  
     return uniquestate;
diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c
index 157ac042b82a4ba648291ac8b4f677231ad24181..b090828c01ee59c5cf35d9e499fcc6af847b8766 100644 (file)
--- a/src/backend/executor/nodeWindowAgg.c
+++ b/src/backend/executor/nodeWindowAgg.c
@@ -2370,6 +2370,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
                                    node->partNumCols,
                                    node->partColIdx,
                                    node->partOperators,
+                                  node->partCollations,
                                    &winstate->ss.ps);
  
     if (node->ordNumCols > 0)
@@ -2378,6 +2379,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
                                    node->ordNumCols,
                                    node->ordColIdx,
                                    node->ordOperators,
+                                  node->ordCollations,
                                    &winstate->ss.ps);
  
     /*
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index c68bd7bcf73c2d5b1378ef49071255c31dd53e84..1ea6b845616f1bc7a8994a3afc58f0168c6c1150 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -297,6 +297,7 @@ _copyRecursiveUnion(const RecursiveUnion *from)
     {
         COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber));
         COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid));
+       COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid));
     }
     COPY_SCALAR_FIELD(numGroups);
  
@@ -956,6 +957,7 @@ _copyGroup(const Group *from)
     COPY_SCALAR_FIELD(numCols);
     COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber));
     COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
+   COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid));
  
     return newnode;
  }
@@ -977,6 +979,7 @@ _copyAgg(const Agg *from)
     {
         COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber));
         COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
+       COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid));
     }
     COPY_SCALAR_FIELD(numGroups);
     COPY_BITMAPSET_FIELD(aggParams);
@@ -1002,12 +1005,14 @@ _copyWindowAgg(const WindowAgg *from)
     {
         COPY_POINTER_FIELD(partColIdx, from->partNumCols * sizeof(AttrNumber));
         COPY_POINTER_FIELD(partOperators, from->partNumCols * sizeof(Oid));
+       COPY_POINTER_FIELD(partCollations, from->partNumCols * sizeof(Oid));
     }
     COPY_SCALAR_FIELD(ordNumCols);
     if (from->ordNumCols > 0)
     {
         COPY_POINTER_FIELD(ordColIdx, from->ordNumCols * sizeof(AttrNumber));
         COPY_POINTER_FIELD(ordOperators, from->ordNumCols * sizeof(Oid));
+       COPY_POINTER_FIELD(ordCollations, from->ordNumCols * sizeof(Oid));
     }
     COPY_SCALAR_FIELD(frameOptions);
     COPY_NODE_FIELD(startOffset);
@@ -1040,6 +1045,7 @@ _copyUnique(const Unique *from)
     COPY_SCALAR_FIELD(numCols);
     COPY_POINTER_FIELD(uniqColIdx, from->numCols * sizeof(AttrNumber));
     COPY_POINTER_FIELD(uniqOperators, from->numCols * sizeof(Oid));
+   COPY_POINTER_FIELD(uniqCollations, from->numCols * sizeof(Oid));
  
     return newnode;
  }
@@ -1089,6 +1095,7 @@ _copySetOp(const SetOp *from)
     COPY_SCALAR_FIELD(numCols);
     COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber));
     COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid));
+   COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid));
     COPY_SCALAR_FIELD(flagColIdx);
     COPY_SCALAR_FIELD(firstFlag);
     COPY_SCALAR_FIELD(numGroups);
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 69179a07c375b252d4a35c854174521177ed1776..910a738c205de69d869d122c25e642af20947b30 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -463,6 +463,7 @@ _outRecursiveUnion(StringInfo str, const RecursiveUnion *node)
     WRITE_INT_FIELD(numCols);
     WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols);
     WRITE_OID_ARRAY(dupOperators, node->numCols);
+   WRITE_OID_ARRAY(dupCollations, node->numCols);
     WRITE_LONG_FIELD(numGroups);
  }
  
@@ -774,6 +775,7 @@ _outAgg(StringInfo str, const Agg *node)
     WRITE_INT_FIELD(numCols);
     WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols);
     WRITE_OID_ARRAY(grpOperators, node->numCols);
+   WRITE_OID_ARRAY(grpCollations, node->numCols);
     WRITE_LONG_FIELD(numGroups);
     WRITE_BITMAPSET_FIELD(aggParams);
     WRITE_NODE_FIELD(groupingSets);
@@ -791,9 +793,11 @@ _outWindowAgg(StringInfo str, const WindowAgg *node)
     WRITE_INT_FIELD(partNumCols);
     WRITE_ATTRNUMBER_ARRAY(partColIdx, node->partNumCols);
     WRITE_OID_ARRAY(partOperators, node->partNumCols);
+   WRITE_OID_ARRAY(partCollations, node->partNumCols);
     WRITE_INT_FIELD(ordNumCols);
     WRITE_ATTRNUMBER_ARRAY(ordColIdx, node->ordNumCols);
     WRITE_OID_ARRAY(ordOperators, node->ordNumCols);
+   WRITE_OID_ARRAY(ordCollations, node->ordNumCols);
     WRITE_INT_FIELD(frameOptions);
     WRITE_NODE_FIELD(startOffset);
     WRITE_NODE_FIELD(endOffset);
@@ -814,6 +818,7 @@ _outGroup(StringInfo str, const Group *node)
     WRITE_INT_FIELD(numCols);
     WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols);
     WRITE_OID_ARRAY(grpOperators, node->numCols);
+   WRITE_OID_ARRAY(grpCollations, node->numCols);
  }
  
  static void
@@ -848,6 +853,7 @@ _outUnique(StringInfo str, const Unique *node)
     WRITE_INT_FIELD(numCols);
     WRITE_ATTRNUMBER_ARRAY(uniqColIdx, node->numCols);
     WRITE_OID_ARRAY(uniqOperators, node->numCols);
+   WRITE_OID_ARRAY(uniqCollations, node->numCols);
  }
  
  static void
@@ -875,6 +881,7 @@ _outSetOp(StringInfo str, const SetOp *node)
     WRITE_INT_FIELD(numCols);
     WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols);
     WRITE_OID_ARRAY(dupOperators, node->numCols);
+   WRITE_OID_ARRAY(dupCollations, node->numCols);
     WRITE_INT_FIELD(flagColIdx);
     WRITE_INT_FIELD(firstFlag);
     WRITE_LONG_FIELD(numGroups);
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 4b845b1bb71cb0ed833dfcef7639b8a76ac20b75..eff98febf1fa80748acf50e1b8817b1ddbaee9ab 100644 (file)
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -1677,6 +1677,7 @@ _readRecursiveUnion(void)
     READ_INT_FIELD(numCols);
     READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols);
     READ_OID_ARRAY(dupOperators, local_node->numCols);
+   READ_OID_ARRAY(dupCollations, local_node->numCols);
     READ_LONG_FIELD(numGroups);
  
     READ_DONE();
@@ -2143,6 +2144,7 @@ _readGroup(void)
     READ_INT_FIELD(numCols);
     READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
     READ_OID_ARRAY(grpOperators, local_node->numCols);
+   READ_OID_ARRAY(grpCollations, local_node->numCols);
  
     READ_DONE();
  }
@@ -2162,6 +2164,7 @@ _readAgg(void)
     READ_INT_FIELD(numCols);
     READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
     READ_OID_ARRAY(grpOperators, local_node->numCols);
+   READ_OID_ARRAY(grpCollations, local_node->numCols);
     READ_LONG_FIELD(numGroups);
     READ_BITMAPSET_FIELD(aggParams);
     READ_NODE_FIELD(groupingSets);
@@ -2184,9 +2187,11 @@ _readWindowAgg(void)
     READ_INT_FIELD(partNumCols);
     READ_ATTRNUMBER_ARRAY(partColIdx, local_node->partNumCols);
     READ_OID_ARRAY(partOperators, local_node->partNumCols);
+   READ_OID_ARRAY(partCollations, local_node->partNumCols);
     READ_INT_FIELD(ordNumCols);
     READ_ATTRNUMBER_ARRAY(ordColIdx, local_node->ordNumCols);
     READ_OID_ARRAY(ordOperators, local_node->ordNumCols);
+   READ_OID_ARRAY(ordCollations, local_node->ordNumCols);
     READ_INT_FIELD(frameOptions);
     READ_NODE_FIELD(startOffset);
     READ_NODE_FIELD(endOffset);
@@ -2212,6 +2217,7 @@ _readUnique(void)
     READ_INT_FIELD(numCols);
     READ_ATTRNUMBER_ARRAY(uniqColIdx, local_node->numCols);
     READ_OID_ARRAY(uniqOperators, local_node->numCols);
+   READ_OID_ARRAY(uniqCollations, local_node->numCols);
  
     READ_DONE();
  }
@@ -2290,6 +2296,7 @@ _readSetOp(void)
     READ_INT_FIELD(numCols);
     READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols);
     READ_OID_ARRAY(dupOperators, local_node->numCols);
+   READ_OID_ARRAY(dupCollations, local_node->numCols);
     READ_INT_FIELD(flagColIdx);
     READ_INT_FIELD(firstFlag);
     READ_LONG_FIELD(numGroups);
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 9fbe5b2a5fbedc2acd5f62ca1263cdc250d91ea7..93c56c657ce2e10aefd81ebbe4fdab453c3a7002 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -260,14 +260,14 @@ static Sort *make_sort_from_groupcols(List *groupcls,
                          Plan *lefttree);
  static Material *make_material(Plan *lefttree);
  static WindowAgg *make_windowagg(List *tlist, Index winref,
-              int partNumCols, AttrNumber *partColIdx, Oid *partOperators,
-              int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators,
+              int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
+              int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
                int frameOptions, Node *startOffset, Node *endOffset,
                Oid startInRangeFunc, Oid endInRangeFunc,
                Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst,
                Plan *lefttree);
  static Group *make_group(List *tlist, List *qual, int numGroupCols,
-          AttrNumber *grpColIdx, Oid *grpOperators,
+          AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
            Plan *lefttree);
  static Unique *make_unique_from_sortclauses(Plan *lefttree, List *distinctList);
  static Unique *make_unique_from_pathkeys(Plan *lefttree,
@@ -1387,6 +1387,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
     bool        newitems;
     int         numGroupCols;
     AttrNumber *groupColIdx;
+   Oid        *groupCollations;
     int         groupColPos;
     ListCell   *l;
  
@@ -1453,6 +1454,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
     newtlist = subplan->targetlist;
     numGroupCols = list_length(uniq_exprs);
     groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber));
+   groupCollations = (Oid *) palloc(numGroupCols * sizeof(Oid));
  
     groupColPos = 0;
     foreach(l, uniq_exprs)
@@ -1463,7 +1465,9 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
         tle = tlist_member(uniqexpr, newtlist);
         if (!tle)               /* shouldn't happen */
             elog(ERROR, "failed to find unique expression in subplan tlist");
-       groupColIdx[groupColPos++] = tle->resno;
+       groupColIdx[groupColPos] = tle->resno;
+       groupCollations[groupColPos] = exprCollation((Node *) tle->expr);
+       groupColPos++;
     }
  
     if (best_path->umethod == UNIQUE_PATH_HASH)
@@ -1501,6 +1505,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
                                  numGroupCols,
                                  groupColIdx,
                                  groupOperators,
+                                groupCollations,
                                  NIL,
                                  NIL,
                                  best_path->path.rows,
@@ -1883,6 +1888,8 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path)
                       extract_grouping_cols(best_path->groupClause,
                                             subplan->targetlist),
                       extract_grouping_ops(best_path->groupClause),
+                     extract_grouping_collations(best_path->groupClause,
+                                                 subplan->targetlist),
                       subplan);
  
     copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -1949,6 +1956,8 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path)
                     extract_grouping_cols(best_path->groupClause,
                                           subplan->targetlist),
                     extract_grouping_ops(best_path->groupClause),
+                   extract_grouping_collations(best_path->groupClause,
+                                               subplan->targetlist),
                     NIL,
                     NIL,
                     best_path->numGroups,
@@ -2110,6 +2119,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
                                          list_length((List *) linitial(rollup->gsets)),
                                          new_grpColIdx,
                                          extract_grouping_ops(rollup->groupClause),
+                                        extract_grouping_collations(rollup->groupClause, subplan->targetlist),
                                          rollup->gsets,
                                          NIL,
                                          rollup->numGroups,
@@ -2147,6 +2157,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
                         numGroupCols,
                         top_grpColIdx,
                         extract_grouping_ops(rollup->groupClause),
+                       extract_grouping_collations(rollup->groupClause, subplan->targetlist),
                         rollup->gsets,
                         chain,
                         rollup->numGroups,
@@ -2246,9 +2257,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
     int         partNumCols;
     AttrNumber *partColIdx;
     Oid        *partOperators;
+   Oid        *partCollations;
     int         ordNumCols;
     AttrNumber *ordColIdx;
     Oid        *ordOperators;
+   Oid        *ordCollations;
     ListCell   *lc;
  
     /*
@@ -2270,6 +2283,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
      */
     partColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numPart);
     partOperators = (Oid *) palloc(sizeof(Oid) * numPart);
+   partCollations = (Oid *) palloc(sizeof(Oid) * numPart);
  
     partNumCols = 0;
     foreach(lc, wc->partitionClause)
@@ -2280,11 +2294,13 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
         Assert(OidIsValid(sgc->eqop));
         partColIdx[partNumCols] = tle->resno;
         partOperators[partNumCols] = sgc->eqop;
+       partCollations[partNumCols] = exprCollation((Node *) tle->expr);
         partNumCols++;
     }
  
     ordColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numOrder);
     ordOperators = (Oid *) palloc(sizeof(Oid) * numOrder);
+   ordCollations = (Oid *) palloc(sizeof(Oid) * numOrder);
  
     ordNumCols = 0;
     foreach(lc, wc->orderClause)
@@ -2295,6 +2311,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
         Assert(OidIsValid(sgc->eqop));
         ordColIdx[ordNumCols] = tle->resno;
         ordOperators[ordNumCols] = sgc->eqop;
+       ordCollations[ordNumCols] = exprCollation((Node *) tle->expr);
         ordNumCols++;
     }
  
@@ -2304,9 +2321,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
                           partNumCols,
                           partColIdx,
                           partOperators,
+                         partCollations,
                           ordNumCols,
                           ordColIdx,
                           ordOperators,
+                         ordCollations,
                           wc->frameOptions,
                           wc->startOffset,
                           wc->endOffset,
@@ -5326,10 +5345,12 @@ make_recursive_union(List *tlist,
         int         keyno = 0;
         AttrNumber *dupColIdx;
         Oid        *dupOperators;
+       Oid        *dupCollations;
         ListCell   *slitem;
  
         dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
         dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+       dupCollations = (Oid *) palloc(sizeof(Oid) * numCols);
  
         foreach(slitem, distinctList)
         {
@@ -5339,11 +5360,13 @@ make_recursive_union(List *tlist,
  
             dupColIdx[keyno] = tle->resno;
             dupOperators[keyno] = sortcl->eqop;
+           dupCollations[keyno] = exprCollation((Node *) tle->expr);
             Assert(OidIsValid(dupOperators[keyno]));
             keyno++;
         }
         node->dupColIdx = dupColIdx;
         node->dupOperators = dupOperators;
+       node->dupCollations = dupCollations;
     }
     node->numGroups = numGroups;
  
@@ -6015,7 +6038,7 @@ materialize_finished_plan(Plan *subplan)
  Agg *
  make_agg(List *tlist, List *qual,
          AggStrategy aggstrategy, AggSplit aggsplit,
-        int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators,
+        int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
          List *groupingSets, List *chain,
          double dNumGroups, Plan *lefttree)
  {
@@ -6031,6 +6054,7 @@ make_agg(List *tlist, List *qual,
     node->numCols = numGroupCols;
     node->grpColIdx = grpColIdx;
     node->grpOperators = grpOperators;
+   node->grpCollations = grpCollations;
     node->numGroups = numGroups;
     node->aggParams = NULL;     /* SS_finalize_plan() will fill this */
     node->groupingSets = groupingSets;
@@ -6046,8 +6070,8 @@ make_agg(List *tlist, List *qual,
  
  static WindowAgg *
  make_windowagg(List *tlist, Index winref,
-              int partNumCols, AttrNumber *partColIdx, Oid *partOperators,
-              int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators,
+              int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
+              int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
                int frameOptions, Node *startOffset, Node *endOffset,
                Oid startInRangeFunc, Oid endInRangeFunc,
                Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst,
@@ -6060,9 +6084,11 @@ make_windowagg(List *tlist, Index winref,
     node->partNumCols = partNumCols;
     node->partColIdx = partColIdx;
     node->partOperators = partOperators;
+   node->partCollations = partCollations;
     node->ordNumCols = ordNumCols;
     node->ordColIdx = ordColIdx;
     node->ordOperators = ordOperators;
+   node->ordCollations = ordCollations;
     node->frameOptions = frameOptions;
     node->startOffset = startOffset;
     node->endOffset = endOffset;
@@ -6087,6 +6113,7 @@ make_group(List *tlist,
            int numGroupCols,
            AttrNumber *grpColIdx,
            Oid *grpOperators,
+          Oid *grpCollations,
            Plan *lefttree)
  {
     Group      *node = makeNode(Group);
@@ -6095,6 +6122,7 @@ make_group(List *tlist,
     node->numCols = numGroupCols;
     node->grpColIdx = grpColIdx;
     node->grpOperators = grpOperators;
+   node->grpCollations = grpCollations;
  
     plan->qual = qual;
     plan->targetlist = tlist;
@@ -6118,6 +6146,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
     int         keyno = 0;
     AttrNumber *uniqColIdx;
     Oid        *uniqOperators;
+   Oid        *uniqCollations;
     ListCell   *slitem;
  
     plan->targetlist = lefttree->targetlist;
@@ -6132,6 +6161,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
     Assert(numCols > 0);
     uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
     uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+   uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);
  
     foreach(slitem, distinctList)
     {
@@ -6140,6 +6170,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
  
         uniqColIdx[keyno] = tle->resno;
         uniqOperators[keyno] = sortcl->eqop;
+       uniqCollations[keyno] = exprCollation((Node *) tle->expr);
         Assert(OidIsValid(uniqOperators[keyno]));
         keyno++;
     }
@@ -6147,6 +6178,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
     node->numCols = numCols;
     node->uniqColIdx = uniqColIdx;
     node->uniqOperators = uniqOperators;
+   node->uniqCollations = uniqCollations;
  
     return node;
  }
@@ -6162,6 +6194,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
     int         keyno = 0;
     AttrNumber *uniqColIdx;
     Oid        *uniqOperators;
+   Oid        *uniqCollations;
     ListCell   *lc;
  
     plan->targetlist = lefttree->targetlist;
@@ -6177,6 +6210,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
     Assert(numCols >= 0 && numCols <= list_length(pathkeys));
     uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
     uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+   uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);
  
     foreach(lc, pathkeys)
     {
@@ -6245,6 +6279,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
  
         uniqColIdx[keyno] = tle->resno;
         uniqOperators[keyno] = eqop;
+       uniqCollations[keyno] = ec->ec_collation;
  
         keyno++;
     }
@@ -6252,6 +6287,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
     node->numCols = numCols;
     node->uniqColIdx = uniqColIdx;
     node->uniqOperators = uniqOperators;
+   node->uniqCollations = uniqCollations;
  
     return node;
  }
@@ -6296,6 +6332,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
     int         keyno = 0;
     AttrNumber *dupColIdx;
     Oid        *dupOperators;
+   Oid        *dupCollations;
     ListCell   *slitem;
  
     plan->targetlist = lefttree->targetlist;
@@ -6309,6 +6346,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
      */
     dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
     dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+   dupCollations = (Oid *) palloc(sizeof(Oid) * numCols);
  
     foreach(slitem, distinctList)
     {
@@ -6317,6 +6355,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
  
         dupColIdx[keyno] = tle->resno;
         dupOperators[keyno] = sortcl->eqop;
+       dupCollations[keyno] = exprCollation((Node *) tle->expr);
         Assert(OidIsValid(dupOperators[keyno]));
         keyno++;
     }
@@ -6326,6 +6365,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
     node->numCols = numCols;
     node->dupColIdx = dupColIdx;
     node->dupOperators = dupOperators;
+   node->dupCollations = dupCollations;
     node->flagColIdx = flagColIdx;
     node->firstFlag = firstFlag;
     node->numGroups = numGroups;
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index 14d1c67a940c837fdb1b0d01f0098884b39959c4..bb3b7969f26e4a1f7627f14a1622e293575f2376 100644 (file)
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -503,6 +503,31 @@ extract_grouping_ops(List *groupClause)
     return groupOperators;
  }
  
+/*
+ * extract_grouping_collations
+ *     Build a palloc'd array holding, for each SortGroupClause in groupClause,
+ *     the collation of the tlist expression that clause refers to.
+ */
+Oid *
+extract_grouping_collations(List *groupClause, List *tlist)
+{
+   Oid        *result = (Oid *) palloc(sizeof(Oid) * list_length(groupClause));
+   Oid        *next = result;
+   ListCell   *lc;
+
+   foreach(lc, groupClause)
+   {
+       SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
+       TargetEntry *entry = get_sortgroupclause_tle(sgc, tlist);
+
+       /* array order follows the clause list order */
+       *next = exprCollation((Node *) entry->expr);
+       next++;
+   }
+
+   return result;
+}
+
  /*
   * extract_grouping_cols - make an array of the grouping column resnos
   *     for a SortGroupClause list
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index 5b897d50eed7ca254916bf7a7cb389d84968b36b..803c23aaf50eda1a6b1f77b59f3b0a68c6a27325 100644 (file)
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -2657,7 +2657,7 @@ get_range_nulltest(PartitionKey key)
   * Compute the hash value for given partition key values.
   */
  uint64
-compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc,
+compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation,
                              Datum *values, bool *isnull)
  {
     int         i;
@@ -2678,7 +2678,7 @@ compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc,
              * datatype-specific hash functions of each partition key
              * attribute.
              */
-           hash = FunctionCall2(&partsupfunc[i], values[i], seed);
+           hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i], values[i], seed);
  
             /* Form a single 64-bit hash value */
             rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c
index b5c0889935b26b449ec65f7f3df8a8d52b78fe3d..31e0164ea99af4c33726ef9d1cddbad0bb86e326 100644 (file)
--- a/src/backend/partitioning/partprune.c
+++ b/src/backend/partitioning/partprune.c
@@ -2159,6 +2159,7 @@ get_matching_hash_bounds(PartitionPruneContext *context,
     int         i;
     uint64      rowHash;
     int         greatest_modulus;
+   Oid        *partcollation = context->partcollation;
  
     Assert(context->strategy == PARTITION_STRATEGY_HASH);
  
@@ -2179,7 +2180,7 @@ get_matching_hash_bounds(PartitionPruneContext *context,
             isnull[i] = bms_is_member(i, nullkeys);
  
         greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
-       rowHash = compute_partition_hash_value(partnatts, partsupfunc,
+       rowHash = compute_partition_hash_value(partnatts, partsupfunc, partcollation,
                                                values, isnull);
  
         if (partindices[rowHash % greatest_modulus] >= 0)
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index a8c0b156fa42fb0a1b4471a93951f8cd1a1e28e4..4a808b7606cf5da4cc49a80e87c2bdc57aaf71a9 100644 (file)
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -263,6 +263,11 @@ pg_set_regex_collation(Oid collation)
                      errhint("Use the COLLATE clause to set the collation explicitly.")));
         }
  
+       if (pg_regex_locale && !pg_regex_locale->deterministic)
+           ereport(ERROR,
+                   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                    errmsg("nondeterministic collations are not supported for regular expressions")));
+
  #ifdef USE_ICU
         if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU)
             pg_regex_strategy = PG_REGEX_LOCALE_ICU;
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index a34605ac94adc56169f838d64ecef8ebd05cea24..9cef018c0b4e139da4fb919cef5c8ea78b870985 100644 (file)
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -3957,7 +3957,7 @@ hash_array(PG_FUNCTION_ARGS)
      * apply the hash function to each array element.
      */
     InitFunctionCallInfoData(*locfcinfo, &typentry->hash_proc_finfo, 1,
-                            InvalidOid, NULL, NULL);
+                            PG_GET_COLLATION(), NULL, NULL);
  
     /* Loop over source data */
     nitems = ArrayGetNItems(ndims, dims);
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 853c9c01e925620e14f3a85f240539ffafe076f9..704e5720cf5b6153085a872b91751748f36b8c27 100644 (file)
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -45,7 +45,7 @@ static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
  static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
               pg_locale_t locale, bool locale_is_c);
  
-static int GenericMatchText(const char *s, int slen, const char *p, int plen);
+static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
  static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
  
  /*--------------------
@@ -148,8 +148,18 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
  
  /* Generic for all cases not requiring inline case-folding */
  static inline int
-GenericMatchText(const char *s, int slen, const char *p, int plen)
+GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
  {
+   if (collation && !lc_ctype_is_c(collation) && collation != DEFAULT_COLLATION_OID)
+   {
+       pg_locale_t     locale = pg_newlocale_from_collation(collation);
+
+       if (locale && !locale->deterministic)
+           ereport(ERROR,
+                   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                    errmsg("nondeterministic collations are not supported for LIKE")));
+   }
+
     if (pg_database_encoding_max_length() == 1)
         return SB_MatchText(s, slen, p, plen, 0, true);
     else if (GetDatabaseEncoding() == PG_UTF8)
@@ -184,6 +194,11 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
                      errhint("Use the COLLATE clause to set the collation explicitly.")));
         }
         locale = pg_newlocale_from_collation(collation);
+
+       if (locale && !locale->deterministic)
+           ereport(ERROR,
+                   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                    errmsg("nondeterministic collations are not supported for ILIKE")));
     }
  
     /*
@@ -240,7 +255,7 @@ namelike(PG_FUNCTION_ARGS)
     p = VARDATA_ANY(pat);
     plen = VARSIZE_ANY_EXHDR(pat);
  
-   result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
+   result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
  
     PG_RETURN_BOOL(result);
  }
@@ -261,7 +276,7 @@ namenlike(PG_FUNCTION_ARGS)
     p = VARDATA_ANY(pat);
     plen = VARSIZE_ANY_EXHDR(pat);
  
-   result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
+   result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
  
     PG_RETURN_BOOL(result);
  }
@@ -282,7 +297,7 @@ textlike(PG_FUNCTION_ARGS)
     p = VARDATA_ANY(pat);
     plen = VARSIZE_ANY_EXHDR(pat);
  
-   result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
+   result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
  
     PG_RETURN_BOOL(result);
  }
@@ -303,7 +318,7 @@ textnlike(PG_FUNCTION_ARGS)
     p = VARDATA_ANY(pat);
     plen = VARSIZE_ANY_EXHDR(pat);
  
-   result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
+   result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
  
     PG_RETURN_BOOL(result);
  }
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 69509811ef78a746de793d96a1525215debfba17..a65e63736c45db897ab6c80daa64639e51289df4 100644 (file)
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -257,6 +257,20 @@ match_pattern_prefix(Node *leftop,
         return NIL;
     patt = (Const *) rightop;
  
+   /*
+    * Not supported if the expression collation is nondeterministic.  The
+    * optimized equality or prefix tests use bytewise comparisons, which is
+    * not consistent with nondeterministic collations.  The actual
+    * pattern-matching implementation functions will later error out that
+    * pattern-matching is not supported with nondeterministic collations.
+    * (We could also error out here, but by doing it later we get more
+    * precise error messages.)  (It should be possible to support at least
+    * Pattern_Prefix_Exact, but no point as long as the actual
+    * pattern-matching implementations don't support it.)
+    */
+   if (!get_collation_isdeterministic(expr_coll))
+       return NIL;
+
     /*
      * Try to extract a fixed prefix from the pattern.
      */
diff --git a/src/backend/utils/adt/name.c b/src/backend/utils/adt/name.c
index 3a7887d4553b72436b7bc15c8b15e883aad92371..54425925edbe79a0c4093f0a77a01b53717e8024 100644 (file)
--- a/src/backend/utils/adt/name.c
+++ b/src/backend/utils/adt/name.c
@@ -131,14 +131,26 @@ namesend(PG_FUNCTION_ARGS)
   * have a '\0' terminator.  Whatever might be past the terminator is not
   * considered relevant to comparisons.
   */
+static int
+namecmp(Name arg1, Name arg2, Oid collid)
+{
+   const char *left = NameStr(*arg1);
+   const char *right = NameStr(*arg2);
+
+   /* Anything but the C collation goes through the varstr infrastructure */
+   if (collid != C_COLLATION_OID)
+       return varstr_cmp(left, strlen(left), right, strlen(right), collid);
+   /* C collation, the common system-catalog case: compare bytewise */
+   return strncmp(left, right, NAMEDATALEN);
+}
+
  Datum
  nameeq(PG_FUNCTION_ARGS)
  {
     Name        arg1 = PG_GETARG_NAME(0);
     Name        arg2 = PG_GETARG_NAME(1);
  
-   /* Collation doesn't matter: equal only if bitwise-equal */
-   PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) == 0);
+   PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) == 0);
  }
  
  Datum
@@ -147,21 +159,7 @@ namene(PG_FUNCTION_ARGS)
     Name        arg1 = PG_GETARG_NAME(0);
     Name        arg2 = PG_GETARG_NAME(1);
  
-   /* Collation doesn't matter: equal only if bitwise-equal */
-   PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) != 0);
-}
-
-static int
-namecmp(Name arg1, Name arg2, Oid collid)
-{
-   /* Fast path for common case used in system catalogs */
-   if (collid == C_COLLATION_OID)
-       return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
-
-   /* Else rely on the varstr infrastructure */
-   return varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
-                     NameStr(*arg2), strlen(NameStr(*arg2)),
-                     collid);
+   PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) != 0);
  }
  
  Datum
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
index 2d384a99447c93f45682822881dab6b5fb047f42..4db2d0d0e1764a79a7a83b3e9643a56e30ec9024 100644 (file)
--- a/src/backend/utils/adt/orderedsetaggs.c
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -1084,7 +1084,7 @@ mode_final(PG_FUNCTION_ARGS)
             last_abbrev_val = abbrev_val;
         }
         else if (abbrev_val == last_abbrev_val &&
-                DatumGetBool(FunctionCall2(equalfn, val, last_val)))
+                DatumGetBool(FunctionCall2Coll(equalfn, PG_GET_COLLATION(), val, last_val)))
         {
             /* value equal to previous value, count it */
             if (last_val_is_mode)
@@ -1345,6 +1345,7 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
                                               numDistinctCols,
                                               sortColIdx,
                                               osastate->qstate->eqOperators,
+                                             osastate->qstate->sortCollations,
                                               NULL);
         MemoryContextSwitchTo(oldContext);
         osastate->qstate->compareTuple = compareTuple;
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 7fe10e284aa09b3ed16fdc0c7cd52ddfe9a0e418..6e33d6534052559e5a4f840df0c15c08439794ce 100644 (file)
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1312,6 +1312,7 @@ pg_newlocale_from_collation(Oid collid)
         /* We'll fill in the result struct locally before allocating memory */
         memset(&result, 0, sizeof(result));
         result.provider = collform->collprovider;
+       result.deterministic = collform->collisdeterministic;
  
         if (collform->collprovider == COLLPROVIDER_LIBC)
         {
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index 6d443db7e2fa8386cf7ee0aa1475b1dc9ba75e60..72f8a9d69cff69aff2f379120325c9913d88324c 100644 (file)
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -676,6 +676,8 @@ ri_restrict(TriggerData *trigdata, bool is_no_action)
         {
             Oid         pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
             Oid         fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+           Oid         pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]);
+           Oid         fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]);
  
             quoteOneName(attname,
                          RIAttName(fk_rel, riinfo->fk_attnums[i]));
@@ -684,6 +686,8 @@ ri_restrict(TriggerData *trigdata, bool is_no_action)
                             paramname, pk_type,
                             riinfo->pf_eq_oprs[i],
                             attname, fk_type);
+           if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll))
+               ri_GenerateQualCollation(&querybuf, pk_coll);
             querysep = "AND";
             queryoids[i] = pk_type;
         }
@@ -778,6 +782,8 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS)
         {
             Oid         pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
             Oid         fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+           Oid         pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]);
+           Oid         fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]);
  
             quoteOneName(attname,
                          RIAttName(fk_rel, riinfo->fk_attnums[i]));
@@ -786,6 +792,8 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS)
                             paramname, pk_type,
                             riinfo->pf_eq_oprs[i],
                             attname, fk_type);
+           if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll))
+               ri_GenerateQualCollation(&querybuf, pk_coll);
             querysep = "AND";
             queryoids[i] = pk_type;
         }
@@ -890,6 +898,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS)
         {
             Oid         pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
             Oid         fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+           Oid         pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]);
+           Oid         fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]);
  
             quoteOneName(attname,
                          RIAttName(fk_rel, riinfo->fk_attnums[i]));
@@ -901,6 +911,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS)
                             paramname, pk_type,
                             riinfo->pf_eq_oprs[i],
                             attname, fk_type);
+           if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll))
+               ri_GenerateQualCollation(&querybuf, pk_coll);
             querysep = ",";
             qualsep = "AND";
             queryoids[i] = pk_type;
@@ -1065,6 +1077,8 @@ ri_set(TriggerData *trigdata, bool is_set_null)
         {
             Oid         pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
             Oid         fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+           Oid         pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]);
+           Oid         fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]);
  
             quoteOneName(attname,
                          RIAttName(fk_rel, riinfo->fk_attnums[i]));
@@ -1077,6 +1091,8 @@ ri_set(TriggerData *trigdata, bool is_set_null)
                             paramname, pk_type,
                             riinfo->pf_eq_oprs[i],
                             attname, fk_type);
+           if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll))
+               ri_GenerateQualCollation(&querybuf, pk_coll);
             querysep = ",";
             qualsep = "AND";
             queryoids[i] = pk_type;
@@ -2496,11 +2512,20 @@ ri_AttributesEqual(Oid eq_opr, Oid typeid,
     }
  
     /*
-    * Apply the comparison operator.  We assume it doesn't care about
-    * collations.
+    * Apply the comparison operator.
+    *
+    * Note: This function is part of a call stack that determines whether an
+    * update to a row is significant enough that it needs checking or action
+    * on the other side of a foreign-key constraint.  Therefore, the
+    * comparison here would need to be done with the collation of the *other*
+    * table.  For simplicity (e.g., we might not even have the other table
+    * open), we'll just use the default collation here, which could lead to
+    * some false negatives.  All this would break if we ever allow
+    * database-wide collations to be nondeterministic.
      */
-   return DatumGetBool(FunctionCall2(&entry->eq_opr_finfo,
-                                     oldvalue, newvalue));
+   return DatumGetBool(FunctionCall2Coll(&entry->eq_opr_finfo,
+                                         DEFAULT_COLLATION_OID,
+                                         oldvalue, newvalue));
  }
  
  /*
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 440fc8ed66362dbf1c4b45964b98f88a242dfd0a..4003631d8f5f64a0fa6d19a3f1086e057c9ec6b8 100644 (file)
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -23,6 +23,8 @@
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "utils/hashutils.h"
+#include "utils/lsyscache.h"
+#include "utils/pg_locale.h"
  #include "utils/varlena.h"
  #include "mb/pg_wchar.h"
  
@@ -717,6 +719,22 @@ bpcharoctetlen(PG_FUNCTION_ARGS)
   * need to be so careful.
   *****************************************************************************/
  
+static void
+check_collation_set(Oid collid)
+{
+   if (OidIsValid(collid))
+       return;                 /* a collation was supplied; nothing to report */
+
+   /*
+    * An invalid OID typically means that the parser could not resolve a
+    * conflict of implicit collations, so report it that way.
+    */
+   ereport(ERROR,
+           (errcode(ERRCODE_INDETERMINATE_COLLATION),
+            errmsg("could not determine which collation to use for string comparison"),
+            errhint("Use the COLLATE clause to set the collation explicitly.")));
+}
+
  Datum
  bpchareq(PG_FUNCTION_ARGS)
  {
@@ -725,18 +743,31 @@ bpchareq(PG_FUNCTION_ARGS)
     int         len1,
                 len2;
     bool        result;
+   Oid         collid = PG_GET_COLLATION();
+
+   check_collation_set(collid);
  
     len1 = bcTruelen(arg1);
     len2 = bcTruelen(arg2);
  
-   /*
-    * Since we only care about equality or not-equality, we can avoid all the
-    * expense of strcoll() here, and just do bitwise comparison.
-    */
-   if (len1 != len2)
-       result = false;
+   if (lc_collate_is_c(collid) ||
+       collid == DEFAULT_COLLATION_OID ||
+       pg_newlocale_from_collation(collid)->deterministic)
+   {
+       /*
+        * Since we only care about equality or not-equality, we can avoid all the
+        * expense of strcoll() here, and just do bitwise comparison.
+        */
+       if (len1 != len2)
+           result = false;
+       else
+           result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
+   }
     else
-       result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
+   {
+       result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
+                            collid) == 0);
+   }
  
     PG_FREE_IF_COPY(arg1, 0);
     PG_FREE_IF_COPY(arg2, 1);
@@ -752,18 +783,33 @@ bpcharne(PG_FUNCTION_ARGS)
     int         len1,
                 len2;
     bool        result;
+   Oid         collid = PG_GET_COLLATION();
+
+   /* Report an unresolved collation cleanly, as bpchareq() does, before collid is used below */
+   check_collation_set(collid);
  
     len1 = bcTruelen(arg1);
     len2 = bcTruelen(arg2);
  
-   /*
-    * Since we only care about equality or not-equality, we can avoid all the
-    * expense of strcoll() here, and just do bitwise comparison.
-    */
-   if (len1 != len2)
-       result = true;
+   if (lc_collate_is_c(collid) ||
+       collid == DEFAULT_COLLATION_OID ||
+       pg_newlocale_from_collation(collid)->deterministic)
+   {
+       /*
+        * Since we only care about equality or not-equality, we can avoid all the
+        * expense of strcoll() here, and just do bitwise comparison.
+        */
+       if (len1 != len2)
+           result = true;
+       else
+           result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
+   }
     else
-       result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
+   {
+       /* Nondeterministic collation: bytewise inequality is not conclusive */
+       result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
+                            collid) != 0);
+   }
  
     PG_FREE_IF_COPY(arg1, 0);
     PG_FREE_IF_COPY(arg2, 1);
@@ -933,23 +975,60 @@ bpchar_smaller(PG_FUNCTION_ARGS)
  /*
   * bpchar needs a specialized hash function because we want to ignore
   * trailing blanks in comparisons.
- *
- * Note: currently there is no need for locale-specific behavior here,
- * but if we ever change the semantics of bpchar comparison to trust
- * strcoll() completely, we'd need to do something different in non-C locales.
   */
  Datum
  hashbpchar(PG_FUNCTION_ARGS)
  {
     BpChar     *key = PG_GETARG_BPCHAR_PP(0);
+   Oid         collid = PG_GET_COLLATION();
     char       *keydata;
     int         keylen;
+   pg_locale_t mylocale = 0;
     Datum       result;
  
+   if (!collid)
+       ereport(ERROR,
+               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                errmsg("could not determine which collation to use for string hashing"),
+                errhint("Use the COLLATE clause to set the collation explicitly.")));
+
     keydata = VARDATA_ANY(key);
     keylen = bcTruelen(key);
  
-   result = hash_any((unsigned char *) keydata, keylen);
+   if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       mylocale = pg_newlocale_from_collation(collid);
+
+   if (!mylocale || mylocale->deterministic)
+   {
+       result = hash_any((unsigned char *) keydata, keylen);
+   }
+   else
+   {
+#ifdef USE_ICU
+       if (mylocale->provider == COLLPROVIDER_ICU)
+       {
+           int32_t     ulen = -1;
+           UChar      *uchar = NULL;
+           Size        bsize;
+           uint8_t    *buf;
+
+           ulen = icu_to_uchar(&uchar, keydata, keylen);
+
+           bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+                                   uchar, ulen, NULL, 0);
+           buf = palloc(bsize);
+           ucol_getSortKey(mylocale->info.icu.ucol,
+                           uchar, ulen, buf, bsize);
+
+           result = hash_any(buf, bsize);
+
+           pfree(buf);
+       }
+       else
+#endif
+           /* shouldn't happen */
+           elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+   }
  
     /* Avoid leaking memory for toasted inputs */
     PG_FREE_IF_COPY(key, 0);
@@ -961,15 +1040,60 @@ Datum
 hashbpcharextended(PG_FUNCTION_ARGS)
 {
    BpChar     *key = PG_GETARG_BPCHAR_PP(0);
+   Oid         collid = PG_GET_COLLATION();
    char       *keydata;
    int         keylen;
+   pg_locale_t mylocale = 0;
    Datum       result;
 
+   if (!collid)
+       ereport(ERROR,
+               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                errmsg("could not determine which collation to use for string hashing"),
+                errhint("Use the COLLATE clause to set the collation explicitly.")));
+
    keydata = VARDATA_ANY(key);
    keylen = bcTruelen(key);
 
-   result = hash_any_extended((unsigned char *) keydata, keylen,
-                              PG_GETARG_INT64(1));
+   if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       mylocale = pg_newlocale_from_collation(collid);
+
+   if (!mylocale || mylocale->deterministic)
+   {
+       result = hash_any_extended((unsigned char *) keydata, keylen,
+                                  PG_GETARG_INT64(1));
+   }
+   else
+   {
+#ifdef USE_ICU
+       if (mylocale->provider == COLLPROVIDER_ICU)
+       {
+           int32_t     ulen = -1;
+           UChar      *uchar = NULL;
+           Size        bsize;
+           uint8_t    *buf;
+
+           /*
+            * Convert only the blank-trimmed key (keydata/keylen), as
+            * hashbpchar() does, so trailing blanks never affect the hash.
+            */
+           ulen = icu_to_uchar(&uchar, keydata, keylen);
+
+           bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+                                   uchar, ulen, NULL, 0);
+           buf = palloc(bsize);
+           ucol_getSortKey(mylocale->info.icu.ucol,
+                           uchar, ulen, buf, bsize);
+
+           result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
+
+           pfree(buf);
+       }
+       else
+#endif
+           /* shouldn't happen */
+           elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+   }
  
     PG_FREE_IF_COPY(key, 0);
  
@@ -985,12 +1105,23 @@ hashbpcharextended(PG_FUNCTION_ARGS)
   */
  
  static int
-internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
+internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2, Oid collid)
  {
     int         result;
     int         len1,
                 len2;
  
+   check_collation_set(collid);
+
+   /*
+    * see internal_text_pattern_compare()
+    */
+   if (!get_collation_isdeterministic(collid))
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("nondeterministic collations are not supported for operator class \"%s\"",
+                       "bpchar_pattern_ops")));
+
     len1 = bcTruelen(arg1);
     len2 = bcTruelen(arg2);
  
@@ -1013,7 +1144,7 @@ bpchar_pattern_lt(PG_FUNCTION_ARGS)
     BpChar     *arg2 = PG_GETARG_BPCHAR_PP(1);
     int         result;
  
-   result = internal_bpchar_pattern_compare(arg1, arg2);
+   result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
  
     PG_FREE_IF_COPY(arg1, 0);
     PG_FREE_IF_COPY(arg2, 1);
@@ -1029,7 +1160,7 @@ bpchar_pattern_le(PG_FUNCTION_ARGS)
     BpChar     *arg2 = PG_GETARG_BPCHAR_PP(1);
     int         result;
  
-   result = internal_bpchar_pattern_compare(arg1, arg2);
+   result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
  
     PG_FREE_IF_COPY(arg1, 0);
     PG_FREE_IF_COPY(arg2, 1);
@@ -1045,7 +1176,7 @@ bpchar_pattern_ge(PG_FUNCTION_ARGS)
     BpChar     *arg2 = PG_GETARG_BPCHAR_PP(1);
     int         result;
  
-   result = internal_bpchar_pattern_compare(arg1, arg2);
+   result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
  
     PG_FREE_IF_COPY(arg1, 0);
     PG_FREE_IF_COPY(arg2, 1);
@@ -1061,7 +1192,7 @@ bpchar_pattern_gt(PG_FUNCTION_ARGS)
     BpChar     *arg2 = PG_GETARG_BPCHAR_PP(1);
     int         result;
  
-   result = internal_bpchar_pattern_compare(arg1, arg2);
+   result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
  
     PG_FREE_IF_COPY(arg1, 0);
     PG_FREE_IF_COPY(arg2, 1);
@@ -1077,7 +1208,7 @@ btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
     BpChar     *arg2 = PG_GETARG_BPCHAR_PP(1);
     int         result;
  
-   result = internal_bpchar_pattern_compare(arg1, arg2);
+   result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
  
     PG_FREE_IF_COPY(arg1, 0);
     PG_FREE_IF_COPY(arg2, 1);
@@ -1090,8 +1221,17 @@ Datum
  btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
  {
     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+   Oid         collid = ssup->ssup_collation;
     MemoryContext oldcontext;
  
+   check_collation_set(collid);
+
+   if (!get_collation_isdeterministic(collid))
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("nondeterministic collations are not supported for operator class \"%s\"",
+                       "bpchar_pattern_ops")));
+
     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
  
     /* Use generic string SortSupport, forcing "C" collation */
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 39c394331b6d65c7d1c9ae1820ec03a86942fbb3..68a6e49aeb4076e21f69f1a7ba8bce87bbb30ae0 100644 (file)
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -122,13 +122,14 @@ static text *text_substring(Datum str,
                int32 length,
                bool length_not_specified);
  static text *text_overlay(text *t1, text *t2, int sp, int sl);
-static int text_position(text *t1, text *t2);
-static void text_position_setup(text *t1, text *t2, TextPositionState *state);
+static int text_position(text *t1, text *t2, Oid collid);
+static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
  static bool text_position_next(TextPositionState *state);
  static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
  static char *text_position_get_match_ptr(TextPositionState *state);
  static int text_position_get_match_pos(TextPositionState *state);
  static void text_position_cleanup(TextPositionState *state);
+static void check_collation_set(Oid collid);
  static int text_cmp(text *arg1, text *arg2, Oid collid);
  static bytea *bytea_catenate(bytea *t1, bytea *t2);
  static bytea *bytea_substring(Datum str,
@@ -1094,7 +1095,7 @@ textpos(PG_FUNCTION_ARGS)
     text       *str = PG_GETARG_TEXT_PP(0);
     text       *search_str = PG_GETARG_TEXT_PP(1);
  
-   PG_RETURN_INT32((int32) text_position(str, search_str));
+   PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
  }
  
  /*
@@ -1112,7 +1113,7 @@ textpos(PG_FUNCTION_ARGS)
   * functions.
   */
  static int
-text_position(text *t1, text *t2)
+text_position(text *t1, text *t2, Oid collid)
  {
     TextPositionState state;
     int         result;
@@ -1120,7 +1121,7 @@ text_position(text *t1, text *t2)
     if (VARSIZE_ANY_EXHDR(t1) < 1 || VARSIZE_ANY_EXHDR(t2) < 1)
         return 0;
  
-   text_position_setup(t1, t2, &state);
+   text_position_setup(t1, t2, collid, &state);
     if (!text_position_next(&state))
         result = 0;
     else
@@ -1147,10 +1148,21 @@ text_position(text *t1, text *t2)
   */
  
  static void
-text_position_setup(text *t1, text *t2, TextPositionState *state)
+text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
  {
     int         len1 = VARSIZE_ANY_EXHDR(t1);
     int         len2 = VARSIZE_ANY_EXHDR(t2);
+   pg_locale_t mylocale = 0;
+
+   check_collation_set(collid);
+
+   if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       mylocale = pg_newlocale_from_collation(collid);
+
+   if (mylocale && !mylocale->deterministic)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("nondeterministic collations are not supported for substring searches")));
  
     Assert(len1 > 0);
     Assert(len2 > 0);
@@ -1429,6 +1441,22 @@ text_position_cleanup(TextPositionState *state)
     /* no cleanup needed */
  }
  
+/*
+ * Error out unless collid is valid.  An invalid OID typically means the parser
+ * could not resolve a conflict of implicit collations, so report it that way.
+ */
+static void
+check_collation_set(Oid collid)
+{
+   if (OidIsValid(collid))
+       return;
+
+   ereport(ERROR,
+           (errcode(ERRCODE_INDETERMINATE_COLLATION),
+            errmsg("could not determine which collation to use for string comparison"),
+            errhint("Use the COLLATE clause to set the collation explicitly.")));
+}
+
  /* varstr_cmp()
   * Comparison function for text strings with given lengths.
   * Includes locale support, but must copy strings to temporary memory
@@ -1441,6 +1469,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
  {
     int         result;
  
+   check_collation_set(collid);
+
     /*
      * Unfortunately, there is no strncoll(), so in the non-C locale case we
      * have to do some memory copying.  This turns out to be significantly
@@ -1462,20 +1492,7 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
         pg_locale_t mylocale = 0;
  
         if (collid != DEFAULT_COLLATION_OID)
-       {
-           if (!OidIsValid(collid))
-           {
-               /*
-                * This typically means that the parser could not resolve a
-                * conflict of implicit collations, so report it that way.
-                */
-               ereport(ERROR,
-                       (errcode(ERRCODE_INDETERMINATE_COLLATION),
-                        errmsg("could not determine which collation to use for string comparison"),
-                        errhint("Use the COLLATE clause to set the collation explicitly.")));
-           }
             mylocale = pg_newlocale_from_collation(collid);
-       }
  
         /*
          * memcmp() can't tell us which of two unequal strings sorts first,
@@ -1558,13 +1575,9 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
                 ereport(ERROR,
                         (errmsg("could not compare Unicode strings: %m")));
  
-           /*
-            * In some locales wcscoll() can claim that nonidentical strings
-            * are equal.  Believing that would be bad news for a number of
-            * reasons, so we follow Perl's lead and sort "equal" strings
-            * according to strcmp (on the UTF-8 representation).
-            */
-           if (result == 0)
+           /* Break tie if necessary. */
+           if (result == 0 &&
+               (!mylocale || mylocale->deterministic))
             {
                 result = memcmp(arg1, arg2, Min(len1, len2));
                 if ((result == 0) && (len1 != len2))
@@ -1649,13 +1662,9 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
         else
             result = strcoll(a1p, a2p);
  
-       /*
-        * In some locales strcoll() can claim that nonidentical strings are
-        * equal.  Believing that would be bad news for a number of reasons,
-        * so we follow Perl's lead and sort "equal" strings according to
-        * strcmp().
-        */
-       if (result == 0)
+       /* Break tie if necessary. */
+       if (result == 0 &&
+           (!mylocale || mylocale->deterministic))
             result = strcmp(a1p, a2p);
  
         if (a1p != a1buf)
@@ -1699,33 +1708,52 @@ text_cmp(text *arg1, text *arg2, Oid collid)
  Datum
  texteq(PG_FUNCTION_ARGS)
  {
-   Datum       arg1 = PG_GETARG_DATUM(0);
-   Datum       arg2 = PG_GETARG_DATUM(1);
+   Oid         collid = PG_GET_COLLATION();
     bool        result;
-   Size        len1,
-               len2;
  
-   /*
-    * Since we only care about equality or not-equality, we can avoid all the
-    * expense of strcoll() here, and just do bitwise comparison.  In fact, we
-    * don't even have to do a bitwise comparison if we can show the lengths
-    * of the strings are unequal; which might save us from having to detoast
-    * one or both values.
-    */
-   len1 = toast_raw_datum_size(arg1);
-   len2 = toast_raw_datum_size(arg2);
-   if (len1 != len2)
-       result = false;
+   check_collation_set(collid);
+
+   if (lc_collate_is_c(collid) ||
+       collid == DEFAULT_COLLATION_OID ||
+       pg_newlocale_from_collation(collid)->deterministic)
+   {
+       Datum       arg1 = PG_GETARG_DATUM(0);
+       Datum       arg2 = PG_GETARG_DATUM(1);
+       Size        len1,
+                   len2;
+
+       /*
+        * Since we only care about equality or not-equality, we can avoid all the
+        * expense of strcoll() here, and just do bitwise comparison.  In fact, we
+        * don't even have to do a bitwise comparison if we can show the lengths
+        * of the strings are unequal; which might save us from having to detoast
+        * one or both values.
+        */
+       len1 = toast_raw_datum_size(arg1);
+       len2 = toast_raw_datum_size(arg2);
+       if (len1 != len2)
+           result = false;
+       else
+       {
+           text       *targ1 = DatumGetTextPP(arg1);
+           text       *targ2 = DatumGetTextPP(arg2);
+
+           result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+                            len1 - VARHDRSZ) == 0);
+
+           PG_FREE_IF_COPY(targ1, 0);
+           PG_FREE_IF_COPY(targ2, 1);
+       }
+   }
     else
     {
-       text       *targ1 = DatumGetTextPP(arg1);
-       text       *targ2 = DatumGetTextPP(arg2);
+       text       *arg1 = PG_GETARG_TEXT_PP(0);
+       text       *arg2 = PG_GETARG_TEXT_PP(1);
  
-       result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
-                        len1 - VARHDRSZ) == 0);
+       result = (text_cmp(arg1, arg2, collid) == 0);
  
-       PG_FREE_IF_COPY(targ1, 0);
-       PG_FREE_IF_COPY(targ2, 1);
+       PG_FREE_IF_COPY(arg1, 0);
+       PG_FREE_IF_COPY(arg2, 1);
     }
  
     PG_RETURN_BOOL(result);
@@ -1734,27 +1762,46 @@ texteq(PG_FUNCTION_ARGS)
  Datum
  textne(PG_FUNCTION_ARGS)
  {
-   Datum       arg1 = PG_GETARG_DATUM(0);
-   Datum       arg2 = PG_GETARG_DATUM(1);
+   Oid         collid = PG_GET_COLLATION();
     bool        result;
-   Size        len1,
-               len2;
  
-   /* See comment in texteq() */
-   len1 = toast_raw_datum_size(arg1);
-   len2 = toast_raw_datum_size(arg2);
-   if (len1 != len2)
-       result = true;
+   check_collation_set(collid);
+
+   if (lc_collate_is_c(collid) ||
+       collid == DEFAULT_COLLATION_OID ||
+       pg_newlocale_from_collation(collid)->deterministic)
+   {
+       Datum       arg1 = PG_GETARG_DATUM(0);
+       Datum       arg2 = PG_GETARG_DATUM(1);
+       Size        len1,
+                   len2;
+
+       /* See comment in texteq() */
+       len1 = toast_raw_datum_size(arg1);
+       len2 = toast_raw_datum_size(arg2);
+       if (len1 != len2)
+           result = true;
+       else
+       {
+           text       *targ1 = DatumGetTextPP(arg1);
+           text       *targ2 = DatumGetTextPP(arg2);
+
+           result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+                            len1 - VARHDRSZ) != 0);
+
+           PG_FREE_IF_COPY(targ1, 0);
+           PG_FREE_IF_COPY(targ2, 1);
+       }
+   }
     else
     {
-       text       *targ1 = DatumGetTextPP(arg1);
-       text       *targ2 = DatumGetTextPP(arg2);
+       text       *arg1 = PG_GETARG_TEXT_PP(0);
+       text       *arg2 = PG_GETARG_TEXT_PP(1);
  
-       result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
-                        len1 - VARHDRSZ) != 0);
+       result = (text_cmp(arg1, arg2, collid) != 0);
  
-       PG_FREE_IF_COPY(targ1, 0);
-       PG_FREE_IF_COPY(targ2, 1);
+       PG_FREE_IF_COPY(arg1, 0);
+       PG_FREE_IF_COPY(arg2, 1);
     }
  
     PG_RETURN_BOOL(result);
@@ -1825,10 +1872,22 @@ text_starts_with(PG_FUNCTION_ARGS)
  {
     Datum       arg1 = PG_GETARG_DATUM(0);
     Datum       arg2 = PG_GETARG_DATUM(1);
+   Oid         collid = PG_GET_COLLATION();
+   pg_locale_t mylocale = 0;
     bool        result;
     Size        len1,
                 len2;
  
+   check_collation_set(collid);
+
+   if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       mylocale = pg_newlocale_from_collation(collid);
+
+   if (mylocale && !mylocale->deterministic)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("nondeterministic collations are not supported for substring searches")));
+
     len1 = toast_raw_datum_size(arg1);
     len2 = toast_raw_datum_size(arg2);
     if (len2 > len1)
@@ -1898,6 +1957,8 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
     VarStringSortSupport *sss;
     pg_locale_t locale = 0;
  
+   check_collation_set(collid);
+
     /*
      * If possible, set ssup->comparator to a function which can be used to
      * directly compare two datums.  If we can do this, we'll avoid the
@@ -1934,20 +1995,7 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
          * result.
          */
         if (collid != DEFAULT_COLLATION_OID)
-       {
-           if (!OidIsValid(collid))
-           {
-               /*
-                * This typically means that the parser could not resolve a
-                * conflict of implicit collations, so report it that way.
-                */
-               ereport(ERROR,
-                       (errcode(ERRCODE_INDETERMINATE_COLLATION),
-                        errmsg("could not determine which collation to use for string comparison"),
-                        errhint("Use the COLLATE clause to set the collation explicitly.")));
-           }
             locale = pg_newlocale_from_collation(collid);
-       }
  
         /*
          * There is a further exception on Windows.  When the database
@@ -2328,12 +2376,9 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
     else
         result = strcoll(sss->buf1, sss->buf2);
  
-   /*
-    * In some locales strcoll() can claim that nonidentical strings are
-    * equal. Believing that would be bad news for a number of reasons, so we
-    * follow Perl's lead and sort "equal" strings according to strcmp().
-    */
-   if (result == 0)
+   /* Break tie if necessary. */
+   if (result == 0 &&
+       (!sss->locale || sss->locale->deterministic))
         result = strcmp(sss->buf1, sss->buf2);
  
     /* Cache result, perhaps saving an expensive strcoll() call next time */
@@ -2760,10 +2805,18 @@ nameeqtext(PG_FUNCTION_ARGS)
     text       *arg2 = PG_GETARG_TEXT_PP(1);
     size_t      len1 = strlen(NameStr(*arg1));
     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
+   Oid         collid = PG_GET_COLLATION();
     bool        result;
  
-   result = (len1 == len2 &&
-             memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
+   check_collation_set(collid);
+
+   if (collid == C_COLLATION_OID)
+       result = (len1 == len2 &&
+                 memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
+   else
+       result = (varstr_cmp(NameStr(*arg1), len1,
+                            VARDATA_ANY(arg2), len2,
+                            collid) == 0);
  
     PG_FREE_IF_COPY(arg2, 1);
  
@@ -2777,10 +2830,18 @@ texteqname(PG_FUNCTION_ARGS)
     Name        arg2 = PG_GETARG_NAME(1);
     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
     size_t      len2 = strlen(NameStr(*arg2));
+   Oid         collid = PG_GET_COLLATION();
     bool        result;
  
-   result = (len1 == len2 &&
-             memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
+   check_collation_set(collid);
+
+   if (collid == C_COLLATION_OID)
+       result = (len1 == len2 &&
+                 memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
+   else
+       result = (varstr_cmp(VARDATA_ANY(arg1), len1,
+                            NameStr(*arg2), len2,
+                            collid) == 0);
  
     PG_FREE_IF_COPY(arg1, 0);
  
@@ -2794,10 +2855,18 @@ namenetext(PG_FUNCTION_ARGS)
     text       *arg2 = PG_GETARG_TEXT_PP(1);
     size_t      len1 = strlen(NameStr(*arg1));
     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
+   Oid         collid = PG_GET_COLLATION();
     bool        result;
  
-   result = !(len1 == len2 &&
-              memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
+   check_collation_set(collid);
+
+   if (collid == C_COLLATION_OID)
+       result = !(len1 == len2 &&
+                  memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
+   else
+       result = !(varstr_cmp(NameStr(*arg1), len1,
+                             VARDATA_ANY(arg2), len2,
+                             collid) == 0);
  
     PG_FREE_IF_COPY(arg2, 1);
  
@@ -2811,10 +2880,18 @@ textnename(PG_FUNCTION_ARGS)
     Name        arg2 = PG_GETARG_NAME(1);
     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
     size_t      len2 = strlen(NameStr(*arg2));
+   Oid         collid = PG_GET_COLLATION();
     bool        result;
  
-   result = !(len1 == len2 &&
-              memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
+   check_collation_set(collid);
+
+   if (collid == C_COLLATION_OID)
+       result = !(len1 == len2 &&
+                  memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
+   else
+       result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
+                             NameStr(*arg2), len2,
+                             collid) == 0);
  
     PG_FREE_IF_COPY(arg1, 0);
  
@@ -2919,12 +2996,34 @@ textgename(PG_FUNCTION_ARGS)
   */
  
  static int
-internal_text_pattern_compare(text *arg1, text *arg2)
+internal_text_pattern_compare(text *arg1, text *arg2, Oid collid)
  {
     int         result;
     int         len1,
                 len2;
  
+   check_collation_set(collid);
+
+   /*
+    * XXX We cannot use a text_pattern_ops index for nondeterministic
+    * collations, because these operators intentionally ignore the collation.
+    * However, the planner has no way to know that, so it might choose such
+    * an index for an "=" clause, which would lead to wrong results.  This
+    * check here doesn't prevent choosing the index, but it will at least
+    * error out if the index is chosen.  A text_pattern_ops index on a column
+    * with nondeterministic collation is pretty useless anyway, since LIKE
+    * etc. won't work there either.  A future possibility would be to
+    * annotate the operator class or its members in the catalog to avoid the
+    * index.  Another alternative is to stay away from the *_pattern_ops
+    * operator classes and prefer creating LIKE-supporting indexes with
+    * COLLATE "C".
+    */
+   if (!get_collation_isdeterministic(collid))
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("nondeterministic collations are not supported for operator class \"%s\"",
+                       "text_pattern_ops")));
+
     len1 = VARSIZE_ANY_EXHDR(arg1);
     len2 = VARSIZE_ANY_EXHDR(arg2);
  
@@ -2947,7 +3046,7 @@ text_pattern_lt(PG_FUNCTION_ARGS)
     text       *arg2 = PG_GETARG_TEXT_PP(1);
     int         result;
  
-   result = internal_text_pattern_compare(arg1, arg2);
+   result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
  
     PG_FREE_IF_COPY(arg1, 0);
     PG_FREE_IF_COPY(arg2, 1);
@@ -2963,7 +3062,7 @@ text_pattern_le(PG_FUNCTION_ARGS)
     text       *arg2 = PG_GETARG_TEXT_PP(1);
     int         result;
  
-   result = internal_text_pattern_compare(arg1, arg2);
+   result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
  
     PG_FREE_IF_COPY(arg1, 0);
     PG_FREE_IF_COPY(arg2, 1);
@@ -2979,7 +3078,7 @@ text_pattern_ge(PG_FUNCTION_ARGS)
     text       *arg2 = PG_GETARG_TEXT_PP(1);
     int         result;
  
-   result = internal_text_pattern_compare(arg1, arg2);
+   result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
  
     PG_FREE_IF_COPY(arg1, 0);
     PG_FREE_IF_COPY(arg2, 1);
@@ -2995,7 +3094,7 @@ text_pattern_gt(PG_FUNCTION_ARGS)
     text       *arg2 = PG_GETARG_TEXT_PP(1);
     int         result;
  
-   result = internal_text_pattern_compare(arg1, arg2);
+   result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
  
     PG_FREE_IF_COPY(arg1, 0);
     PG_FREE_IF_COPY(arg2, 1);
@@ -3011,7 +3110,7 @@ bttext_pattern_cmp(PG_FUNCTION_ARGS)
     text       *arg2 = PG_GETARG_TEXT_PP(1);
     int         result;
  
-   result = internal_text_pattern_compare(arg1, arg2);
+   result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
  
     PG_FREE_IF_COPY(arg1, 0);
     PG_FREE_IF_COPY(arg2, 1);
@@ -3024,8 +3123,17 @@ Datum
  bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
  {
     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+   Oid         collid = ssup->ssup_collation;
     MemoryContext oldcontext;
  
+   check_collation_set(collid);
+
+   if (!get_collation_isdeterministic(collid))
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("nondeterministic collations are not supported for operator class \"%s\"",
+                       "text_pattern_ops")));
+
     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
  
     /* Use generic string SortSupport, forcing "C" collation */
@@ -4121,7 +4229,7 @@ replace_text(PG_FUNCTION_ARGS)
         PG_RETURN_TEXT_P(src_text);
     }
  
-   text_position_setup(src_text, from_sub_text, &state);
+   text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
  
     found = text_position_next(&state);
  
@@ -4482,7 +4590,7 @@ split_text(PG_FUNCTION_ARGS)
             PG_RETURN_TEXT_P(cstring_to_text(""));
     }
  
-   text_position_setup(inputstring, fldsep, &state);
+   text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
  
     /* identify bounds of first field */
     start_ptr = VARDATA_ANY(inputstring);
@@ -4538,11 +4646,12 @@ split_text(PG_FUNCTION_ARGS)
   * Convenience function to return true when two text params are equal.
   */
  static bool
-text_isequal(text *txt1, text *txt2)
+text_isequal(text *txt1, text *txt2, Oid collid)
  {
-   return DatumGetBool(DirectFunctionCall2(texteq,
-                                           PointerGetDatum(txt1),
-                                           PointerGetDatum(txt2)));
+   return DatumGetBool(DirectFunctionCall2Coll(texteq,
+                                               collid,
+                                               PointerGetDatum(txt1),
+                                               PointerGetDatum(txt2)));
  }
  
  /*
@@ -4633,7 +4742,7 @@ text_to_array_internal(PG_FUNCTION_ARGS)
             int         lbs[1];
  
             /* single element can be a NULL too */
-           is_null = null_string ? text_isequal(inputstring, null_string) : false;
+           is_null = null_string ? text_isequal(inputstring, null_string, PG_GET_COLLATION()) : false;
  
             elems[0] = PointerGetDatum(inputstring);
             nulls[0] = is_null;
@@ -4645,7 +4754,7 @@ text_to_array_internal(PG_FUNCTION_ARGS)
                                                      TEXTOID, -1, false, 'i'));
         }
  
-       text_position_setup(inputstring, fldsep, &state);
+       text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
  
         start_ptr = VARDATA_ANY(inputstring);
  
@@ -4673,7 +4782,7 @@ text_to_array_internal(PG_FUNCTION_ARGS)
  
             /* must build a temp text datum to pass to accumArrayResult */
             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
-           is_null = null_string ? text_isequal(result_text, null_string) : false;
+           is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
  
             /* stash away this field */
             astate = accumArrayResult(astate,
@@ -4715,7 +4824,7 @@ text_to_array_internal(PG_FUNCTION_ARGS)
  
             /* must build a temp text datum to pass to accumArrayResult */
             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
-           is_null = null_string ? text_isequal(result_text, null_string) : false;
+           is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
  
             /* stash away this field */
             astate = accumArrayResult(astate,
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 07e1cd76962fd02136484287cd8d14d9417ef68a..d05930bc4cf669ff2a5b77224adbc991ab2387a9 100644 (file)
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -170,13 +170,18 @@ int4hashfast(Datum datum)
  static bool
  texteqfast(Datum a, Datum b)
  {
-   return DatumGetBool(DirectFunctionCall2(texteq, a, b));
+   /*
+    * The use of DEFAULT_COLLATION_OID is fairly arbitrary here.  We just
+    * want to take the fast "deterministic" path in texteq().
+    */
+   return DatumGetBool(DirectFunctionCall2Coll(texteq, DEFAULT_COLLATION_OID, a, b));
  }
  
  static uint32
  texthashfast(Datum datum)
  {
-   return DatumGetInt32(DirectFunctionCall1(hashtext, datum));
+   /* DEFAULT_COLLATION_OID is a fairly arbitrary choice, as in texteqfast() */
+   return DatumGetInt32(DirectFunctionCall1Coll(hashtext, DEFAULT_COLLATION_OID, datum));
  }
  
  static bool
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index e88c45d268a5cf933f0d59ccdc513c66f250f515..59e6bcd856cece7528cb22c10e43263ffbe5a941 100644 (file)
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -908,6 +908,22 @@ get_collation_name(Oid colloid)
         return NULL;
  }
  
+/* Return pg_collation.collisdeterministic for colloid; error if not found. */
+bool
+get_collation_isdeterministic(Oid colloid)
+{
+   HeapTuple   colltuple = SearchSysCache1(COLLOID, ObjectIdGetDatum(colloid));
+   bool        isdeterministic;
+
+   if (!HeapTupleIsValid(colltuple))
+       elog(ERROR, "cache lookup failed for collation %u", colloid);
+
+   isdeterministic =
+       ((Form_pg_collation) GETSTRUCT(colltuple))->collisdeterministic;
+   ReleaseSysCache(colltuple);
+   return isdeterministic;
+}
+
  /*             ---------- CONSTRAINT CACHE ----------                   */
  
  /*
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index fd50a809ea4f0096c27b7f082dc9576f372e379e..4886090132e08ed35935bda109560e949ee4c5e9 100644 (file)
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -1765,8 +1765,8 @@ setup_collation(FILE *cmdfd)
      * in pg_collation.h.  But add it before reading system collations, so
      * that it wins if libc defines a locale named ucs_basic.
      */
-   PG_CMD_PRINTF3("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collencoding, collcollate, collctype)"
-                  "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', %d, 'C', 'C');\n\n",
+   PG_CMD_PRINTF3("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)"
+                  "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', true, %d, 'C', 'C');\n\n",
                    BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8);
  
     /* Now import all collations we can find in the operating system */
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 4c98ae4d7f4848084febb27907d31b0d68b0bcca..63699932c15d3a1cbb9e3a75ad91786acda91f1c 100644 (file)
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -13417,6 +13417,7 @@ dumpCollation(Archive *fout, CollInfo *collinfo)
     char       *qcollname;
     PGresult   *res;
     int         i_collprovider;
+   int         i_collisdeterministic;
     int         i_collcollate;
     int         i_collctype;
     const char *collprovider;
@@ -13434,28 +13435,35 @@ dumpCollation(Archive *fout, CollInfo *collinfo)
     qcollname = pg_strdup(fmtId(collinfo->dobj.name));
  
     /* Get collation-specific details */
+   appendPQExpBuffer(query, "SELECT ");
+
     if (fout->remoteVersion >= 100000)
-       appendPQExpBuffer(query, "SELECT "
+       appendPQExpBuffer(query,
                           "collprovider, "
-                         "collcollate, "
-                         "collctype, "
-                         "collversion "
-                         "FROM pg_catalog.pg_collation c "
-                         "WHERE c.oid = '%u'::pg_catalog.oid",
-                         collinfo->dobj.catId.oid);
+                         "collversion, ");
     else
-       appendPQExpBuffer(query, "SELECT "
+       appendPQExpBuffer(query,
                           "'c' AS collprovider, "
-                         "collcollate, "
-                         "collctype, "
-                         "NULL AS collversion "
-                         "FROM pg_catalog.pg_collation c "
-                         "WHERE c.oid = '%u'::pg_catalog.oid",
-                         collinfo->dobj.catId.oid);
+                         "NULL AS collversion, ");
+
+   if (fout->remoteVersion >= 120000)
+       appendPQExpBuffer(query,
+                         "collisdeterministic, ");
+   else
+       appendPQExpBuffer(query,
+                         "true AS collisdeterministic, ");
+
+   appendPQExpBuffer(query,
+                     "collcollate, "
+                     "collctype "
+                     "FROM pg_catalog.pg_collation c "
+                     "WHERE c.oid = '%u'::pg_catalog.oid",
+                     collinfo->dobj.catId.oid);
  
     res = ExecuteSqlQueryForSingleRow(fout, query->data);
  
     i_collprovider = PQfnumber(res, "collprovider");
+   i_collisdeterministic = PQfnumber(res, "collisdeterministic");
     i_collcollate = PQfnumber(res, "collcollate");
     i_collctype = PQfnumber(res, "collctype");
  
@@ -13482,6 +13490,9 @@ dumpCollation(Archive *fout, CollInfo *collinfo)
                       "unrecognized collation provider: %s\n",
                       collprovider);
  
+   if (strcmp(PQgetvalue(res, 0, i_collisdeterministic), "f") == 0)
+       appendPQExpBufferStr(q, ", deterministic = false");
+
     if (strcmp(collcollate, collctype) == 0)
     {
         appendPQExpBufferStr(q, ", locale = ");
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index 779e48437cd218e5be9e052c4dfe5384fb29f1c3..fd8ebee8cd30471fa072c3264b44d9a9ee97474e 100644 (file)
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -4106,7 +4106,7 @@ listCollations(const char *pattern, bool verbose, bool showSystem)
     PQExpBufferData buf;
     PGresult   *res;
     printQueryOpt myopt = pset.popt;
-   static const bool translate_columns[] = {false, false, false, false, false, false};
+   static const bool translate_columns[] = {false, false, false, false, false, true, false};
  
     if (pset.sversion < 90100)
     {
@@ -4134,6 +4134,21 @@ listCollations(const char *pattern, bool verbose, bool showSystem)
         appendPQExpBuffer(&buf,
                           ",\n       CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\"",
                           gettext_noop("Provider"));
+   else
+       appendPQExpBuffer(&buf,
+                         ",\n       'libc' AS \"%s\"",
+                         gettext_noop("Provider"));
+
+   if (pset.sversion >= 120000)
+       appendPQExpBuffer(&buf,
+                         ",\n       CASE WHEN c.collisdeterministic THEN '%s' ELSE '%s' END AS \"%s\"",
+                         gettext_noop("yes"), gettext_noop("no"),
+                         gettext_noop("Deterministic?"));
+   else
+       appendPQExpBuffer(&buf,
+                         ",\n       '%s' AS \"%s\"",
+                         gettext_noop("yes"),
+                         gettext_noop("Deterministic?"));
  
     if (verbose)
         appendPQExpBuffer(&buf,
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 45c33b63d615935d9d70dc3c9dd3f8df0804317b..d4dfe237c9907467faec8d58f72e62a814da9f76 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
   */
  
  /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 201903211
+#define CATALOG_VERSION_NO 201903221
  
  #endif
diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h
index 10fe711a9127d57833fa33f1200ebace1688e0fb..4d2fcb3858a6dff8e3a0818e5cc5d366d368da11 100644 (file)
--- a/src/include/catalog/pg_collation.h
+++ b/src/include/catalog/pg_collation.h
@@ -33,6 +33,7 @@ CATALOG(pg_collation,3456,CollationRelationId)
     Oid         collnamespace;  /* OID of namespace containing collation */
     Oid         collowner;      /* owner of collation */
     char        collprovider;   /* see constants below */
+   bool        collisdeterministic BKI_DEFAULT(t); /* is collation deterministic? */
     int32       collencoding;   /* encoding for this collation; -1 = "all" */
     NameData    collcollate;    /* LC_COLLATE setting */
     NameData    collctype;      /* LC_CTYPE setting */
@@ -61,6 +62,7 @@ typedef FormData_pg_collation *Form_pg_collation;
  extern Oid CollationCreate(const char *collname, Oid collnamespace,
                 Oid collowner,
                 char collprovider,
+               bool collisdeterministic,
                 int32 collencoding,
                 const char *collcollate, const char *collctype,
                 const char *collversion,
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 9003f2ce583539ab2c327b459f131028f5e04767..0cf7aa3495f1ccabd8c8bddac43feacaec34e045 100644 (file)
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -111,6 +111,7 @@ extern ExprState *execTuplesMatchPrepare(TupleDesc desc,
                        int numCols,
                        const AttrNumber *keyColIdx,
                        const Oid *eqOperators,
+                      const Oid *collations,
                        PlanState *parent);
  extern void execTuplesHashPrepare(int numCols,
                       const Oid *eqOperators,
@@ -121,6 +122,7 @@ extern TupleHashTable BuildTupleHashTable(PlanState *parent,
                     int numCols, AttrNumber *keyColIdx,
                     const Oid *eqfuncoids,
                     FmgrInfo *hashfunctions,
+                   Oid *collations,
                     long nbuckets, Size additionalsize,
                     MemoryContext tablecxt,
                     MemoryContext tempcxt, bool use_variable_hash_iv);
@@ -129,6 +131,7 @@ extern TupleHashTable BuildTupleHashTableExt(PlanState *parent,
                     int numCols, AttrNumber *keyColIdx,
                     const Oid *eqfuncoids,
                     FmgrInfo *hashfunctions,
+                   Oid *collations,
                     long nbuckets, Size additionalsize,
                     MemoryContext metacxt,
                     MemoryContext tablecxt,
@@ -257,6 +260,7 @@ extern ExprState *ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
                        int numCols,
                        const AttrNumber *keyColIdx,
                        const Oid *eqfunctions,
+                      const Oid *collations,
                        PlanState *parent);
  extern ProjectionInfo *ExecBuildProjectionInfo(List *targetList,
                         ExprContext *econtext,
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index e7bf158c1bdb5d1ded6a1b7e058ff84498903481..2c94b926d3764cd2317144536969374abb616c62 100644 (file)
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -337,6 +337,7 @@ typedef struct HashJoinTableData
     FmgrInfo   *outer_hashfunctions;    /* lookup data for hash functions */
     FmgrInfo   *inner_hashfunctions;    /* lookup data for hash functions */
     bool       *hashStrict;     /* is each hash join operator strict? */
+   Oid        *collations;     /* collations, presumably one per hash operator */
  
     Size        spaceUsed;      /* memory space currently used by tuples */
     Size        spaceAllowed;   /* upper limit for space used */
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h
index 1309b32b9035b2b8001babad9d600c4f988c0a2c..12337660235625cb07e9a5a1eabcc2e01624ed48 100644 (file)
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -24,7 +24,7 @@ extern Node *MultiExecHash(HashState *node);
  extern void ExecEndHash(HashState *node);
  extern void ExecReScanHash(HashState *node);
  
-extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators,
+extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
                     bool keepNulls);
  extern void ExecParallelHashTableAlloc(HashJoinTable hashtable,
                            int batchno);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 62eb1a06eefda4318582f0fa85fff559486b08f3..869c303e157b94212848b9c80e49ab84cac6c2c3 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -693,6 +693,7 @@ typedef struct TupleHashTableData
     AttrNumber *keyColIdx;      /* attr numbers of key columns */
     FmgrInfo   *tab_hash_funcs; /* hash functions for table datatype(s) */
     ExprState  *tab_eq_func;    /* comparator for table datatype(s) */
+   Oid        *tab_collations; /* collations for hash and comparison */
     MemoryContext tablecxt;     /* memory context containing table */
     MemoryContext tempcxt;      /* context for function evaluations */
     Size        entrysize;      /* actual size to make each hash entry */
@@ -862,6 +863,7 @@ typedef struct SubPlanState
     AttrNumber *keyColIdx;      /* control data for hash tables */
     Oid        *tab_eq_funcoids;    /* equality func oids for table
                                      * datatype(s) */
+   Oid        *tab_collations; /* collations for hash and comparison */
     FmgrInfo   *tab_hash_funcs; /* hash functions for table datatype(s) */
     FmgrInfo   *tab_eq_funcs;   /* equality functions for table datatype(s) */
     FmgrInfo   *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */
@@ -1872,6 +1874,7 @@ typedef struct HashJoinState
     List       *hj_OuterHashKeys;   /* list of ExprState nodes */
     List       *hj_InnerHashKeys;   /* list of ExprState nodes */
     List       *hj_HashOperators;   /* list of operator OIDs */
+   List       *hj_Collations;  /* presumably collation OIDs, one per hash operator */
     HashJoinTable hj_HashTable;
     uint32      hj_CurHashValue;
     int         hj_CurBucketNo;
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index d66a187a530933ec83d1873756b925acd4d7ac75..24740c31e3d35099cfbc4c84dde3745ae0a4c23d 100644 (file)
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -297,6 +297,7 @@ typedef struct RecursiveUnion
                                  * duplicate-ness */
     AttrNumber *dupColIdx;      /* their indexes in the target list */
     Oid        *dupOperators;   /* equality operators to compare with */
+   Oid        *dupCollations;  /* collations to compare with (presumably per column) */
     long        numGroups;      /* estimated number of groups in input */
  } RecursiveUnion;
  
@@ -773,6 +774,7 @@ typedef struct Group
     int         numCols;        /* number of grouping columns */
     AttrNumber *grpColIdx;      /* their indexes in the target list */
     Oid        *grpOperators;   /* equality operators to compare with */
+   Oid        *grpCollations;  /* collations to compare with (presumably per column) */
  } Group;
  
  /* ---------------
@@ -797,6 +799,7 @@ typedef struct Agg
     int         numCols;        /* number of grouping columns */
     AttrNumber *grpColIdx;      /* their indexes in the target list */
     Oid        *grpOperators;   /* equality operators to compare with */
+   Oid        *grpCollations;  /* collations to compare with (presumably per column) */
     long        numGroups;      /* estimated number of groups in input */
     Bitmapset  *aggParams;      /* IDs of Params used in Aggref inputs */
     /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
@@ -815,9 +818,11 @@ typedef struct WindowAgg
     int         partNumCols;    /* number of columns in partition clause */
     AttrNumber *partColIdx;     /* their indexes in the target list */
     Oid        *partOperators;  /* equality operators for partition columns */
+   Oid        *partCollations; /* collations for partition columns */
     int         ordNumCols;     /* number of columns in ordering clause */
     AttrNumber *ordColIdx;      /* their indexes in the target list */
     Oid        *ordOperators;   /* equality operators for ordering columns */
+   Oid        *ordCollations;  /* collations for ordering columns */
     int         frameOptions;   /* frame_clause options, see WindowDef */
     Node       *startOffset;    /* expression for starting bound, if any */
     Node       *endOffset;      /* expression for ending bound, if any */
@@ -839,6 +844,7 @@ typedef struct Unique
     int         numCols;        /* number of columns to check for uniqueness */
     AttrNumber *uniqColIdx;     /* their indexes in the target list */
     Oid        *uniqOperators;  /* equality operators to compare with */
+   Oid        *uniqCollations; /* collations for equality comparisons */
  } Unique;
  
  /* ------------
@@ -913,6 +919,7 @@ typedef struct SetOp
                                  * duplicate-ness */
     AttrNumber *dupColIdx;      /* their indexes in the target list */
     Oid        *dupOperators;   /* equality operators to compare with */
+   Oid        *dupCollations;  /* collations to compare with (presumably per column) */
     AttrNumber  flagColIdx;     /* where is the flag column, if any */
     int         firstFlag;      /* flag value for first input relation */
     long        numGroups;      /* estimated number of groups in input */
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 3bbdb5e2f7456c5a03ca7b20e9dcb56cc2e978c4..b093a3c8ac2b2cbc1f3573e04548247e86f85b08 100644 (file)
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -53,7 +53,7 @@ extern bool is_projection_capable_plan(Plan *plan);
  extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree);
  extern Agg *make_agg(List *tlist, List *qual,
          AggStrategy aggstrategy, AggSplit aggsplit,
-        int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators,
+        int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
          List *groupingSets, List *chain,
          double dNumGroups, Plan *lefttree);
  extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount);
diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h
index 58db79203bc6e41c5499c10814f6fd9d37435871..46d614f4fb610560bd95df13c2d6f75a4d4cf762 100644 (file)
--- a/src/include/optimizer/tlist.h
+++ b/src/include/optimizer/tlist.h
@@ -32,6 +32,7 @@ extern bool tlist_same_collations(List *tlist, List *colCollations, bool junkOK)
  extern void apply_tlist_labeling(List *dest_tlist, List *src_tlist);
  
  extern Oid *extract_grouping_ops(List *groupClause);
+extern Oid *extract_grouping_collations(List *groupClause, List *tlist);
  extern AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
  extern bool grouping_is_sortable(List *groupClause);
  extern bool grouping_is_hashable(List *groupClause);
diff --git a/src/include/partitioning/partbounds.h b/src/include/partitioning/partbounds.h
index b1ae39ad6355585c7bb007bd72c37d49b321659a..683e1574eae18355f923d3b1d7a5544e6eaba5ba 100644 (file)
--- a/src/include/partitioning/partbounds.h
+++ b/src/include/partitioning/partbounds.h
@@ -77,6 +77,7 @@ typedef struct PartitionBoundInfoData
  
  extern int get_hash_partition_greatest_modulus(PartitionBoundInfo b);
  extern uint64 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc,
+                            Oid *partcollation,
                              Datum *values, bool *isnull);
  extern List *get_qual_from_partbound(Relation rel, Relation parent,
                         PartitionBoundSpec *spec);
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index 16b0b1d2dccaee89002974f42744514c9d3d5f31..b9a9ecb7cc3a6c6e69d0b57767d8dc78fc9ab46a 100644 (file)
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -90,6 +90,7 @@ extern Oid    get_atttype(Oid relid, AttrNumber attnum);
  extern void get_atttypetypmodcoll(Oid relid, AttrNumber attnum,
                       Oid *typid, int32 *typmod, Oid *collid);
  extern char *get_collation_name(Oid colloid);
+extern bool get_collation_isdeterministic(Oid colloid);
  extern char *get_constraint_name(Oid conoid);
  extern char *get_language_name(Oid langoid, bool missing_ok);
  extern Oid get_opclass_family(Oid opclass);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 606952afd7d8ce97c326d07a5c8b6dc7be6ffd9a..a342a6254905e7d5941324a9c1d432122e5c2a17 100644 (file)
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -82,6 +82,7 @@ extern void cache_locale_time(void);
  struct pg_locale_struct
  {
     char        provider;
+   bool        deterministic;  /* presumably mirrors pg_collation.collisdeterministic */
     union
     {
  #ifdef HAVE_LOCALE_T
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index b66193d1be23e3fee21cb9a0eb8166f3992acfff..23d48f4ea35c9a373d3c8d3a0257420a898e6d15 100644 (file)
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1149,6 +1149,716 @@ SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE tes
   t        | t
  (1 row)
  
+-- nondeterministic collations
+CREATE COLLATION ctest_det (provider = icu, locale = 'und', deterministic = true);
+CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false);
+CREATE TABLE test6 (a int, b text);
+-- same string in different normal forms
+INSERT INTO test6 VALUES (1, U&'\00E4bc');
+INSERT INTO test6 VALUES (2, U&'\0061\0308bc');
+SELECT * FROM test6;
+ a |  b  
+---+-----
+ 1 | äbc
+ 2 | äbc
+(2 rows)
+
+SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det;
+ a |  b  
+---+-----
+ 1 | äbc
+(1 row)
+
+SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet;
+ a |  b  
+---+-----
+ 1 | äbc
+ 2 | äbc
+(2 rows)
+
+CREATE COLLATION case_sensitive (provider = icu, locale = 'und');
+CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false);
+SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
+ ?column? | ?column? 
+----------+----------
+ t        | f
+(1 row)
+
+SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+ ?column? | ?column? 
+----------+----------
+ t        | t
+(1 row)
+
+CREATE TABLE test1cs (x text COLLATE case_sensitive);
+CREATE TABLE test2cs (x text COLLATE case_sensitive);
+CREATE TABLE test3cs (x text COLLATE case_sensitive);
+INSERT INTO test1cs VALUES ('abc'), ('def'), ('ghi');
+INSERT INTO test2cs VALUES ('ABC'), ('ghi');
+INSERT INTO test3cs VALUES ('abc'), ('ABC'), ('def'), ('ghi');
+SELECT x FROM test3cs WHERE x = 'abc';
+  x  
+-----
+ abc
+(1 row)
+
+SELECT x FROM test3cs WHERE x <> 'abc';
+  x  
+-----
+ ABC
+ def
+ ghi
+(3 rows)
+
+SELECT x FROM test3cs WHERE x LIKE 'a%';
+  x  
+-----
+ abc
+(1 row)
+
+SELECT x FROM test3cs WHERE x ILIKE 'a%';
+  x  
+-----
+ abc
+ ABC
+(2 rows)
+
+SELECT x FROM test3cs WHERE x SIMILAR TO 'a%';
+  x  
+-----
+ abc
+(1 row)
+
+SELECT x FROM test3cs WHERE x ~ 'a';
+  x  
+-----
+ abc
+(1 row)
+
+SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x;
+  x  
+-----
+ abc
+ ABC
+ def
+ ghi
+(4 rows)
+
+SELECT x FROM test2cs UNION SELECT x FROM test1cs ORDER BY x;
+  x  
+-----
+ abc
+ ABC
+ def
+ ghi
+(4 rows)
+
+SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs;
+  x  
+-----
+ ghi
+(1 row)
+
+SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs;
+  x  
+-----
+ ghi
+(1 row)
+
+SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs;
+  x  
+-----
+ abc
+ def
+(2 rows)
+
+SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs;
+  x  
+-----
+ ABC
+(1 row)
+
+SELECT DISTINCT x FROM test3cs ORDER BY x;
+  x  
+-----
+ abc
+ ABC
+ def
+ ghi
+(4 rows)
+
+SELECT count(DISTINCT x) FROM test3cs;
+ count 
+-------
+     4
+(1 row)
+
+SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x;
+  x  | count 
+-----+-------
+ abc |     1
+ ABC |     1
+ def |     1
+ ghi |     1
+(4 rows)
+
+SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x;
+  x  | row_number | rank 
+-----+------------+------
+ abc |          1 |    1
+ ABC |          2 |    2
+ def |          3 |    3
+ ghi |          4 |    4
+(4 rows)
+
+CREATE UNIQUE INDEX ON test1cs (x);  -- ok
+INSERT INTO test1cs VALUES ('ABC');  -- ok
+CREATE UNIQUE INDEX ON test3cs (x);  -- ok
+SELECT string_to_array('ABC,DEF,GHI' COLLATE case_sensitive, ',', 'abc');
+ string_to_array 
+-----------------
+ {ABC,DEF,GHI}
+(1 row)
+
+SELECT string_to_array('ABCDEFGHI' COLLATE case_sensitive, NULL, 'b');
+   string_to_array   
+---------------------
+ {A,B,C,D,E,F,G,H,I}
+(1 row)
+
+CREATE TABLE test1ci (x text COLLATE case_insensitive);
+CREATE TABLE test2ci (x text COLLATE case_insensitive);
+CREATE TABLE test3ci (x text COLLATE case_insensitive);
+CREATE INDEX ON test3ci (x text_pattern_ops);  -- error
+ERROR:  nondeterministic collations are not supported for operator class "text_pattern_ops"
+INSERT INTO test1ci VALUES ('abc'), ('def'), ('ghi');
+INSERT INTO test2ci VALUES ('ABC'), ('ghi');
+INSERT INTO test3ci VALUES ('abc'), ('ABC'), ('def'), ('ghi');
+SELECT x FROM test3ci WHERE x = 'abc';
+  x  
+-----
+ abc
+ ABC
+(2 rows)
+
+SELECT x FROM test3ci WHERE x <> 'abc';
+  x  
+-----
+ def
+ ghi
+(2 rows)
+
+SELECT x FROM test3ci WHERE x LIKE 'a%';
+ERROR:  nondeterministic collations are not supported for LIKE
+SELECT x FROM test3ci WHERE x ILIKE 'a%';
+ERROR:  nondeterministic collations are not supported for ILIKE
+SELECT x FROM test3ci WHERE x SIMILAR TO 'a%';
+ERROR:  nondeterministic collations are not supported for regular expressions
+SELECT x FROM test3ci WHERE x ~ 'a';
+ERROR:  nondeterministic collations are not supported for regular expressions
+SELECT x FROM test1ci UNION SELECT x FROM test2ci ORDER BY x;
+  x  
+-----
+ abc
+ def
+ ghi
+(3 rows)
+
+SELECT x FROM test2ci UNION SELECT x FROM test1ci ORDER BY x;
+  x  
+-----
+ ABC
+ def
+ ghi
+(3 rows)
+
+SELECT x FROM test1ci INTERSECT SELECT x FROM test2ci;
+  x  
+-----
+ ghi
+ abc
+(2 rows)
+
+SELECT x FROM test2ci INTERSECT SELECT x FROM test1ci;
+  x  
+-----
+ ghi
+ ABC
+(2 rows)
+
+SELECT x FROM test1ci EXCEPT SELECT x FROM test2ci;
+  x  
+-----
+ def
+(1 row)
+
+SELECT x FROM test2ci EXCEPT SELECT x FROM test1ci;
+ x 
+---
+(0 rows)
+
+SELECT DISTINCT x FROM test3ci ORDER BY x;
+  x  
+-----
+ abc
+ def
+ ghi
+(3 rows)
+
+SELECT count(DISTINCT x) FROM test3ci;
+ count 
+-------
+     3
+(1 row)
+
+SELECT x, count(*) FROM test3ci GROUP BY x ORDER BY x;
+  x  | count 
+-----+-------
+ abc |     2
+ def |     1
+ ghi |     1
+(3 rows)
+
+SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3ci ORDER BY x;
+  x  | row_number | rank 
+-----+------------+------
+ abc |          1 |    1
+ ABC |          2 |    1
+ def |          3 |    3
+ ghi |          4 |    4
+(4 rows)
+
+CREATE UNIQUE INDEX ON test1ci (x);  -- ok
+INSERT INTO test1ci VALUES ('ABC');  -- error
+ERROR:  duplicate key value violates unique constraint "test1ci_x_idx"
+DETAIL:  Key (x)=(ABC) already exists.
+CREATE UNIQUE INDEX ON test3ci (x);  -- error
+ERROR:  could not create unique index "test3ci_x_idx"
+DETAIL:  Key (x)=(abc) is duplicated.
+SELECT string_to_array('ABC,DEF,GHI' COLLATE case_insensitive, ',', 'abc');
+ERROR:  nondeterministic collations are not supported for substring searches
+SELECT string_to_array('ABCDEFGHI' COLLATE case_insensitive, NULL, 'b');
+    string_to_array     
+------------------------
+ {A,NULL,C,D,E,F,G,H,I}
+(1 row)
+
+-- bpchar
+CREATE TABLE test1bpci (x char(3) COLLATE case_insensitive);
+CREATE TABLE test2bpci (x char(3) COLLATE case_insensitive);
+CREATE TABLE test3bpci (x char(3) COLLATE case_insensitive);
+CREATE INDEX ON test3bpci (x bpchar_pattern_ops);  -- error
+ERROR:  nondeterministic collations are not supported for operator class "bpchar_pattern_ops"
+INSERT INTO test1bpci VALUES ('abc'), ('def'), ('ghi');
+INSERT INTO test2bpci VALUES ('ABC'), ('ghi');
+INSERT INTO test3bpci VALUES ('abc'), ('ABC'), ('def'), ('ghi');
+SELECT x FROM test3bpci WHERE x = 'abc';
+  x  
+-----
+ abc
+ ABC
+(2 rows)
+
+SELECT x FROM test3bpci WHERE x <> 'abc';
+  x  
+-----
+ def
+ ghi
+(2 rows)
+
+SELECT x FROM test3bpci WHERE x LIKE 'a%';
+ERROR:  nondeterministic collations are not supported for LIKE
+SELECT x FROM test3bpci WHERE x ILIKE 'a%';
+ERROR:  nondeterministic collations are not supported for ILIKE
+SELECT x FROM test3bpci WHERE x SIMILAR TO 'a%';
+ERROR:  nondeterministic collations are not supported for regular expressions
+SELECT x FROM test3bpci WHERE x ~ 'a';
+ERROR:  nondeterministic collations are not supported for regular expressions
+SELECT x FROM test1bpci UNION SELECT x FROM test2bpci ORDER BY x;
+  x  
+-----
+ abc
+ def
+ ghi
+(3 rows)
+
+SELECT x FROM test2bpci UNION SELECT x FROM test1bpci ORDER BY x;
+  x  
+-----
+ ABC
+ def
+ ghi
+(3 rows)
+
+SELECT x FROM test1bpci INTERSECT SELECT x FROM test2bpci;
+  x  
+-----
+ ghi
+ abc
+(2 rows)
+
+SELECT x FROM test2bpci INTERSECT SELECT x FROM test1bpci;
+  x  
+-----
+ ghi
+ ABC
+(2 rows)
+
+SELECT x FROM test1bpci EXCEPT SELECT x FROM test2bpci;
+  x  
+-----
+ def
+(1 row)
+
+SELECT x FROM test2bpci EXCEPT SELECT x FROM test1bpci;
+ x 
+---
+(0 rows)
+
+SELECT DISTINCT x FROM test3bpci ORDER BY x;
+  x  
+-----
+ abc
+ def
+ ghi
+(3 rows)
+
+SELECT count(DISTINCT x) FROM test3bpci;
+ count 
+-------
+     3
+(1 row)
+
+SELECT x, count(*) FROM test3bpci GROUP BY x ORDER BY x;
+  x  | count 
+-----+-------
+ abc |     2
+ def |     1
+ ghi |     1
+(3 rows)
+
+SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3bpci ORDER BY x;
+  x  | row_number | rank 
+-----+------------+------
+ abc |          1 |    1
+ ABC |          2 |    1
+ def |          3 |    3
+ ghi |          4 |    4
+(4 rows)
+
+CREATE UNIQUE INDEX ON test1bpci (x);  -- ok
+INSERT INTO test1bpci VALUES ('ABC');  -- error
+ERROR:  duplicate key value violates unique constraint "test1bpci_x_idx"
+DETAIL:  Key (x)=(ABC) already exists.
+CREATE UNIQUE INDEX ON test3bpci (x);  -- error
+ERROR:  could not create unique index "test3bpci_x_idx"
+DETAIL:  Key (x)=(abc) is duplicated.
+SELECT string_to_array('ABC,DEF,GHI'::char(11) COLLATE case_insensitive, ',', 'abc');
+ERROR:  nondeterministic collations are not supported for substring searches
+SELECT string_to_array('ABCDEFGHI'::char(9) COLLATE case_insensitive, NULL, 'b');
+    string_to_array     
+------------------------
+ {A,NULL,C,D,E,F,G,H,I}
+(1 row)
+
+-- This tests the issue described in match_pattern_prefix().  In the
+-- absence of that check, the case_insensitive tests below would
+-- return no rows where they should logically return one.
+CREATE TABLE test4c (x text COLLATE "C");
+INSERT INTO test4c VALUES ('abc');
+CREATE INDEX ON test4c (x);
+SET enable_seqscan = off;
+SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_sensitive;  -- ok, no rows
+ x 
+---
+(0 rows)
+
+SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_sensitive;  -- ok, no rows
+ x 
+---
+(0 rows)
+
+SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_insensitive;  -- error
+ERROR:  nondeterministic collations are not supported for LIKE
+SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_insensitive;  -- error
+ERROR:  nondeterministic collations are not supported for LIKE
+RESET enable_seqscan;
+-- Unicode special case: different variants of Greek lower case sigma.
+-- A naive implementation like citext that just does lower(x) =
+-- lower(y) will do the wrong thing here, because lower('Σ') is 'σ'
+-- but upper('ς') is 'Σ'.
+SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_sensitive;
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_insensitive;
+ ?column? 
+----------
+ t
+(1 row)
+
+-- name vs. text comparison operators
+SELECT relname FROM pg_class WHERE relname = 'PG_CLASS'::text COLLATE case_insensitive;
+ relname  
+----------
+ pg_class
+(1 row)
+
+SELECT relname FROM pg_class WHERE 'PG_CLASS'::text = relname COLLATE case_insensitive;
+ relname  
+----------
+ pg_class
+(1 row)
+
+SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND typname <> 'INT2'::text COLLATE case_insensitive;
+ typname 
+---------
+ int4
+ int8
+(2 rows)
+
+SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND 'INT2'::text <> typname COLLATE case_insensitive;;
+ typname 
+---------
+ int4
+ int8
+(2 rows)
+
+-- test case adapted from subselect.sql
+CREATE TEMP TABLE outer_text (f1 text COLLATE case_insensitive, f2 text);
+INSERT INTO outer_text VALUES ('a', 'a');
+INSERT INTO outer_text VALUES ('b', 'a');
+INSERT INTO outer_text VALUES ('A', NULL);
+INSERT INTO outer_text VALUES ('B', NULL);
+CREATE TEMP TABLE inner_text (c1 text COLLATE case_insensitive, c2 text);
+INSERT INTO inner_text VALUES ('a', NULL);
+SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text);
+ f1 | f2 
+----+----
+ b  | a
+ B  | 
+(2 rows)
+
+-- accents
+CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false);
+CREATE TABLE test4 (a int, b text);
+INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté');
+SELECT * FROM test4 WHERE b = 'cote';
+ a |  b   
+---+------
+ 1 | cote
+(1 row)
+
+SELECT * FROM test4 WHERE b = 'cote' COLLATE ignore_accents;
+ a |  b   
+---+------
+ 1 | cote
+ 2 | côte
+ 3 | coté
+ 4 | côté
+(4 rows)
+
+SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents;  -- still case-sensitive
+ a | b 
+---+---
+(0 rows)
+
+SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive;
+ a |  b   
+---+------
+ 1 | cote
+(1 row)
+
+-- foreign keys (should use collation of primary key)
+-- PK is case-sensitive, FK is case-insensitive
+CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY);
+INSERT INTO test10pk VALUES ('abc'), ('def'), ('ghi');
+CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON UPDATE CASCADE ON DELETE CASCADE);
+INSERT INTO test10fk VALUES ('abc');  -- ok
+INSERT INTO test10fk VALUES ('ABC');  -- error
+ERROR:  insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey"
+DETAIL:  Key (x)=(ABC) is not present in table "test10pk".
+INSERT INTO test10fk VALUES ('xyz');  -- error
+ERROR:  insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey"
+DETAIL:  Key (x)=(xyz) is not present in table "test10pk".
+SELECT * FROM test10pk;
+  x  
+-----
+ abc
+ def
+ ghi
+(3 rows)
+
+SELECT * FROM test10fk;
+  x  
+-----
+ abc
+(1 row)
+
+-- restrict update even though the values are "equal" in the FK table
+UPDATE test10fk SET x = 'ABC' WHERE x = 'abc';  -- error
+ERROR:  insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey"
+DETAIL:  Key (x)=(ABC) is not present in table "test10pk".
+SELECT * FROM test10fk;
+  x  
+-----
+ abc
+(1 row)
+
+DELETE FROM test10pk WHERE x = 'abc';
+SELECT * FROM test10pk;
+  x  
+-----
+ def
+ ghi
+(2 rows)
+
+SELECT * FROM test10fk;
+ x 
+---
+(0 rows)
+
+-- PK is case-insensitive, FK is case-sensitive
+CREATE TABLE test11pk (x text COLLATE case_insensitive PRIMARY KEY);
+INSERT INTO test11pk VALUES ('abc'), ('def'), ('ghi');
+CREATE TABLE test11fk (x text COLLATE case_sensitive REFERENCES test11pk (x) ON UPDATE CASCADE ON DELETE CASCADE);
+INSERT INTO test11fk VALUES ('abc');  -- ok
+INSERT INTO test11fk VALUES ('ABC');  -- ok
+INSERT INTO test11fk VALUES ('xyz');  -- error
+ERROR:  insert or update on table "test11fk" violates foreign key constraint "test11fk_x_fkey"
+DETAIL:  Key (x)=(xyz) is not present in table "test11pk".
+SELECT * FROM test11pk;
+  x  
+-----
+ abc
+ def
+ ghi
+(3 rows)
+
+SELECT * FROM test11fk;
+  x  
+-----
+ abc
+ ABC
+(2 rows)
+
+-- cascade update even though the values are "equal" in the PK table
+UPDATE test11pk SET x = 'ABC' WHERE x = 'abc';
+SELECT * FROM test11fk;
+  x  
+-----
+ ABC
+ ABC
+(2 rows)
+
+DELETE FROM test11pk WHERE x = 'abc';
+SELECT * FROM test11pk;
+  x  
+-----
+ def
+ ghi
+(2 rows)
+
+SELECT * FROM test11fk;
+ x 
+---
+(0 rows)
+
+-- partitioning
+CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b);
+CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc');
+INSERT INTO test20 VALUES (1, 'abc');
+INSERT INTO test20 VALUES (2, 'ABC');
+SELECT * FROM test20_1;
+ a |  b  
+---+-----
+ 1 | abc
+ 2 | ABC
+(2 rows)
+
+CREATE TABLE test21 (a int, b text COLLATE case_insensitive) PARTITION BY RANGE (b);
+CREATE TABLE test21_1 PARTITION OF test21 FOR VALUES FROM ('ABC') TO ('DEF');
+INSERT INTO test21 VALUES (1, 'abc');
+INSERT INTO test21 VALUES (2, 'ABC');
+SELECT * FROM test21_1;
+ a |  b  
+---+-----
+ 1 | abc
+ 2 | ABC
+(2 rows)
+
+CREATE TABLE test22 (a int, b text COLLATE case_sensitive) PARTITION BY HASH (b);
+CREATE TABLE test22_0 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test22_1 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test22 VALUES (1, 'def');
+INSERT INTO test22 VALUES (2, 'DEF');
+-- they end up in different partitions
+SELECT (SELECT count(*) FROM test22_0) = (SELECT count(*) FROM test22_1);
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test23 (a int, b text COLLATE case_insensitive) PARTITION BY HASH (b);
+CREATE TABLE test23_0 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test23_1 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test23 VALUES (1, 'def');
+INSERT INTO test23 VALUES (2, 'DEF');
+-- they end up in the same partition (but it's platform-dependent which one)
+SELECT (SELECT count(*) FROM test23_0) <> (SELECT count(*) FROM test23_1);
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test30 (a int, b char(3) COLLATE case_insensitive) PARTITION BY LIST (b);
+CREATE TABLE test30_1 PARTITION OF test30 FOR VALUES IN ('abc');
+INSERT INTO test30 VALUES (1, 'abc');
+INSERT INTO test30 VALUES (2, 'ABC');
+SELECT * FROM test30_1;
+ a |  b  
+---+-----
+ 1 | abc
+ 2 | ABC
+(2 rows)
+
+CREATE TABLE test31 (a int, b char(3) COLLATE case_insensitive) PARTITION BY RANGE (b);
+CREATE TABLE test31_1 PARTITION OF test31 FOR VALUES FROM ('ABC') TO ('DEF');
+INSERT INTO test31 VALUES (1, 'abc');
+INSERT INTO test31 VALUES (2, 'ABC');
+SELECT * FROM test31_1;
+ a |  b  
+---+-----
+ 1 | abc
+ 2 | ABC
+(2 rows)
+
+CREATE TABLE test32 (a int, b char(3) COLLATE case_sensitive) PARTITION BY HASH (b);
+CREATE TABLE test32_0 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test32_1 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test32 VALUES (1, 'def');
+INSERT INTO test32 VALUES (2, 'DEF');
+-- they end up in different partitions
+SELECT (SELECT count(*) FROM test32_0) = (SELECT count(*) FROM test32_1);
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test33 (a int, b char(3) COLLATE case_insensitive) PARTITION BY HASH (b);
+CREATE TABLE test33_0 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test33_1 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test33 VALUES (1, 'def');
+INSERT INTO test33 VALUES (2, 'DEF');
+-- they end up in the same partition (but it's platform-dependent which one)
+SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1);
+ ?column? 
+----------
+ t
+(1 row)
+
  -- cleanup
  SET client_min_messages TO warning;
  DROP SCHEMA collate_tests CASCADE;
diff --git a/src/test/regress/expected/collate.linux.utf8.out b/src/test/regress/expected/collate.linux.utf8.out
index d33f04a3b5a191e5187bd0cde5f6cef4d3524172..15b322223987d41c7fa46c9939f708a5a5a12abc 100644 (file)
--- a/src/test/regress/expected/collate.linux.utf8.out
+++ b/src/test/regress/expected/collate.linux.utf8.out
@@ -1117,6 +1117,11 @@ select textrange_en_us('A','Z') @> 'b'::text;
  
  drop type textrange_c;
  drop type textrange_en_us;
+-- nondeterministic collations
+-- (not supported with libc provider)
+CREATE COLLATION ctest_det (locale = 'en_US.utf8', deterministic = true);
+CREATE COLLATION ctest_nondet (locale = 'en_US.utf8', deterministic = false);
+ERROR:  nondeterministic collations not supported with this provider
  -- cleanup
  SET client_min_messages TO warning;
  DROP SCHEMA collate_tests CASCADE;
diff --git a/src/test/regress/expected/collate.out b/src/test/regress/expected/collate.out
index fcbe3a5cc8234558bf60d28866fa310e62112d15..dbfa5c93489754299807c505a3cdf9629833047d 100644 (file)
--- a/src/test/regress/expected/collate.out
+++ b/src/test/regress/expected/collate.out
@@ -498,6 +498,21 @@ SELECT a, b, a < b as lt FROM
   A | b | t
  (2 rows)
  
+-- collation mismatch in subselects
+SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y, x FROM collate_test10);
+ERROR:  could not determine which collation to use for string hashing
+HINT:  Use the COLLATE clause to set the collation explicitly.
+-- now it works with overrides
+SELECT * FROM collate_test10 WHERE (x COLLATE "POSIX", y COLLATE "C") NOT IN (SELECT y, x FROM collate_test10);
+ a | x | y 
+---+---+---
+(0 rows)
+
+SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y COLLATE "C", x COLLATE "POSIX" FROM collate_test10);
+ a | x | y 
+---+---+---
+(0 rows)
+
  -- casting
  SELECT CAST('42' AS text COLLATE "C");
  ERROR:  syntax error at or near "COLLATE"
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index fe5fc6448032e1239f1862d9eabb7d293b1287bb..4a5410418222990d886c1b656760dc2052362ac9 100644 (file)
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -745,6 +745,25 @@ select * from outer_7597 where (f1, f2) not in (select * from inner_7597);
    1 |   
  (2 rows)
  
+--
+-- Similar test case using text that verifies that collation
+-- information is passed through by execTuplesEqual() in nodeSubplan.c
+-- (otherwise it would error in texteq())
+--
+create temp table outer_text (f1 text, f2 text);
+insert into outer_text values ('a', 'a');
+insert into outer_text values ('b', 'a');
+insert into outer_text values ('a', null);
+insert into outer_text values ('b', null);
+create temp table inner_text (c1 text, c2 text);
+insert into inner_text values ('a', null);
+select * from outer_text where (f1, f2) not in (select * from inner_text);
+ f1 | f2 
+----+----
+ b  | a
+ b  | 
+(2 rows)
+
  --
  -- Test case for premature memory release during hashing of subplan output
  --
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 68c2d69659365480d8f49462d556a4f61a0c8b2e..42fb491df70ec60744e1fc86b8ad7f07c22027cb 100644 (file)
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -453,6 +453,256 @@ CREATE COLLATION testcoll_de_phonebook (provider = icu, locale = 'de@collation=p
  SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE testcoll_de_phonebook;
  
  
+-- nondeterministic collations
+
+CREATE COLLATION ctest_det (provider = icu, locale = 'und', deterministic = true);
+CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false);
+
+CREATE TABLE test6 (a int, b text);
+-- same string in different normal forms
+INSERT INTO test6 VALUES (1, U&'\00E4bc');
+INSERT INTO test6 VALUES (2, U&'\0061\0308bc');
+SELECT * FROM test6;
+SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det;
+SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet;
+
+CREATE COLLATION case_sensitive (provider = icu, locale = 'und');
+CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false);
+
+SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
+SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+
+CREATE TABLE test1cs (x text COLLATE case_sensitive);
+CREATE TABLE test2cs (x text COLLATE case_sensitive);
+CREATE TABLE test3cs (x text COLLATE case_sensitive);
+INSERT INTO test1cs VALUES ('abc'), ('def'), ('ghi');
+INSERT INTO test2cs VALUES ('ABC'), ('ghi');
+INSERT INTO test3cs VALUES ('abc'), ('ABC'), ('def'), ('ghi');
+
+SELECT x FROM test3cs WHERE x = 'abc';
+SELECT x FROM test3cs WHERE x <> 'abc';
+SELECT x FROM test3cs WHERE x LIKE 'a%';
+SELECT x FROM test3cs WHERE x ILIKE 'a%';
+SELECT x FROM test3cs WHERE x SIMILAR TO 'a%';
+SELECT x FROM test3cs WHERE x ~ 'a';
+SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x;
+SELECT x FROM test2cs UNION SELECT x FROM test1cs ORDER BY x;
+SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs;
+SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs;
+SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs;
+SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs;
+SELECT DISTINCT x FROM test3cs ORDER BY x;
+SELECT count(DISTINCT x) FROM test3cs;
+SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x;
+SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x;
+CREATE UNIQUE INDEX ON test1cs (x);  -- ok
+INSERT INTO test1cs VALUES ('ABC');  -- ok
+CREATE UNIQUE INDEX ON test3cs (x);  -- ok
+SELECT string_to_array('ABC,DEF,GHI' COLLATE case_sensitive, ',', 'abc');
+SELECT string_to_array('ABCDEFGHI' COLLATE case_sensitive, NULL, 'b');
+
+CREATE TABLE test1ci (x text COLLATE case_insensitive);
+CREATE TABLE test2ci (x text COLLATE case_insensitive);
+CREATE TABLE test3ci (x text COLLATE case_insensitive);
+CREATE INDEX ON test3ci (x text_pattern_ops);  -- error
+INSERT INTO test1ci VALUES ('abc'), ('def'), ('ghi');
+INSERT INTO test2ci VALUES ('ABC'), ('ghi');
+INSERT INTO test3ci VALUES ('abc'), ('ABC'), ('def'), ('ghi');
+
+SELECT x FROM test3ci WHERE x = 'abc';
+SELECT x FROM test3ci WHERE x <> 'abc';
+SELECT x FROM test3ci WHERE x LIKE 'a%';
+SELECT x FROM test3ci WHERE x ILIKE 'a%';
+SELECT x FROM test3ci WHERE x SIMILAR TO 'a%';
+SELECT x FROM test3ci WHERE x ~ 'a';
+SELECT x FROM test1ci UNION SELECT x FROM test2ci ORDER BY x;
+SELECT x FROM test2ci UNION SELECT x FROM test1ci ORDER BY x;
+SELECT x FROM test1ci INTERSECT SELECT x FROM test2ci;
+SELECT x FROM test2ci INTERSECT SELECT x FROM test1ci;
+SELECT x FROM test1ci EXCEPT SELECT x FROM test2ci;
+SELECT x FROM test2ci EXCEPT SELECT x FROM test1ci;
+SELECT DISTINCT x FROM test3ci ORDER BY x;
+SELECT count(DISTINCT x) FROM test3ci;
+SELECT x, count(*) FROM test3ci GROUP BY x ORDER BY x;
+SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3ci ORDER BY x;
+CREATE UNIQUE INDEX ON test1ci (x);  -- ok
+INSERT INTO test1ci VALUES ('ABC');  -- error
+CREATE UNIQUE INDEX ON test3ci (x);  -- error
+SELECT string_to_array('ABC,DEF,GHI' COLLATE case_insensitive, ',', 'abc');
+SELECT string_to_array('ABCDEFGHI' COLLATE case_insensitive, NULL, 'b');
+
+-- bpchar
+CREATE TABLE test1bpci (x char(3) COLLATE case_insensitive);
+CREATE TABLE test2bpci (x char(3) COLLATE case_insensitive);
+CREATE TABLE test3bpci (x char(3) COLLATE case_insensitive);
+CREATE INDEX ON test3bpci (x bpchar_pattern_ops);  -- error
+INSERT INTO test1bpci VALUES ('abc'), ('def'), ('ghi');
+INSERT INTO test2bpci VALUES ('ABC'), ('ghi');
+INSERT INTO test3bpci VALUES ('abc'), ('ABC'), ('def'), ('ghi');
+
+SELECT x FROM test3bpci WHERE x = 'abc';
+SELECT x FROM test3bpci WHERE x <> 'abc';
+SELECT x FROM test3bpci WHERE x LIKE 'a%';
+SELECT x FROM test3bpci WHERE x ILIKE 'a%';
+SELECT x FROM test3bpci WHERE x SIMILAR TO 'a%';
+SELECT x FROM test3bpci WHERE x ~ 'a';
+SELECT x FROM test1bpci UNION SELECT x FROM test2bpci ORDER BY x;
+SELECT x FROM test2bpci UNION SELECT x FROM test1bpci ORDER BY x;
+SELECT x FROM test1bpci INTERSECT SELECT x FROM test2bpci;
+SELECT x FROM test2bpci INTERSECT SELECT x FROM test1bpci;
+SELECT x FROM test1bpci EXCEPT SELECT x FROM test2bpci;
+SELECT x FROM test2bpci EXCEPT SELECT x FROM test1bpci;
+SELECT DISTINCT x FROM test3bpci ORDER BY x;
+SELECT count(DISTINCT x) FROM test3bpci;
+SELECT x, count(*) FROM test3bpci GROUP BY x ORDER BY x;
+SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3bpci ORDER BY x;
+CREATE UNIQUE INDEX ON test1bpci (x);  -- ok
+INSERT INTO test1bpci VALUES ('ABC');  -- error
+CREATE UNIQUE INDEX ON test3bpci (x);  -- error
+SELECT string_to_array('ABC,DEF,GHI'::char(11) COLLATE case_insensitive, ',', 'abc');
+SELECT string_to_array('ABCDEFGHI'::char(9) COLLATE case_insensitive, NULL, 'b');
+
+-- This tests the issue described in match_pattern_prefix().  In the
+-- absence of that check, the case_insensitive tests below would
+-- return no rows where they should logically return one.
+CREATE TABLE test4c (x text COLLATE "C");
+INSERT INTO test4c VALUES ('abc');
+CREATE INDEX ON test4c (x);
+SET enable_seqscan = off;
+SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_sensitive;  -- ok, no rows
+SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_sensitive;  -- ok, no rows
+SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_insensitive;  -- error
+SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_insensitive;  -- error
+RESET enable_seqscan;
+
+-- Unicode special case: different variants of Greek lower case sigma.
+-- A naive implementation like citext that just does lower(x) =
+-- lower(y) will do the wrong thing here, because lower('Σ') is 'σ'
+-- but upper('ς') is 'Σ'.
+SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_sensitive;
+SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_insensitive;
+
+-- name vs. text comparison operators
+SELECT relname FROM pg_class WHERE relname = 'PG_CLASS'::text COLLATE case_insensitive;
+SELECT relname FROM pg_class WHERE 'PG_CLASS'::text = relname COLLATE case_insensitive;
+
+SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND typname <> 'INT2'::text COLLATE case_insensitive;
+SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND 'INT2'::text <> typname COLLATE case_insensitive;;
+
+-- test case adapted from subselect.sql
+CREATE TEMP TABLE outer_text (f1 text COLLATE case_insensitive, f2 text);
+INSERT INTO outer_text VALUES ('a', 'a');
+INSERT INTO outer_text VALUES ('b', 'a');
+INSERT INTO outer_text VALUES ('A', NULL);
+INSERT INTO outer_text VALUES ('B', NULL);
+
+CREATE TEMP TABLE inner_text (c1 text COLLATE case_insensitive, c2 text);
+INSERT INTO inner_text VALUES ('a', NULL);
+
+SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text);
+
+-- accents
+CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false);
+
+CREATE TABLE test4 (a int, b text);
+INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté');
+SELECT * FROM test4 WHERE b = 'cote';
+SELECT * FROM test4 WHERE b = 'cote' COLLATE ignore_accents;
+SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents;  -- still case-sensitive
+SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive;
+
+-- foreign keys (should use collation of primary key)
+
+-- PK is case-sensitive, FK is case-insensitive
+CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY);
+INSERT INTO test10pk VALUES ('abc'), ('def'), ('ghi');
+CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON UPDATE CASCADE ON DELETE CASCADE);
+INSERT INTO test10fk VALUES ('abc');  -- ok
+INSERT INTO test10fk VALUES ('ABC');  -- error
+INSERT INTO test10fk VALUES ('xyz');  -- error
+SELECT * FROM test10pk;
+SELECT * FROM test10fk;
+-- restrict update even though the values are "equal" in the FK table
+UPDATE test10fk SET x = 'ABC' WHERE x = 'abc';  -- error
+SELECT * FROM test10fk;
+DELETE FROM test10pk WHERE x = 'abc';
+SELECT * FROM test10pk;
+SELECT * FROM test10fk;
+
+-- PK is case-insensitive, FK is case-sensitive
+CREATE TABLE test11pk (x text COLLATE case_insensitive PRIMARY KEY);
+INSERT INTO test11pk VALUES ('abc'), ('def'), ('ghi');
+CREATE TABLE test11fk (x text COLLATE case_sensitive REFERENCES test11pk (x) ON UPDATE CASCADE ON DELETE CASCADE);
+INSERT INTO test11fk VALUES ('abc');  -- ok
+INSERT INTO test11fk VALUES ('ABC');  -- ok
+INSERT INTO test11fk VALUES ('xyz');  -- error
+SELECT * FROM test11pk;
+SELECT * FROM test11fk;
+-- cascade update even though the values are "equal" in the PK table
+UPDATE test11pk SET x = 'ABC' WHERE x = 'abc';
+SELECT * FROM test11fk;
+DELETE FROM test11pk WHERE x = 'abc';
+SELECT * FROM test11pk;
+SELECT * FROM test11fk;
+
+-- partitioning
+CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b);
+CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc');
+INSERT INTO test20 VALUES (1, 'abc');
+INSERT INTO test20 VALUES (2, 'ABC');
+SELECT * FROM test20_1;
+
+CREATE TABLE test21 (a int, b text COLLATE case_insensitive) PARTITION BY RANGE (b);
+CREATE TABLE test21_1 PARTITION OF test21 FOR VALUES FROM ('ABC') TO ('DEF');
+INSERT INTO test21 VALUES (1, 'abc');
+INSERT INTO test21 VALUES (2, 'ABC');
+SELECT * FROM test21_1;
+
+CREATE TABLE test22 (a int, b text COLLATE case_sensitive) PARTITION BY HASH (b);
+CREATE TABLE test22_0 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test22_1 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test22 VALUES (1, 'def');
+INSERT INTO test22 VALUES (2, 'DEF');
+-- they end up in different partitions
+SELECT (SELECT count(*) FROM test22_0) = (SELECT count(*) FROM test22_1);
+
+CREATE TABLE test23 (a int, b text COLLATE case_insensitive) PARTITION BY HASH (b);
+CREATE TABLE test23_0 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test23_1 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test23 VALUES (1, 'def');
+INSERT INTO test23 VALUES (2, 'DEF');
+-- they end up in the same partition (but it's platform-dependent which one)
+SELECT (SELECT count(*) FROM test23_0) <> (SELECT count(*) FROM test23_1);
+
+CREATE TABLE test30 (a int, b char(3) COLLATE case_insensitive) PARTITION BY LIST (b);
+CREATE TABLE test30_1 PARTITION OF test30 FOR VALUES IN ('abc');
+INSERT INTO test30 VALUES (1, 'abc');
+INSERT INTO test30 VALUES (2, 'ABC');
+SELECT * FROM test30_1;
+
+CREATE TABLE test31 (a int, b char(3) COLLATE case_insensitive) PARTITION BY RANGE (b);
+CREATE TABLE test31_1 PARTITION OF test31 FOR VALUES FROM ('ABC') TO ('DEF');
+INSERT INTO test31 VALUES (1, 'abc');
+INSERT INTO test31 VALUES (2, 'ABC');
+SELECT * FROM test31_1;
+
+CREATE TABLE test32 (a int, b char(3) COLLATE case_sensitive) PARTITION BY HASH (b);
+CREATE TABLE test32_0 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test32_1 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test32 VALUES (1, 'def');
+INSERT INTO test32 VALUES (2, 'DEF');
+-- they end up in different partitions
+SELECT (SELECT count(*) FROM test32_0) = (SELECT count(*) FROM test32_1);
+
+CREATE TABLE test33 (a int, b char(3) COLLATE case_insensitive) PARTITION BY HASH (b);
+CREATE TABLE test33_0 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test33_1 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test33 VALUES (1, 'def');
+INSERT INTO test33 VALUES (2, 'DEF');
+-- they end up in the same partition (but it's platform-dependent which one)
+SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1);
+
+
  -- cleanup
  SET client_min_messages TO warning;
  DROP SCHEMA collate_tests CASCADE;
diff --git a/src/test/regress/sql/collate.linux.utf8.sql b/src/test/regress/sql/collate.linux.utf8.sql
index e8821532441c75f2801b94485218901148753766..4ca02b821d3858b4f428af2bea33d337f9d167fa 100644 (file)
--- a/src/test/regress/sql/collate.linux.utf8.sql
+++ b/src/test/regress/sql/collate.linux.utf8.sql
@@ -428,6 +428,13 @@ drop type textrange_c;
  drop type textrange_en_us;
  
  
+-- nondeterministic collations
+-- (not supported with libc provider)
+
+CREATE COLLATION ctest_det (locale = 'en_US.utf8', deterministic = true);
+CREATE COLLATION ctest_nondet (locale = 'en_US.utf8', deterministic = false);
+
+
  -- cleanup
  SET client_min_messages TO warning;
  DROP SCHEMA collate_tests CASCADE;
diff --git a/src/test/regress/sql/collate.sql b/src/test/regress/sql/collate.sql
index 4ddde95a5e8efed5ea9a07ba9c18a1610ec9eb13..cb2bc22155d864c9a0cfa5970b64599a3f846d81 100644 (file)
--- a/src/test/regress/sql/collate.sql
+++ b/src/test/regress/sql/collate.sql
@@ -163,6 +163,11 @@ SELECT * FROM foo;
  SELECT a, b, a < b as lt FROM
    (VALUES ('a', 'B'), ('A', 'b' COLLATE "C")) v(a,b);
  
+-- collation mismatch in subselects
+SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y, x FROM collate_test10);
+-- now it works with overrides
+SELECT * FROM collate_test10 WHERE (x COLLATE "POSIX", y COLLATE "C") NOT IN (SELECT y, x FROM collate_test10);
+SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y COLLATE "C", x COLLATE "POSIX" FROM collate_test10);
  
  -- casting
  
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index b5931ee700e41d187ebbb97050ce9922bc80fee1..856bbff7328d4d5f759ea6437794ab9295748120 100644 (file)
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -435,6 +435,23 @@ insert into inner_7597 values(0, null);
  
  select * from outer_7597 where (f1, f2) not in (select * from inner_7597);
  
+--
+-- Similar test case using text that verifies that collation
+-- information is passed through by execTuplesEqual() in nodeSubplan.c
+-- (otherwise it would error in texteq())
+--
+
+create temp table outer_text (f1 text, f2 text);
+insert into outer_text values ('a', 'a');
+insert into outer_text values ('b', 'a');
+insert into outer_text values ('a', null);
+insert into outer_text values ('b', null);
+
+create temp table inner_text (c1 text, c2 text);
+insert into inner_text values ('a', null);
+
+select * from outer_text where (f1, f2) not in (select * from inner_text);
+
  --
  -- Test case for premature memory release during hashing of subplan output
  --
diff --git a/src/test/subscription/Makefile b/src/test/subscription/Makefile
index e7bbb454c7ac6b4fe10094fb0a30cf5176f5be34..4378819530fd64026c069722f3de79247eaf7e18 100644 (file)
--- a/src/test/subscription/Makefile
+++ b/src/test/subscription/Makefile
@@ -15,6 +15,8 @@ include $(top_builddir)/src/Makefile.global
  
  EXTRA_INSTALL = contrib/hstore
  
+export with_icu
+
  check:
     $(prove_check)
  
diff --git a/src/test/subscription/t/012_collation.pl b/src/test/subscription/t/012_collation.pl
new file mode 100644 (file)
index 0000000..6c480de
--- /dev/null
+++ b/src/test/subscription/t/012_collation.pl
@@ -0,0 +1,103 @@
+# Test collations, in particular nondeterministic ones
+# (only works with ICU)
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More;
+
+if ($ENV{with_icu} eq 'yes')
+{
+   plan tests => 2;
+}
+else
+{
+   plan skip_all => 'ICU not supported by this build';
+}
+
+my $node_publisher = get_new_node('publisher');
+$node_publisher->init(allows_streaming => 'logical');
+$node_publisher->start;
+
+my $node_subscriber = get_new_node('subscriber');
+$node_subscriber->init(allows_streaming => 'logical');
+$node_subscriber->start;
+
+my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
+
+# Test plan: Create a table with a nondeterministic collation in the
+# primary key column.  Pre-insert rows on the publisher and subscriber
+# that are collation-wise equal but byte-wise different.  (We use a
+# string in different normal forms for that.)  Set up publisher and
+# subscriber.  Update the row on the publisher, but don't change the
+# primary key column.  The subscriber needs to find the row to be
+# updated using the nondeterministic collation semantics.  We need to
+# test for both a replica identity index and for replica identity
+# full, since those have different code paths internally.
+
+$node_subscriber->safe_psql('postgres',
+   q{CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false)});
+
+# table with replica identity index
+
+$node_publisher->safe_psql('postgres',
+   q{CREATE TABLE tab1 (a text PRIMARY KEY, b text)});
+
+$node_publisher->safe_psql('postgres',
+   q{INSERT INTO tab1 VALUES (U&'\00E4bc', 'foo')});
+
+$node_subscriber->safe_psql('postgres',
+   q{CREATE TABLE tab1 (a text COLLATE ctest_nondet PRIMARY KEY, b text)});
+
+$node_subscriber->safe_psql('postgres',
+   q{INSERT INTO tab1 VALUES (U&'\0061\0308bc', 'foo')});
+
+# table with replica identity full
+
+$node_publisher->safe_psql('postgres',
+   q{CREATE TABLE tab2 (a text, b text)});
+$node_publisher->safe_psql('postgres',
+   q{ALTER TABLE tab2 REPLICA IDENTITY FULL});
+
+$node_publisher->safe_psql('postgres',
+   q{INSERT INTO tab2 VALUES (U&'\00E4bc', 'foo')});
+
+$node_subscriber->safe_psql('postgres',
+   q{CREATE TABLE tab2 (a text COLLATE ctest_nondet, b text)});
+$node_subscriber->safe_psql('postgres',
+   q{ALTER TABLE tab2 REPLICA IDENTITY FULL});
+
+$node_subscriber->safe_psql('postgres',
+   q{INSERT INTO tab2 VALUES (U&'\0061\0308bc', 'foo')});
+
+# set up publication, subscription
+
+$node_publisher->safe_psql('postgres',
+   q{CREATE PUBLICATION pub1 FOR ALL TABLES});
+
+$node_subscriber->safe_psql('postgres',
+   qq{CREATE SUBSCRIPTION sub1 CONNECTION '$publisher_connstr' PUBLICATION pub1 WITH (copy_data = false)});
+
+$node_publisher->wait_for_catchup('sub1');
+
+# test with replica identity index
+
+$node_publisher->safe_psql('postgres',
+   q{UPDATE tab1 SET b = 'bar' WHERE b = 'foo'});
+
+$node_publisher->wait_for_catchup('sub1');
+
+is($node_subscriber->safe_psql('postgres', q{SELECT b FROM tab1}),
+   qq(bar),
+  'update with primary key with nondeterministic collation');
+
+# test with replica identity full
+
+$node_publisher->safe_psql('postgres',
+   q{UPDATE tab2 SET b = 'bar' WHERE b = 'foo'});
+
+$node_publisher->wait_for_catchup('sub1');
+
+is($node_subscriber->safe_psql('postgres', q{SELECT b FROM tab2}),
+   qq(bar),
+  'update with replica identity full with nondeterministic collation');
author	Peter Eisentraut <[email protected]>
	Fri, 22 Mar 2019 11:09:32 +0000 (12:09 +0100)
committer	Peter Eisentraut <[email protected]>
	Fri, 22 Mar 2019 11:12:43 +0000 (12:12 +0100)
contrib/bloom/bloom.h		\| blob \| blame \| history
contrib/bloom/blutils.c		\| blob \| blame \| history
doc/src/sgml/catalogs.sgml		\| blob \| blame \| history
doc/src/sgml/charset.sgml		\| blob \| blame \| history
doc/src/sgml/citext.sgml		\| blob \| blame \| history
doc/src/sgml/func.sgml		\| blob \| blame \| history
doc/src/sgml/ref/create_collation.sgml		\| blob \| blame \| history
src/backend/access/hash/hashfunc.c		\| blob \| blame \| history
src/backend/access/spgist/spgtextproc.c		\| blob \| blame \| history
src/backend/catalog/pg_collation.c		\| blob \| blame \| history
src/backend/commands/collationcmds.c		\| blob \| blame \| history
src/backend/commands/extension.c		\| blob \| blame \| history
src/backend/executor/execExpr.c		\| blob \| blame \| history
src/backend/executor/execGrouping.c		\| blob \| blame \| history
src/backend/executor/execPartition.c		\| blob \| blame \| history
src/backend/executor/execReplication.c		\| blob \| blame \| history
src/backend/executor/nodeAgg.c		\| blob \| blame \| history
src/backend/executor/nodeGroup.c		\| blob \| blame \| history
src/backend/executor/nodeHash.c		\| blob \| blame \| history
src/backend/executor/nodeHashjoin.c		\| blob \| blame \| history
src/backend/executor/nodeRecursiveunion.c		\| blob \| blame \| history
src/backend/executor/nodeSetOp.c		\| blob \| blame \| history
src/backend/executor/nodeSubplan.c		\| blob \| blame \| history
src/backend/executor/nodeUnique.c		\| blob \| blame \| history
src/backend/executor/nodeWindowAgg.c		\| blob \| blame \| history
src/backend/nodes/copyfuncs.c		\| blob \| blame \| history
src/backend/nodes/outfuncs.c		\| blob \| blame \| history
src/backend/nodes/readfuncs.c		\| blob \| blame \| history
src/backend/optimizer/plan/createplan.c		\| blob \| blame \| history
src/backend/optimizer/util/tlist.c		\| blob \| blame \| history
src/backend/partitioning/partbounds.c		\| blob \| blame \| history
src/backend/partitioning/partprune.c		\| blob \| blame \| history
src/backend/regex/regc_pg_locale.c		\| blob \| blame \| history
src/backend/utils/adt/arrayfuncs.c		\| blob \| blame \| history
src/backend/utils/adt/like.c		\| blob \| blame \| history
src/backend/utils/adt/like_support.c		\| blob \| blame \| history
src/backend/utils/adt/name.c		\| blob \| blame \| history
src/backend/utils/adt/orderedsetaggs.c		\| blob \| blame \| history
src/backend/utils/adt/pg_locale.c		\| blob \| blame \| history
src/backend/utils/adt/ri_triggers.c		\| blob \| blame \| history
src/backend/utils/adt/varchar.c		\| blob \| blame \| history
src/backend/utils/adt/varlena.c		\| blob \| blame \| history
src/backend/utils/cache/catcache.c		\| blob \| blame \| history
src/backend/utils/cache/lsyscache.c		\| blob \| blame \| history
src/bin/initdb/initdb.c		\| blob \| blame \| history
src/bin/pg_dump/pg_dump.c		\| blob \| blame \| history
src/bin/psql/describe.c		\| blob \| blame \| history
src/include/catalog/catversion.h		\| blob \| blame \| history
src/include/catalog/pg_collation.h		\| blob \| blame \| history
src/include/executor/executor.h		\| blob \| blame \| history
src/include/executor/hashjoin.h		\| blob \| blame \| history
src/include/executor/nodeHash.h		\| blob \| blame \| history
src/include/nodes/execnodes.h		\| blob \| blame \| history
src/include/nodes/plannodes.h		\| blob \| blame \| history
src/include/optimizer/planmain.h		\| blob \| blame \| history
src/include/optimizer/tlist.h		\| blob \| blame \| history
src/include/partitioning/partbounds.h		\| blob \| blame \| history
src/include/utils/lsyscache.h		\| blob \| blame \| history
src/include/utils/pg_locale.h		\| blob \| blame \| history
src/test/regress/expected/collate.icu.utf8.out		\| blob \| blame \| history
src/test/regress/expected/collate.linux.utf8.out		\| blob \| blame \| history
src/test/regress/expected/collate.out		\| blob \| blame \| history
src/test/regress/expected/subselect.out		\| blob \| blame \| history
src/test/regress/sql/collate.icu.utf8.sql		\| blob \| blame \| history
src/test/regress/sql/collate.linux.utf8.sql		\| blob \| blame \| history
src/test/regress/sql/collate.sql		\| blob \| blame \| history
src/test/regress/sql/subselect.sql		\| blob \| blame \| history
src/test/subscription/Makefile		\| blob \| blame \| history
src/test/subscription/t/012_collation.pl	[new file with mode: 0644]	\| blob