lowerstr() and lowerstr_with_len() in ts_locale.c do the same thing as
str_tolower() that the rest of the system uses, except that the former
don't use the common locale provider framework but instead use the
global libc locale settings.
This replaces uses of lowerstr*() with str_tolower(...,
DEFAULT_COLLATION_OID). For instances that use a libc locale
globally, this will result in exactly the same behavior. For
instances that use other locale providers, you now get consistent
behavior and are no longer dependent on the libc locale settings (for
this case; there are others).
Most uses of these functions are for processing dictionary and
configuration files. In those cases, using the default collation
seems appropriate. At least we don't have a more specific collation
available. But the code in contrib/pg_trgm should really depend on
the collation of the columns being processed. This is not done here,
this can be done in a separate patch.
(You can probably construct some edge cases where this change would
create some locale-related upgrade incompatibility, for example if
before you used a combination of ICU and a differently-behaving libc
locale. We can document this in the release notes, but I don't think
there is anything more we can do about this.)
Reviewed-by: Jeff Davis <[email protected]>
Discussion: https://www.postgresql.org/message-id/flat/653f3b84-fc87-45a7-9a0c-bfb4fcab3e7d%40eisentraut.org
#include <ctype.h>
+#include "catalog/pg_collation_d.h"
#include "commands/defrem.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
+#include "utils/formatting.h"
PG_MODULE_MAGIC;
if (*line == '\0')
continue;
- value = lowerstr(line);
+ value = str_tolower(line, strlen(line), DEFAULT_COLLATION_OID);
pfree(line);
pos = value;
{
char *temp = pnstrdup(in, length);
- word.key = lowerstr(temp);
+ word.key = str_tolower(temp, length, DEFAULT_COLLATION_OID);
pfree(temp);
word.value = NULL;
}
#include <ctype.h>
+#include "catalog/pg_collation_d.h"
#include "catalog/pg_type.h"
#include "common/int.h"
#include "lib/qunique.h"
#include "miscadmin.h"
#include "trgm.h"
#include "tsearch/ts_locale.h"
+#include "utils/formatting.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL)
{
#ifdef IGNORECASE
- bword = lowerstr_with_len(bword, eword - bword);
+ bword = str_tolower(bword, eword - bword, DEFAULT_COLLATION_OID);
bytelen = strlen(bword);
#else
bytelen = eword - bword;
buf, &bytelen, &charlen)) != NULL)
{
#ifdef IGNORECASE
- buf2 = lowerstr_with_len(buf, bytelen);
+ buf2 = str_tolower(buf, bytelen, DEFAULT_COLLATION_OID);
bytelen = strlen(buf2);
#else
buf2 = buf;
*/
#include "postgres.h"
+#include "catalog/pg_collation_d.h"
#include "regex/regexport.h"
#include "trgm.h"
#include "tsearch/ts_locale.h"
+#include "utils/formatting.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
#include "varatt.h"
* within each color, since we used the REG_ICASE option; so there's no
* need to process the uppercase version.
*
- * XXX this code is dependent on the assumption that lowerstr() works the
- * same as the regex engine's internal case folding machinery. Might be
- * wiser to expose pg_wc_tolower and test whether c == pg_wc_tolower(c).
- * On the other hand, the trigrams in the index were created using
- * lowerstr(), so we're probably screwed if there's any incompatibility
- * anyway.
+ * XXX this code is dependent on the assumption that str_tolower() works
+ * the same as the regex engine's internal case folding machinery. Might
+ * be wiser to expose pg_wc_tolower and test whether c ==
+ * pg_wc_tolower(c). On the other hand, the trigrams in the index were
+ * created using str_tolower(), so we're probably screwed if there's any
+ * incompatibility anyway.
*/
#ifdef IGNORECASE
{
- char *lowerCased = lowerstr(s);
+ char *lowerCased = str_tolower(s, strlen(s), DEFAULT_COLLATION_OID);
if (strcmp(lowerCased, s) != 0)
{
*/
#include "postgres.h"
+#include "catalog/pg_collation_d.h"
#include "commands/defrem.h"
-#include "tsearch/ts_locale.h"
+#include "mb/pg_wchar.h"
#include "tsearch/ts_public.h"
+#include "utils/formatting.h"
/* Some platforms define MAXINT and/or MININT, causing conflicts */
#ifdef MAXINT
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple StopWords parameters")));
- readstoplist(defGetString(defel), &d->stoplist, lowerstr);
+ readstoplist(defGetString(defel), &d->stoplist, str_tolower);
stoploaded = true;
}
else if (strcmp(defel->defname, "language") == 0)
DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
int32 len = PG_GETARG_INT32(2);
- char *txt = lowerstr_with_len(in, len);
+ char *txt = str_tolower(in, len, DEFAULT_COLLATION_OID);
TSLexeme *res = palloc0(sizeof(TSLexeme) * 2);
/*
*/
#include "postgres.h"
+#include "catalog/pg_collation_d.h"
#include "commands/defrem.h"
#include "tsearch/dicts/spell.h"
-#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "utils/fmgrprotos.h"
+#include "utils/formatting.h"
typedef struct
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple StopWords parameters")));
- readstoplist(defGetString(defel), &(d->stoplist), lowerstr);
+ readstoplist(defGetString(defel), &(d->stoplist), str_tolower);
stoploaded = true;
}
else
if (len <= 0)
PG_RETURN_POINTER(NULL);
- txt = lowerstr_with_len(in, len);
+ txt = str_tolower(in, len, DEFAULT_COLLATION_OID);
res = NINormalizeWord(&(d->obj), txt);
if (res == NULL)
*/
#include "postgres.h"
+#include "catalog/pg_collation_d.h"
#include "commands/defrem.h"
-#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "utils/fmgrprotos.h"
+#include "utils/formatting.h"
typedef struct
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple StopWords parameters")));
- readstoplist(defGetString(defel), &d->stoplist, lowerstr);
+ readstoplist(defGetString(defel), &d->stoplist, str_tolower);
stoploaded = true;
}
else if (strcmp(defel->defname, "accept") == 0)
char *txt;
TSLexeme *res;
- txt = lowerstr_with_len(in, len);
+ txt = str_tolower(in, len, DEFAULT_COLLATION_OID);
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
{
*/
#include "postgres.h"
+#include "catalog/pg_collation_d.h"
#include "commands/defrem.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "utils/fmgrprotos.h"
+#include "utils/formatting.h"
typedef struct
{
}
else
{
- d->syn[cur].in = lowerstr(starti);
- d->syn[cur].out = lowerstr(starto);
+ d->syn[cur].in = str_tolower(starti, strlen(starti), DEFAULT_COLLATION_OID);
+ d->syn[cur].out = str_tolower(starto, strlen(starto), DEFAULT_COLLATION_OID);
}
d->syn[cur].outlen = strlen(starto);
if (d->case_sensitive)
key.in = pnstrdup(in, len);
else
- key.in = lowerstr_with_len(in, len);
+ key.in = str_tolower(in, len, DEFAULT_COLLATION_OID);
key.out = NULL;
#include "miscadmin.h"
#include "tsearch/dicts/spell.h"
#include "tsearch/ts_locale.h"
+#include "utils/formatting.h"
#include "utils/memutils.h"
/*
- * Apply lowerstr(), producing a temporary result (in the buildCxt).
+ * Apply str_tolower(), producing a temporary result (in the buildCxt).
*/
static char *
lowerstr_ctx(IspellDict *Conf, const char *src)
char *dst;
saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
- dst = lowerstr(src);
+ dst = str_tolower(src, strlen(src), DEFAULT_COLLATION_OID);
MemoryContextSwitchTo(saveCtx);
return dst;
while ((recoded = tsearch_readline(&trst)) != NULL)
{
- pstr = lowerstr(recoded);
+ pstr = str_tolower(recoded, strlen(recoded), DEFAULT_COLLATION_OID);
/* Skip comments and empty lines */
if (*pstr == '#' || *pstr == '\n')
stp->lineno,
stp->filename);
}
-
-
-/*
- * lowerstr --- fold null-terminated string to lower case
- *
- * Returned string is palloc'd
- */
-char *
-lowerstr(const char *str)
-{
- return lowerstr_with_len(str, strlen(str));
-}
-
-/*
- * lowerstr_with_len --- fold string to lower case
- *
- * Input string need not be null-terminated.
- *
- * Returned string is palloc'd
- */
-char *
-lowerstr_with_len(const char *str, int len)
-{
- char *out;
- pg_locale_t mylocale = 0; /* TODO */
-
- if (len == 0)
- return pstrdup("");
-
- /*
- * Use wide char code only when max encoding length > 1 and ctype != C.
- * Some operating systems fail with multi-byte encodings and a C locale.
- * Also, for a C locale there is no need to process as multibyte. From
- * backend/utils/adt/oracle_compat.c Teodor
- */
- if (pg_database_encoding_max_length() > 1 && !database_ctype_is_c)
- {
- wchar_t *wstr,
- *wptr;
- int wlen;
-
- /*
- * alloc number of wchar_t for worst case, len contains number of
- * bytes >= number of characters and alloc 1 wchar_t for 0, because
- * wchar2char wants zero-terminated string
- */
- wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
-
- wlen = char2wchar(wstr, len + 1, str, len, mylocale);
- Assert(wlen <= len);
-
- while (*wptr)
- {
- *wptr = towlower((wint_t) *wptr);
- wptr++;
- }
-
- /*
- * Alloc result string for worst case + '\0'
- */
- len = pg_database_encoding_max_length() * wlen + 1;
- out = (char *) palloc(len);
-
- wlen = wchar2char(out, wstr, len, mylocale);
-
- pfree(wstr);
-
- if (wlen < 0)
- ereport(ERROR,
- (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- errmsg("conversion from wchar_t to server encoding failed: %m")));
- Assert(wlen < len);
- }
- else
- {
- const char *ptr = str;
- char *outptr;
-
- outptr = out = (char *) palloc(sizeof(char) * (len + 1));
- while ((ptr - str) < len && *ptr)
- {
- *outptr++ = tolower(TOUCHAR(ptr));
- ptr++;
- }
- *outptr = '\0';
- }
-
- return out;
-}
#include <ctype.h>
+#include "catalog/pg_collation_d.h"
#include "miscadmin.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
* or palloc a new version.
*/
void
-readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
+readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *, size_t, Oid))
{
char **stop = NULL;
if (wordop)
{
- stop[s->len] = wordop(line);
+ stop[s->len] = wordop(line, strlen(line), DEFAULT_COLLATION_OID);
if (stop[s->len] != line)
pfree(line);
}
extern int t_isalpha(const char *ptr);
extern int t_isalnum(const char *ptr);
-extern char *lowerstr(const char *str);
-extern char *lowerstr_with_len(const char *str, int len);
-
extern bool tsearch_readline_begin(tsearch_readline_state *stp,
const char *filename);
extern char *tsearch_readline(tsearch_readline_state *stp);
} StopList;
extern void readstoplist(const char *fname, StopList *s,
- char *(*wordop) (const char *));
+ char *(*wordop) (const char *, size_t, Oid));
extern bool searchstoplist(StopList *s, char *key);
/*