Older versions of ICU canonicalize "C" to "en-US-u-va-posix", but
starting in ICU version 64, the "C" locale is considered obsolete.
Postgres commit ea1db8ae70 introduced code to always canonicalize
"C" to "en-US-u-va-posix" for consistency and convenience, but that
behavior was deemed too confusing.
This commit removes that code, so that "C" is treated like other ICU
locale names: canonicalization is attempted, and if it fails, the
behavior is controlled by icu_validation_level.
A similar change was previously committed as
f7faa9976c, then reverted
due to an ICU-version-dependent test failure. This commit un-reverts
it, omitting the test because we now expect the behavior to depend on
the version of ICU being used.
Discussion: https://postgr.es/m/3a200aca-4672-4b37-fc91-5d198a323503%40eisentraut.org
Discussion: https://postgr.es/m/f83f089ee1e9acd5dbbbf3353294d24e1f196e95[email protected]
Discussion: https://postgr.es/m/37520ec1ae9591f83132f82dbd625f3fc2d69c16[email protected]
{
#ifdef USE_ICU
UErrorCode status;
- char lang[ULOC_LANG_CAPACITY];
char *langtag;
size_t buflen = 32; /* arbitrary starting buffer size */
const bool strict = true;
- status = U_ZERO_ERROR;
- uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
- if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
- {
- if (elevel > 0)
- ereport(elevel,
- (errmsg("could not get language from locale \"%s\": %s",
- loc_str, u_errorName(status))));
- return NULL;
- }
-
- /* C/POSIX locales aren't handled by uloc_getLanguageTag() */
- if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
- return pstrdup("en-US-u-va-posix");
-
/*
* A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
* RFC5646 section 4.4). Additionally, in older ICU versions,
/* check for special language name */
if (strcmp(lang, "") == 0 ||
- strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
- strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
+ strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
found = true;
/* search for matching language within ICU */
{
#ifdef USE_ICU
UErrorCode status;
- char lang[ULOC_LANG_CAPACITY];
char *langtag;
size_t buflen = 32; /* arbitrary starting buffer size */
const bool strict = true;
- status = U_ZERO_ERROR;
- uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
- if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
- {
- pg_fatal("could not get language from locale \"%s\": %s",
- loc_str, u_errorName(status));
- return NULL;
- }
-
- /* C/POSIX locales aren't handled by uloc_getLanguageTag() */
- if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
- return pstrdup("en-US-u-va-posix");
-
/*
* A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
* RFC5646 section 4.4). Additionally, in older ICU versions,
/* check for special language name */
if (strcmp(lang, "") == 0 ||
- strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
- strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
+ strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
found = true;
/* search for matching language within ICU */
CREATE SCHEMA test_schema;
-- We need to do this this way to cope with varying names for encodings:
SET client_min_messages TO WARNING;
+SET icu_validation_level = disabled;
do $$
BEGIN
EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
quote_literal((SELECT CASE WHEN datlocprovider='i' THEN daticulocale ELSE datcollate END FROM pg_database WHERE datname = current_database())) || ');';
END
$$;
+RESET icu_validation_level;
RESET client_min_messages;
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
ERROR: parameter "locale" must be specified
-- We need to do this this way to cope with varying names for encodings:
SET client_min_messages TO WARNING;
+SET icu_validation_level = disabled;
do $$
BEGIN
END
$$;
+RESET icu_validation_level;
RESET client_min_messages;
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"