Avoid character classification in regex escape parsing.
author: Jeff Davis <[email protected]>
Fri, 21 Apr 2023 15:19:41 +0000 (08:19 -0700)
committer: Jeff Davis <[email protected]>
Fri, 21 Apr 2023 15:19:41 +0000 (08:19 -0700)
For regex escape sequences, just test directly for the relevant ASCII
characters rather than using locale-sensitive character
classification.

This fixes an assertion failure when a locale considers a non-ASCII
character, such as "൧", to be a digit.

Reported-by: Richard Guo
Discussion: https://postgr.es/m/CAMbWs49Q6UoKGeT8pBkMtJGJd+16CBFZaaWUk9Du+2ERE5g_YA@mail.gmail.com
Backpatch-through: 11

src/backend/regex/regc_lex.c

index 4780d79f097fc2b02331efe0f9c44ff1f40f6f94..38c09b112321d25037ec1e638240bd5a79bf8355 100644 (file)
@@ -613,7 +613,11 @@ lexescape(struct vars *v)
 
    assert(!ATEOS());
    c = *v->now++;
-   if (!iscalnum(c))
+
+   /* if it's not alphanumeric ASCII, treat it as a plain character */
+   if (!('a' <= c && c <= 'z') &&
+       !('A' <= c && c <= 'Z') &&
+       !('0' <= c && c <= '9'))
        RETV(PLAIN, c);
 
    NOTE(REG_UNONPOSIX);
@@ -755,8 +759,11 @@ lexescape(struct vars *v)
            RETV(PLAIN, c);
            break;
        default:
-           assert(iscalpha(c));
-           FAILW(REG_EESCAPE); /* unknown alphabetic escape */
+           /*
+            * Throw an error for unrecognized ASCII alpha escape sequences,
+            * which reserves them for future use if needed.
+            */
+           FAILW(REG_EESCAPE);
            break;
    }
    assert(NOTREACHED);