These do the same thing as the standard isdigit(), isspace(), and
isprint() but with multibyte and encoding support. But all the
callers are only interested in analyzing single-byte ASCII characters.
So this extra layer is overkill and we can replace the uses with the
standard functions.
All the t_is*() functions in ts_locale.c are under scrutiny because
they don't use the common locale provider framework but instead use
the global libc locale settings. For the functions being touched by
this commit, we don't need all that anyway, as mentioned above, so the
simplest solution is to just remove them. The few remaining t_is*()
functions will need a different treatment in a separate patch.
pg_trgm has some compile-time options with macros such as
KEEPONLYALNUM. These are not documented, and the non-default variant
is not supported by any test cases. As part of this undertaking, I'm
removing the non-default variant, as it is in the way of cleanup. So
in this case, the not-KEEPONLYALNUM code path is gone.
Reviewed-by: Jeff Davis <[email protected]>
Discussion: https://www.postgresql.org/message-id/flat/653f3b84-fc87-45a7-9a0c-bfb4fcab3e7d%40eisentraut.org
char *start;
*end = NULL;
- while (*in && t_isspace(in))
+ while (*in && isspace((unsigned char) *in))
in += pg_mblen(in);
if (!*in || *in == '#')
return NULL;
start = in;
- while (*in && !t_isspace(in))
+ while (*in && !isspace((unsigned char) *in))
in += pg_mblen(in);
*end = in;
case LQPRS_WAITFNUM:
if (t_iseq(ptr, ','))
state = LQPRS_WAITSNUM;
- else if (t_isdigit(ptr))
+ else if (isdigit((unsigned char) *ptr))
{
int low = atoi(ptr);
UNCHAR;
break;
case LQPRS_WAITSNUM:
- if (t_isdigit(ptr))
+ if (isdigit((unsigned char) *ptr))
{
int high = atoi(ptr);
case LQPRS_WAITCLOSE:
if (t_iseq(ptr, '}'))
state = LQPRS_WAITEND;
- else if (!t_isdigit(ptr))
+ else if (!isdigit((unsigned char) *ptr))
UNCHAR;
break;
case LQPRS_WAITND:
}
else if (t_iseq(ptr, ','))
state = LQPRS_WAITSNUM;
- else if (!t_isdigit(ptr))
+ else if (!isdigit((unsigned char) *ptr))
UNCHAR;
break;
case LQPRS_WAITEND:
*lenval = charlen;
*flag = 0;
}
- else if (!t_isspace(state->buf))
+ else if (!isspace((unsigned char) *state->buf))
ereturn(state->escontext, ERR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("operand syntax error")));
*/
#define LPADDING 2
#define RPADDING 1
-#define KEEPONLYALNUM
/*
* Caution: IGNORECASE macro means that trigrams are case-insensitive.
* If this macro is disabled, the ~* and ~~* operators must be removed from
*(((char*)(a))+2) = *(((char*)(b))+2); \
} while(0)
-#ifdef KEEPONLYALNUM
#define ISWORDCHR(c) (t_isalnum(c))
#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )
-#else
-#define ISWORDCHR(c) (!t_isspace(c))
-#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )
-#endif
#define ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
#define ISESCAPECHAR(x) (*(x) == '\\') /* Wildcard escape character */
{
ptrlen = pg_mblen(ptr);
/* ignore whitespace, but end src or trg */
- if (t_isspace(ptr))
+ if (isspace((unsigned char) *ptr))
{
if (state == 1)
state = 2;
char *lastchar;
/* Skip leading spaces */
- while (*in && t_isspace(in))
+ while (*in && isspace((unsigned char) *in))
in += pg_mblen(in);
/* Return NULL on empty lines */
lastchar = start = in;
/* Find end of word */
- while (*in && !t_isspace(in))
+ while (*in && !isspace((unsigned char) *in))
{
lastchar = in;
in += pg_mblen(in);
ptr = line;
/* is it a comment? */
- while (*ptr && t_isspace(ptr))
+ while (*ptr && isspace((unsigned char) *ptr))
ptr += pg_mblen(ptr);
if (t_iseq(ptr, '#') || *ptr == '\0' ||
errmsg("unexpected delimiter")));
state = TR_WAITSUBS;
}
- else if (!t_isspace(ptr))
+ else if (!isspace((unsigned char) *ptr))
{
beginwrd = ptr;
state = TR_INLEX;
newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
state = TR_WAITSUBS;
}
- else if (t_isspace(ptr))
+ else if (isspace((unsigned char) *ptr))
{
newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
state = TR_WAITLEX;
state = TR_INSUBS;
beginwrd = ptr + pg_mblen(ptr);
}
- else if (!t_isspace(ptr))
+ else if (!isspace((unsigned char) *ptr))
{
useasis = false;
beginwrd = ptr;
}
else if (state == TR_INSUBS)
{
- if (t_isspace(ptr))
+ if (isspace((unsigned char) *ptr))
{
if (ptr == beginwrd)
ereport(ERROR,
*sflagset = next;
while (**sflagset)
{
- if (t_isdigit(*sflagset))
+ if (isdigit((unsigned char) **sflagset))
{
if (!met_comma)
ereport(ERROR,
*sflagset)));
met_comma = true;
}
- else if (!t_isspace(*sflagset))
+ else if (!isspace((unsigned char) **sflagset))
{
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
while (*s)
{
/* we allow only single encoded flags for faster works */
- if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
+ if (pg_mblen(s) == 1 && isprint((unsigned char) *s) && !isspace((unsigned char) *s))
s++;
else
{
s = line;
while (*s)
{
- if (t_isspace(s))
+ if (isspace((unsigned char) *s))
{
*s = '\0';
break;
{
if (t_iseq(*str, '#'))
return false;
- else if (!t_isspace(*str))
+ else if (!isspace((unsigned char) **str))
{
int clen = pg_mblen(*str);
}
else /* state == PAE_INMASK */
{
- if (t_isspace(*str))
+ if (isspace((unsigned char) **str))
{
*next = '\0';
return true;
{
if (t_iseq(str, '#'))
return false;
- else if (!t_isspace(str))
+ else if (!isspace((unsigned char) *str))
{
COPYCHAR(pmask, str);
pmask += pg_mblen(str);
*pmask = '\0';
state = PAE_WAIT_FIND;
}
- else if (!t_isspace(str))
+ else if (!isspace((unsigned char) *str))
{
COPYCHAR(pmask, str);
pmask += pg_mblen(str);
prepl += pg_mblen(str);
state = PAE_INREPL;
}
- else if (!t_isspace(str))
+ else if (!isspace((unsigned char) *str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
COPYCHAR(pfind, str);
pfind += pg_mblen(str);
}
- else if (!t_isspace(str))
+ else if (!isspace((unsigned char) *str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
prepl += pg_mblen(str);
state = PAE_INREPL;
}
- else if (!t_isspace(str))
+ else if (!isspace((unsigned char) *str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
COPYCHAR(prepl, str);
prepl += pg_mblen(str);
}
- else if (!t_isspace(str))
+ else if (!isspace((unsigned char) *str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
char *sflag;
int clen;
- while (*s && t_isspace(s))
+ while (*s && isspace((unsigned char) *s))
s += pg_mblen(s);
if (!*s)
/* Get flag without \n */
sflag = sbuf;
- while (*s && !t_isspace(s) && *s != '\n')
+ while (*s && !isspace((unsigned char) *s) && *s != '\n')
{
clen = pg_mblen(s);
COPYCHAR(sflag, s);
while ((recoded = tsearch_readline(&trst)) != NULL)
{
- if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
+ if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
{
pfree(recoded);
continue;
{
char *s = recoded + strlen("FLAG");
- while (*s && t_isspace(s))
+ while (*s && isspace((unsigned char) *s))
s += pg_mblen(s);
if (*s)
{
int fields_read;
- if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
+ if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
goto nextline;
fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
s = findchar2(recoded, 'l', 'L');
if (s)
{
- while (*s && !t_isspace(s))
+ while (*s && !isspace((unsigned char) *s))
s += pg_mblen(s);
- while (*s && t_isspace(s))
+ while (*s && isspace((unsigned char) *s))
s += pg_mblen(s);
if (*s && pg_mblen(s) == 1)
s = recoded + 4; /* we need non-lowercased string */
flagflags = 0;
- while (*s && t_isspace(s))
+ while (*s && isspace((unsigned char) *s))
s += pg_mblen(s);
if (*s == '*')
s++;
if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
- t_isspace(s))
+ isspace((unsigned char) *s))
{
oldformat = true;
goto nextline;
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid affix alias \"%s\"",
Conf->Spell[i]->p.flag)));
- if (*end != '\0' && !t_isdigit(end) && !t_isspace(end))
+ if (*end != '\0' && !isdigit((unsigned char) *end) && !isspace((unsigned char) *end))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid affix alias \"%s\"",
*/
#define WC_BUF_LEN 3
-int
-t_isdigit(const char *ptr)
-{
- int clen = pg_mblen(ptr);
- wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
-
- if (clen == 1 || database_ctype_is_c)
- return isdigit(TOUCHAR(ptr));
-
- char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
-
- return iswdigit((wint_t) character[0]);
-}
-
-int
-t_isspace(const char *ptr)
-{
- int clen = pg_mblen(ptr);
- wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
-
- if (clen == 1 || database_ctype_is_c)
- return isspace(TOUCHAR(ptr));
-
- char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
-
- return iswspace((wint_t) character[0]);
-}
-
int
t_isalpha(const char *ptr)
{
return iswalnum((wint_t) character[0]);
}
-int
-t_isprint(const char *ptr)
-{
- int clen = pg_mblen(ptr);
- wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
-
- if (clen == 1 || database_ctype_is_c)
- return isprint(TOUCHAR(ptr));
-
- char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
-
- return iswprint((wint_t) character[0]);
-}
-
/*
* Set up to read a file using tsearch_readline(). This facility is
char *pbuf = line;
/* Trim trailing space */
- while (*pbuf && !t_isspace(pbuf))
+ while (*pbuf && !isspace((unsigned char) *pbuf))
pbuf += pg_mblen(pbuf);
*pbuf = '\0';
continue;
}
- if (!t_isdigit(ptr))
+ if (!isdigit((unsigned char) *ptr))
return false;
errno = 0;
* So we still treat OR literal as operation with possibly incorrect
* operand and will not search it as lexeme
*/
- if (!t_isspace(ptr))
+ if (!isspace((unsigned char) *ptr))
break;
}
/* generic syntax error message is fine */
return PT_ERR;
}
- else if (!t_isspace(state->buf))
+ else if (!isspace((unsigned char) *state->buf))
{
/*
* We rely on the tsvector parser to parse the value for
{
return (state->count) ? PT_ERR : PT_END;
}
- else if (!t_isspace(state->buf))
+ else if (!isspace((unsigned char) *state->buf))
{
return PT_ERR;
}
state->state = WAITOPERAND;
continue;
}
- else if (!t_isspace(state->buf))
+ else if (!isspace((unsigned char) *state->buf))
{
/*
* We rely on the tsvector parser to parse the value for
state->buf++;
continue;
}
- else if (!t_isspace(state->buf))
+ else if (!isspace((unsigned char) *state->buf))
{
/* insert implicit AND between operands */
state->state = WAITOPERAND;
else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
(state->is_web && t_iseq(state->prsbuf, '"')))
PRSSYNTAXERROR;
- else if (!t_isspace(state->prsbuf))
+ else if (!isspace((unsigned char) *state->prsbuf))
{
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
- else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
+ else if (isspace((unsigned char) *state->prsbuf) || *(state->prsbuf) == '\0' ||
(state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
(state->is_web && t_iseq(state->prsbuf, '"')))
{
}
else if (statecode == INPOSINFO)
{
- if (t_isdigit(state->prsbuf))
+ if (isdigit((unsigned char) *state->prsbuf))
{
if (posalen == 0)
{
PRSSYNTAXERROR;
WEP_SETWEIGHT(pos[npos - 1], 0);
}
- else if (t_isspace(state->prsbuf) ||
+ else if (isspace((unsigned char) *state->prsbuf) ||
*(state->prsbuf) == '\0')
RETURN_TOKEN;
- else if (!t_isdigit(state->prsbuf))
+ else if (!isdigit((unsigned char) *state->prsbuf))
PRSSYNTAXERROR;
}
else /* internal error */
#define COPYCHAR(d,s) memcpy(d, s, pg_mblen(s))
-extern int t_isdigit(const char *ptr);
-extern int t_isspace(const char *ptr);
extern int t_isalpha(const char *ptr);
extern int t_isalnum(const char *ptr);
-extern int t_isprint(const char *ptr);
extern char *lowerstr(const char *str);
extern char *lowerstr_with_len(const char *str, int len);