Commit 4ceeeb09 authored by Benjamin Peterson

ensure that the locale does not affect the tokenization of identifiers

parent ab8b9cae
...@@ -12,6 +12,8 @@ What's New in Python 2.7 beta 1? ...@@ -12,6 +12,8 @@ What's New in Python 2.7 beta 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Ensure that tokenization of identifiers is not affected by locale.
- Issue #1222585: Added LDCXXSHARED for C++ support. Patch by Arfrever. - Issue #1222585: Added LDCXXSHARED for C++ support. Patch by Arfrever.
- Raise a TypeError when trying to delete a T_STRING_INPLACE struct member. - Raise a TypeError when trying to delete a T_STRING_INPLACE struct member.
......
...@@ -93,6 +93,21 @@ char *_PyParser_TokenNames[] = { ...@@ -93,6 +93,21 @@ char *_PyParser_TokenNames[] = {
}; };
/* Ensure that the locale does not interfere with tokenization. */
/* Return 1 if c is an ASCII letter ('a'-'z' or 'A'-'Z'), otherwise 0.
   The test is done with explicit range comparisons on the character
   value instead of a <ctype.h> call, so the result is the same
   regardless of which locale is active. */
static int
ascii_isalpha(int c)
{
    if (c >= 'a' && c <= 'z') {
        return 1;
    }
    if (c >= 'A' && c <= 'Z') {
        return 1;
    }
    return 0;
}
/* Return 1 if c is an ASCII letter or an ASCII decimal digit ('0'-'9'),
   otherwise 0.  Letters are recognised via ascii_isalpha(); digits are
   checked with an explicit range comparison. */
static int
ascii_isalnum(int c)
{
    if (ascii_isalpha(c)) {
        return 1;
    }
    return (c >= '0' && c <= '9') ? 1 : 0;
}
/* Create and initialize a new tok_state structure */ /* Create and initialize a new tok_state structure */
static struct tok_state * static struct tok_state *
...@@ -230,7 +245,7 @@ get_coding_spec(const char *s, Py_ssize_t size) ...@@ -230,7 +245,7 @@ get_coding_spec(const char *s, Py_ssize_t size)
} while (t[0] == '\x20' || t[0] == '\t'); } while (t[0] == '\x20' || t[0] == '\t');
begin = t; begin = t;
while (isalnum(Py_CHARMASK(t[0])) || while (ascii_isalnum(Py_CHARMASK(t[0])) ||
t[0] == '-' || t[0] == '_' || t[0] == '.') t[0] == '-' || t[0] == '_' || t[0] == '.')
t++; t++;
...@@ -1185,7 +1200,6 @@ indenterror(struct tok_state *tok) ...@@ -1185,7 +1200,6 @@ indenterror(struct tok_state *tok)
return 0; return 0;
} }
/* Get next token, after space stripping etc. */ /* Get next token, after space stripping etc. */
static int static int
...@@ -1341,7 +1355,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end) ...@@ -1341,7 +1355,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
} }
/* Identifier (most frequent token!) */ /* Identifier (most frequent token!) */
if (isalpha(c) || c == '_') { if (ascii_isalpha(c) || c == '_') {
/* Process r"", u"" and ur"" */ /* Process r"", u"" and ur"" */
switch (c) { switch (c) {
case 'b': case 'b':
...@@ -1367,7 +1381,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end) ...@@ -1367,7 +1381,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
goto letter_quote; goto letter_quote;
break; break;
} }
while (isalnum(c) || c == '_') { while (ascii_isalnum(c) || c == '_') {
c = tok_nextc(tok); c = tok_nextc(tok);
} }
tok_backup(tok, c); tok_backup(tok, c);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment