Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
4ceeeb09
Commit
4ceeeb09
authored
Apr 03, 2010
by
Benjamin Peterson
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ensure that the locale does not affect the tokenization of identifiers
parent
ab8b9cae
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
4 deletions
+20
-4
Misc/NEWS
Misc/NEWS
+2
-0
Parser/tokenizer.c
Parser/tokenizer.c
+18
-4
No files found.
Misc/NEWS
View file @
4ceeeb09
...
...
@@ -12,6 +12,8 @@ What's New in Python 2.7 beta 1?
Core and Builtins
-----------------
- Ensure that tokenization of identifiers is not affected by locale.
- Issue #1222585: Added LDCXXSHARED for C++ support. Patch by Arfrever.
- Raise a TypeError when trying to delete a T_STRING_INPLACE struct member.
...
...
Parser/tokenizer.c
View file @
4ceeeb09
...
...
@@ -93,6 +93,21 @@ char *_PyParser_TokenNames[] = {
};
/* Ensure that the locale does not interfere with tokenization. */
/* Locale-independent replacement for isalpha(): returns 1 when c is one
   of the ASCII letters 'a'..'z' or 'A'..'Z', and 0 for any other value.
   Only plain integer comparisons are used, so the result does not depend
   on the current C locale. */
static int
ascii_isalpha(int c)
{
    if (c >= 'a' && c <= 'z') {
        return 1;
    }
    return c >= 'A' && c <= 'Z';
}
/* Locale-independent replacement for isalnum(): returns 1 when c is an
   ASCII letter (as decided by ascii_isalpha) or an ASCII digit '0'..'9',
   and 0 otherwise. */
static int
ascii_isalnum(int c)
{
    if (ascii_isalpha(c)) {
        return 1;
    }
    return c >= '0' && c <= '9';
}
/* Create and initialize a new tok_state structure */
static
struct
tok_state
*
...
...
@@ -230,7 +245,7 @@ get_coding_spec(const char *s, Py_ssize_t size)
}
while
(
t
[
0
]
==
'\x20'
||
t
[
0
]
==
'\t'
);
begin
=
t
;
while
(
isalnum
(
Py_CHARMASK
(
t
[
0
]))
||
while
(
ascii_
isalnum
(
Py_CHARMASK
(
t
[
0
]))
||
t
[
0
]
==
'-'
||
t
[
0
]
==
'_'
||
t
[
0
]
==
'.'
)
t
++
;
...
...
@@ -1185,7 +1200,6 @@ indenterror(struct tok_state *tok)
return
0
;
}
/* Get next token, after space stripping etc. */
static
int
...
...
@@ -1341,7 +1355,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
}
/* Identifier (most frequent token!) */
if
(
isalpha
(
c
)
||
c
==
'_'
)
{
if
(
ascii_
isalpha
(
c
)
||
c
==
'_'
)
{
/* Process r"", u"" and ur"" */
switch
(
c
)
{
case
'b'
:
...
...
@@ -1367,7 +1381,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
goto
letter_quote
;
break
;
}
while
(
isalnum
(
c
)
||
c
==
'_'
)
{
while
(
ascii_
isalnum
(
c
)
||
c
==
'_'
)
{
c
=
tok_nextc
(
tok
);
}
tok_backup
(
tok
,
c
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment