Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
47383403
Commit
47383403
authored
Aug 15, 2007
by
Martin v. Löwis
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implement PEP 3131. Add isidentifier to str.
parent
32c4ac01
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
152 additions
and
5 deletions
+152
-5
Doc/lib/libstdtypes.tex
Doc/lib/libstdtypes.tex
+5
-0
Include/errcode.h
Include/errcode.h
+1
-0
Include/unicodeobject.h
Include/unicodeobject.h
+6
-0
Lib/test/badsyntax_3131.py
Lib/test/badsyntax_3131.py
+2
-0
Lib/test/test_pep3131.py
Lib/test/test_pep3131.py
+29
-0
Lib/test/test_unicode.py
Lib/test/test_unicode.py
+13
-0
Misc/NEWS
Misc/NEWS
+2
-0
Objects/unicodeobject.c
Objects/unicodeobject.c
+44
-1
Parser/tokenizer.c
Parser/tokenizer.c
+26
-3
Python/ast.c
Python/ast.c
+20
-1
Python/pythonrun.c
Python/pythonrun.c
+4
-0
No files found.
Doc/lib/libstdtypes.tex
View file @
47383403
...
...
@@ -653,6 +653,11 @@ is at least one character, false otherwise.
For 8-bit strings, this method is locale-dependent.
\end{methoddesc}
\begin{methoddesc}
[str]
{
isidentifier
}{}
Return True if S is a valid identifier according
to the language definition.
\end{methoddesc}
\begin{methoddesc}
[str]
{
islower
}{}
Return true if all cased characters in the string are lowercase and
there is at least one cased character, false otherwise.
...
...
Include/errcode.h
View file @
47383403
...
...
@@ -29,6 +29,7 @@ extern "C" {
#define E_EOFS 23
/* EOF in triple-quoted string */
#define E_EOLS 24
/* EOL in single-quoted string */
#define E_LINECONT 25
/* Unexpected characters after a line continuation */
#define E_IDENTIFIER 26
/* Invalid characters in identifier */
#ifdef __cplusplus
}
...
...
Include/unicodeobject.h
View file @
47383403
...
...
@@ -182,6 +182,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
# define PyUnicode_IsIdentifier PyUnicodeUCS2_IsIdentifier
# define PyUnicode_Join PyUnicodeUCS2_Join
# define PyUnicode_Partition PyUnicodeUCS2_Partition
# define PyUnicode_RPartition PyUnicodeUCS2_RPartition
...
...
@@ -268,6 +269,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
# define PyUnicode_IsIdentifier PyUnicodeUCS4_IsIdentifier
# define PyUnicode_Join PyUnicodeUCS4_Join
# define PyUnicode_Partition PyUnicodeUCS4_Partition
# define PyUnicode_RPartition PyUnicodeUCS4_RPartition
...
...
@@ -1250,6 +1252,10 @@ PyAPI_FUNC(int) PyUnicode_Contains(
PyObject
*
element
/* Element string */
);
/* Checks whether argument is a valid identifier.
   Returns 1 if s is a valid identifier and 0 otherwise; the empty
   string is not a valid identifier.  The implementation accesses s
   as a unicode object without performing a type check. */
PyAPI_FUNC
(
int
)
PyUnicode_IsIdentifier
(
PyObject
*
s
);
/* Externally visible for str.strip(unicode) */
PyAPI_FUNC
(
PyObject
*
)
_PyUnicode_XStrip
(
PyUnicodeObject
*
self
,
...
...
Lib/test/badsyntax_3131.py
0 → 100644
View file @
47383403
# -*- coding: utf-8 -*-
€
=
2
Lib/test/test_pep3131.py
0 → 100644
View file @
47383403
# -*- coding: utf-8 -*-
import
unittest
from
test
import
test_support
class
PEP3131Test
(
unittest
.
TestCase
):
def
test_valid
(
self
):
class
T
:
ä
=
1
µ
=
2
# this is a compatibility character
蟒
=
3
self
.
assertEquals
(
getattr
(
T
,
"
\
xe4
"
),
1
)
self
.
assertEquals
(
getattr
(
T
,
"
\
u03bc
"
),
2
)
self
.
assertEquals
(
getattr
(
T
,
'
\
u87d2
'
),
3
)
def
test_invalid
(
self
):
try
:
from
test
import
badsyntax_3131
except
SyntaxError
as
s
:
self
.
assertEquals
(
str
(
s
),
"invalid character in identifier (badsyntax_3131.py, line 2)"
)
else
:
self
.
fail
(
"expected exception didn't occur"
)
# Run the PEP3131Test case through test_support.run_unittest().
def
test_main
():
test_support
.
run_unittest
(
PEP3131Test
)
if
__name__
==
"__main__"
:
test_main
()
Lib/test/test_unicode.py
View file @
47383403
...
...
@@ -313,6 +313,19 @@ class UnicodeTest(
self
.
assertRaises
(
TypeError
,
"abc"
.
isnumeric
,
42
)
def
test_isidentifier
(
self
):
self
.
assertTrue
(
"a"
.
isidentifier
())
self
.
assertTrue
(
"Z"
.
isidentifier
())
self
.
assertTrue
(
"_"
.
isidentifier
())
self
.
assertTrue
(
"b0"
.
isidentifier
())
self
.
assertTrue
(
"bc"
.
isidentifier
())
self
.
assertTrue
(
"b_"
.
isidentifier
())
self
.
assertTrue
(
""
.
isidentifier
())
self
.
assertFalse
(
" "
.
isidentifier
())
self
.
assertFalse
(
"["
.
isidentifier
())
self
.
assertFalse
(
""
.
isidentifier
())
def
test_contains
(
self
):
# Testing Unicode contains method
self
.
assert_
(
'a'
in
'abdb'
)
...
...
Misc/NEWS
View file @
47383403
...
...
@@ -26,6 +26,8 @@ TO DO
Core and Builtins
-----------------
- PEP 3131: Support non-ASCII identifiers.
- PEP 3120: Change default encoding to UTF-8.
- PEP 3123: Use proper C inheritance for PyObject.
...
...
Objects/unicodeobject.c
View file @
47383403
...
...
@@ -227,7 +227,8 @@ int unicode_resize(register PyUnicodeObject *unicode,
}
/* We allocate one more byte to make sure the string is
Ux0000 terminated -- XXX is this needed ?
Ux0000 terminated; some code (e.g. new_identifier)
relies on that.
XXX This allocator could further be enhanced by assuring that the
free list never reduces its size below 1.
...
...
@@ -6679,6 +6680,47 @@ unicode_isnumeric(PyUnicodeObject *self)
return
PyBool_FromLong
(
1
);
}
/* Return 1 if the unicode object `self` spells a valid identifier,
   0 otherwise.  The empty string is not an identifier. */
int
PyUnicode_IsIdentifier(PyObject *self)
{
    const Py_UNICODE *cur = PyUnicode_AS_UNICODE((PyUnicodeObject*)self);
    const Py_UNICODE *end = cur + PyUnicode_GET_SIZE(self);

    /* Nothing to inspect: an empty string is rejected outright. */
    if (cur == end)
        return 0;

    /* Per PEP 3131 the leading character has to be in XID_Start and
       every following character in XID_Continue; within ASCII the 2.x
       rules hold (letter or underscore first, then letters, digits or
       underscores).  With the current definitions of XID_Start and
       XID_Continue, testing those two properties is enough, apart from
       the underscore, which must be admitted explicitly as a first
       character. */
    if (*cur != 0x5F /* LOW LINE */ && !_PyUnicode_IsXidStart(*cur))
        return 0;

    /* Every remaining character must be a continuation character. */
    while (++cur < end) {
        if (!_PyUnicode_IsXidContinue(*cur))
            return 0;
    }
    return 1;
}
PyDoc_STRVAR
(
isidentifier__doc__
,
"S.isidentifier() -> bool
\n
\
\n
\
Return True if S is a valid identifier according
\n
\
to the language definition."
);
/* str.isidentifier(): wrap the C-level identifier check in a bool object. */
static PyObject*
unicode_isidentifier(PyObject *self)
{
    const int valid = PyUnicode_IsIdentifier(self);

    return PyBool_FromLong(valid);
}
PyDoc_STRVAR
(
join__doc__
,
"S.join(sequence) -> unicode
\n
\
\n
\
...
...
@@ -7714,6 +7756,7 @@ static PyMethodDef unicode_methods[] = {
{
"isnumeric"
,
(
PyCFunction
)
unicode_isnumeric
,
METH_NOARGS
,
isnumeric__doc__
},
{
"isalpha"
,
(
PyCFunction
)
unicode_isalpha
,
METH_NOARGS
,
isalpha__doc__
},
{
"isalnum"
,
(
PyCFunction
)
unicode_isalnum
,
METH_NOARGS
,
isalnum__doc__
},
{
"isidentifier"
,
(
PyCFunction
)
unicode_isidentifier
,
METH_NOARGS
,
isidentifier__doc__
},
{
"zfill"
,
(
PyCFunction
)
unicode_zfill
,
METH_VARARGS
,
zfill__doc__
},
#if 0
{"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
...
...
Parser/tokenizer.c
View file @
47383403
...
...
@@ -21,13 +21,15 @@
/* Cheap per-byte pre-filters for identifier scanning.  Every byte >= 128
   is accepted here; tok_get() remembers that it saw one and then calls
   verify_identifier() on the whole token.  The argument is evaluated
   several times, so pass an expression without side effects. */
#define is_potential_identifier_start(c) (\
              ((c) >= 'a' && (c) <= 'z')\
               || ((c) >= 'A' && (c) <= 'Z')\
               || (c) == '_'\
               || ((c) >= 128))

#define is_potential_identifier_char(c) (\
              ((c) >= 'a' && (c) <= 'z')\
               || ((c) >= 'A' && (c) <= 'Z')\
               || ((c) >= '0' && (c) <= '9')\
               || (c) == '_'\
               || ((c) >= 128))
extern
char
*
PyOS_Readline
(
FILE
*
,
FILE
*
,
char
*
);
/* Return malloc'ed string including trailing \n;
...
...
@@ -1070,6 +1072,19 @@ indenterror(struct tok_state *tok)
return
0
;
}
#ifdef PGEN
#define verify_identifier(s,e) 1
#else
/* Verify that the identifier follows PEP 3131.

   start and end delimit the UTF-8 encoded bytes of the candidate
   identifier; end - start is the byte length handed to the decoder.
   Returns the result of PyUnicode_IsIdentifier() on the decoded text,
   or 0 if the bytes cannot be decoded. */
static int
verify_identifier(char *start, char *end)
{
    PyObject *s;
    int result;

    s = PyUnicode_DecodeUTF8(start, end - start, NULL);
    if (s == NULL) {
        /* Decoding failed, so there is no object to inspect or release.
           Discard the pending exception and report the token as an
           invalid identifier instead of dereferencing NULL. */
        PyErr_Clear();
        return 0;
    }
    result = PyUnicode_IsIdentifier(s);
    Py_DECREF(s);
    return result;
}
#endif
/* Get next token, after space stripping etc. */
...
...
@@ -1077,7 +1092,7 @@ static int
tok_get
(
register
struct
tok_state
*
tok
,
char
**
p_start
,
char
**
p_end
)
{
register
int
c
;
int
blankline
;
int
blankline
,
nonascii
;
*
p_start
=
*
p_end
=
NULL
;
nextline:
...
...
@@ -1195,6 +1210,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
}
/* Identifier (most frequent token!) */
nonascii
=
0
;
if
(
is_potential_identifier_start
(
c
))
{
/* Process r"", u"" and ur"" */
switch
(
c
)
{
...
...
@@ -1214,9 +1230,16 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
break
;
}
while
(
is_potential_identifier_char
(
c
))
{
if
(
c
>=
128
)
nonascii
=
1
;
c
=
tok_nextc
(
tok
);
}
tok_backup
(
tok
,
c
);
if
(
nonascii
&&
!
verify_identifier
(
tok
->
start
,
tok
->
cur
))
{
tok
->
done
=
E_IDENTIFIER
;
return
ERRORTOKEN
;
}
*
p_start
=
tok
->
start
;
*
p_end
=
tok
->
cur
;
return
NAME
;
...
...
Python/ast.c
View file @
47383403
...
...
@@ -47,8 +47,27 @@ static PyObject *parsestrplus(struct compiling *, const node *n,
#define COMP_SETCOMP 2
static
identifier
new_identifier
(
const
char
*
n
,
PyArena
*
arena
)
{
new_identifier
(
const
char
*
n
,
PyArena
*
arena
)
{
PyObject
*
id
=
PyUnicode_DecodeUTF8
(
n
,
strlen
(
n
),
NULL
);
Py_UNICODE
*
u
=
PyUnicode_AS_UNICODE
(
id
);
/* Check whether there are non-ASCII characters in the
identifier; if so, normalize to NFKC. */
for
(;
*
u
;
u
++
)
{
if
(
*
u
>=
128
)
{
PyObject
*
m
=
PyImport_ImportModule
(
"unicodedata"
);
PyObject
*
id2
;
if
(
!
m
)
return
NULL
;
id2
=
PyObject_CallMethod
(
m
,
"normalize"
,
"sO"
,
"NFKC"
,
id
);
Py_DECREF
(
m
);
if
(
!
id2
)
return
NULL
;
Py_DECREF
(
id
);
id
=
id2
;
break
;
}
}
PyUnicode_InternInPlace
(
&
id
);
PyArena_AddPyObject
(
arena
,
id
);
return
id
;
...
...
Python/pythonrun.c
View file @
47383403
...
...
@@ -1530,6 +1530,10 @@ err_input(perrdetail *err)
case
E_LINECONT
:
msg
=
"unexpected character after line continuation character"
;
break
;
case
E_IDENTIFIER
:
msg
=
"invalid character in identifier"
;
break
;
default:
fprintf
(
stderr
,
"error=%d
\n
"
,
err
->
error
);
msg
=
"unknown parsing error"
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment