Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
ad446d57
Commit
ad446d57
authored
Nov 10, 2014
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #22578: Added attributes to the re.error class.
parent
eb99e515
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
174 additions
and
62 deletions
+174
-62
Doc/library/re.rst
Doc/library/re.rst
+25
-2
Lib/sre_constants.py
Lib/sre_constants.py
+29
-1
Lib/sre_parse.py
Lib/sre_parse.py
+82
-59
Lib/test/test_re.py
Lib/test/test_re.py
+36
-0
Misc/NEWS
Misc/NEWS
+2
-0
No files found.
Doc/library/re.rst
View file @
ad446d57
...
@@ -733,13 +733,36 @@ form.
...
@@ -733,13 +733,36 @@ form.
Clear the regular expression cache.
Clear the regular expression cache.
.. exception:: error
.. exception:: error
(msg, pattern=None, pos=None)
Exception raised when a string passed to one of the functions here is not a
Exception raised when a string passed to one of the functions here is not a
valid regular expression (for example, it might contain unmatched parentheses)
valid regular expression (for example, it might contain unmatched parentheses)
or when some other error occurs during compilation or matching. It is never an
or when some other error occurs during compilation or matching. It is never an
error if a string contains no match for a pattern.
error if a string contains no match for a pattern. The error instance has
the following additional attributes:
.. attribute:: msg
The unformatted error message.
.. attribute:: pattern
The regular expression pattern.
.. attribute:: pos
The index of *pattern* where compilation failed.
.. attribute:: lineno
The line corresponding to *pos*.
.. attribute:: colno
The column corresponding to *pos*.
.. versionchanged:: 3.5
Added additional attributes.
.. _re-objects:
.. _re-objects:
...
...
Lib/sre_constants.py
View file @
ad446d57
...
@@ -21,7 +21,35 @@ from _sre import MAXREPEAT, MAXGROUPS
...
@@ -21,7 +21,35 @@ from _sre import MAXREPEAT, MAXGROUPS
# should this really be here?
# should this really be here?
class
error
(
Exception
):
class
error
(
Exception
):
pass
def
__init__
(
self
,
msg
,
pattern
=
None
,
pos
=
None
):
self
.
msg
=
msg
self
.
pattern
=
pattern
self
.
pos
=
pos
if
pattern
is
not
None
and
pos
is
not
None
:
msg
=
'%s at position %d'
%
(
msg
,
pos
)
if
isinstance
(
pattern
,
str
):
newline
=
'
\
n
'
else
:
newline
=
b'
\
n
'
self
.
lineno
=
pattern
.
count
(
newline
,
0
,
pos
)
+
1
self
.
colno
=
pos
-
pattern
.
rfind
(
newline
,
0
,
pos
)
if
newline
in
pattern
:
msg
=
'%s (line %d, column %d)'
%
(
msg
,
self
.
lineno
,
self
.
colno
)
else
:
self
.
lineno
=
self
.
colno
=
None
super
().
__init__
(
msg
)
def
linecol
(
doc
,
pos
):
if
isinstance
(
pattern
,
str
):
newline
=
'
\
n
'
else
:
newline
=
b'
\
n
'
lineno
=
pattern
.
count
(
newline
,
0
,
pos
)
+
1
if
lineno
==
1
:
colno
=
pos
+
1
else
:
colno
=
pos
-
doc
.
rindex
(
newline
,
0
,
pos
)
return
lineno
,
colno
class
_NamedIntConstant
(
int
):
class
_NamedIntConstant
(
int
):
...
...
Lib/sre_parse.py
View file @
ad446d57
...
@@ -81,8 +81,8 @@ class Pattern:
...
@@ -81,8 +81,8 @@ class Pattern:
if name is not None:
if name is not None:
ogid = self.groupdict.get(name, None)
ogid = self.groupdict.get(name, None)
if ogid is not None:
if ogid is not None:
raise error("
redefinition
of
group
name
%
s
as
group
%
d
;
"
raise error("
redefinition
of
group
name
%
r
as
group
%
d
;
"
"
was
group
%
d
" % (
repr(name)
, gid, ogid))
"
was
group
%
d
" % (
name
, gid, ogid))
self.groupdict[name] = gid
self.groupdict[name] = gid
return gid
return gid
def closegroup(self, gid, p):
def closegroup(self, gid, p):
...
@@ -206,24 +206,25 @@ class SubPattern:
...
@@ -206,24 +206,25 @@ class SubPattern:
class Tokenizer:
class Tokenizer:
def __init__(self, string):
def __init__(self, string):
self.istext = isinstance(string, str)
self.istext = isinstance(string, str)
self.string = string
if not self.istext:
if not self.istext:
string = str(string, 'latin1')
string = str(string, 'latin1')
self.string = string
self.
decoded_
string = string
self.index = 0
self.index = 0
self.__next()
self.__next()
def __next(self):
def __next(self):
index = self.index
index = self.index
try:
try:
char = self.string[index]
char = self.
decoded_
string[index]
except IndexError:
except IndexError:
self.next = None
self.next = None
return
return
if char == "
\\
":
if char == "
\\
":
index += 1
index += 1
try:
try:
char += self.string[index]
char += self.
decoded_
string[index]
except IndexError:
except IndexError:
raise
error("
bogus
escape
(
end
of
line
)
")
raise
self.error("
bogus
escape
(
end
of
line
)
") from None
self.index = index + 1
self.index = index + 1
self.next = char
self.next = char
def match(self, char):
def match(self, char):
...
@@ -250,15 +251,19 @@ class Tokenizer:
...
@@ -250,15 +251,19 @@ class Tokenizer:
c = self.next
c = self.next
self.__next()
self.__next()
if c is None:
if c is None:
raise error("
unterminated
name
")
raise
self.
error("
unterminated
name
")
if c == terminator:
if c == terminator:
break
break
result += c
result += c
return result
return result
def tell(self):
def tell(self):
return self.index
, self.next
return self.index
- len(self.next or '')
def seek(self, index):
def seek(self, index):
self.index, self.next = index
self.index = index
self.__next()
def error(self, msg, offset=0):
return error(msg, self.string, self.tell() - offset)
# The following three functions are not used in this module anymore, but we keep
# The following three functions are not used in this module anymore, but we keep
# them here (with DeprecationWarnings) for backwards compatibility.
# them here (with DeprecationWarnings) for backwards compatibility.
...
@@ -322,8 +327,8 @@ def _class_escape(source, escape):
...
@@ -322,8 +327,8 @@ def _class_escape(source, escape):
escape += source.getwhile(2, OCTDIGITS)
escape += source.getwhile(2, OCTDIGITS)
c = int(escape[1:], 8)
c = int(escape[1:], 8)
if c > 0o377:
if c > 0o377:
raise error('octal escape value %r outside of '
raise
source.
error('octal escape value %r outside of '
'range 0-0o377' % escape
)
'range 0-0o377' % escape, len(escape)
)
return LITERAL, c
return LITERAL, c
elif c in DIGITS:
elif c in DIGITS:
raise ValueError
raise ValueError
...
@@ -331,7 +336,7 @@ def _class_escape(source, escape):
...
@@ -331,7 +336,7 @@ def _class_escape(source, escape):
return LITERAL, ord(escape[1])
return LITERAL, ord(escape[1])
except ValueError:
except ValueError:
pass
pass
raise
error("
bogus
escape
:
%
s
" % repr
(escape))
raise
source.error("
bogus
escape
:
%
r" % escape, len
(escape))
def _escape(source, escape, state):
def _escape(source, escape, state):
# handle escape code in expression
# handle escape code in expression
...
@@ -377,21 +382,23 @@ def _escape(source, escape, state):
...
@@ -377,21 +382,23 @@ def _escape(source, escape, state):
escape += source.get()
escape += source.get()
c = int(escape[1:], 8)
c = int(escape[1:], 8)
if c > 0o377:
if c > 0o377:
raise error('octal escape value %r outside of '
raise source.error('octal escape value %r outside of '
'range 0-0o377' % escape)
'range 0-0o377' % escape,
len(escape))
return LITERAL, c
return LITERAL, c
# not an octal escape, so this is a group reference
# not an octal escape, so this is a group reference
group = int(escape[1:])
group = int(escape[1:])
if group < state.groups:
if group < state.groups:
if not state.checkgroup(group):
if not state.checkgroup(group):
raise error("
cannot
refer
to
open
group
")
raise source.error("
cannot
refer
to
open
group
",
len(escape))
return GROUPREF, group
return GROUPREF, group
raise ValueError
raise ValueError
if len(escape) == 2:
if len(escape) == 2:
return LITERAL, ord(escape[1])
return LITERAL, ord(escape[1])
except ValueError:
except ValueError:
pass
pass
raise
error("
bogus
escape
:
%
s
" % repr
(escape))
raise
source.error("
bogus
escape
:
%
r" % escape, len
(escape))
def _parse_sub(source, state, nested=True):
def _parse_sub(source, state, nested=True):
# parse an alternation: a|b|c
# parse an alternation: a|b|c
...
@@ -404,7 +411,7 @@ def _parse_sub(source, state, nested=True):
...
@@ -404,7 +411,7 @@ def _parse_sub(source, state, nested=True):
if not sourcematch("
|
"):
if not sourcematch("
|
"):
break
break
if nested and source.next is not None and source.next != "
)
":
if nested and source.next is not None and source.next != "
)
":
raise error("
pattern
not
properly
closed
")
raise
source.
error("
pattern
not
properly
closed
")
if len(items) == 1:
if len(items) == 1:
return items[0]
return items[0]
...
@@ -449,11 +456,11 @@ def _parse_sub_cond(source, state, condgroup):
...
@@ -449,11 +456,11 @@ def _parse_sub_cond(source, state, condgroup):
if source.match("
|
"):
if source.match("
|
"):
item_no = _parse(source, state)
item_no = _parse(source, state)
if source.next == "
|
":
if source.next == "
|
":
raise error("
conditional
backref
with
more
than
two
branches
")
raise
source.
error("
conditional
backref
with
more
than
two
branches
")
else:
else:
item_no = None
item_no = None
if source.next is not None and source.next != "
)
":
if source.next is not None and source.next != "
)
":
raise error("
pattern
not
properly
closed
")
raise
source.
error("
pattern
not
properly
closed
")
subpattern = SubPattern(state)
subpattern = SubPattern(state)
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
return subpattern
return subpattern
...
@@ -510,7 +517,7 @@ def _parse(source, state):
...
@@ -510,7 +517,7 @@ def _parse(source, state):
while
True
:
while
True
:
this
=
sourceget
()
this
=
sourceget
()
if
this
is
None
:
if
this
is
None
:
raise
error
(
"unexpected end of regular expression"
)
raise
source
.
error
(
"unexpected end of regular expression"
)
if
this
==
"]"
and
set
!=
start
:
if
this
==
"]"
and
set
!=
start
:
break
break
elif
this
[
0
]
==
"
\
\
"
:
elif
this
[
0
]
==
"
\
\
"
:
...
@@ -521,7 +528,7 @@ def _parse(source, state):
...
@@ -521,7 +528,7 @@ def _parse(source, state):
# potential range
# potential range
this
=
sourceget
()
this
=
sourceget
()
if
this
is
None
:
if
this
is
None
:
raise
error
(
"unexpected end of regular expression"
)
raise
source
.
error
(
"unexpected end of regular expression"
)
if
this
==
"]"
:
if
this
==
"]"
:
if
code1
[
0
]
is
IN
:
if
code1
[
0
]
is
IN
:
code1
=
code1
[
1
][
0
]
code1
=
code1
[
1
][
0
]
...
@@ -533,11 +540,11 @@ def _parse(source, state):
...
@@ -533,11 +540,11 @@ def _parse(source, state):
else
:
else
:
code2
=
LITERAL
,
_ord
(
this
)
code2
=
LITERAL
,
_ord
(
this
)
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
raise
error
(
"bad character range"
)
raise
source
.
error
(
"bad character range"
,
len
(
this
)
)
lo
=
code1
[
1
]
lo
=
code1
[
1
]
hi
=
code2
[
1
]
hi
=
code2
[
1
]
if
hi
<
lo
:
if
hi
<
lo
:
raise
error
(
"bad character range"
)
raise
source
.
error
(
"bad character range"
,
len
(
this
)
)
setappend
((
RANGE
,
(
lo
,
hi
)))
setappend
((
RANGE
,
(
lo
,
hi
)))
else
:
else
:
if
code1
[
0
]
is
IN
:
if
code1
[
0
]
is
IN
:
...
@@ -555,6 +562,7 @@ def _parse(source, state):
...
@@ -555,6 +562,7 @@ def _parse(source, state):
elif
this
in
REPEAT_CHARS
:
elif
this
in
REPEAT_CHARS
:
# repeat previous item
# repeat previous item
here
=
source
.
tell
()
if
this
==
"?"
:
if
this
==
"?"
:
min
,
max
=
0
,
1
min
,
max
=
0
,
1
elif
this
==
"*"
:
elif
this
==
"*"
:
...
@@ -566,7 +574,6 @@ def _parse(source, state):
...
@@ -566,7 +574,6 @@ def _parse(source, state):
if
source
.
next
==
"}"
:
if
source
.
next
==
"}"
:
subpatternappend
((
LITERAL
,
_ord
(
this
)))
subpatternappend
((
LITERAL
,
_ord
(
this
)))
continue
continue
here
=
source
.
tell
()
min
,
max
=
0
,
MAXREPEAT
min
,
max
=
0
,
MAXREPEAT
lo
=
hi
=
""
lo
=
hi
=
""
while
source
.
next
in
DIGITS
:
while
source
.
next
in
DIGITS
:
...
@@ -589,18 +596,21 @@ def _parse(source, state):
...
@@ -589,18 +596,21 @@ def _parse(source, state):
if
max
>=
MAXREPEAT
:
if
max
>=
MAXREPEAT
:
raise
OverflowError
(
"the repetition number is too large"
)
raise
OverflowError
(
"the repetition number is too large"
)
if
max
<
min
:
if
max
<
min
:
raise
error
(
"bad repeat interval"
)
raise
source
.
error
(
"bad repeat interval"
,
source
.
tell
()
-
here
)
else
:
else
:
raise
error
(
"not supported"
)
raise
source
.
error
(
"not supported"
,
len
(
this
)
)
# figure out which item to repeat
# figure out which item to repeat
if
subpattern
:
if
subpattern
:
item
=
subpattern
[
-
1
:]
item
=
subpattern
[
-
1
:]
else
:
else
:
item
=
None
item
=
None
if
not
item
or
(
_len
(
item
)
==
1
and
item
[
0
][
0
]
==
AT
):
if
not
item
or
(
_len
(
item
)
==
1
and
item
[
0
][
0
]
==
AT
):
raise
error
(
"nothing to repeat"
)
raise
source
.
error
(
"nothing to repeat"
,
source
.
tell
()
-
here
+
len
(
this
))
if
item
[
0
][
0
]
in
_REPEATCODES
:
if
item
[
0
][
0
]
in
_REPEATCODES
:
raise
error
(
"multiple repeat"
)
raise
source
.
error
(
"multiple repeat"
,
source
.
tell
()
-
here
+
len
(
this
))
if
sourcematch
(
"?"
):
if
sourcematch
(
"?"
):
subpattern
[
-
1
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
subpattern
[
-
1
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
else
:
else
:
...
@@ -618,7 +628,7 @@ def _parse(source, state):
...
@@ -618,7 +628,7 @@ def _parse(source, state):
# options
# options
char
=
sourceget
()
char
=
sourceget
()
if
char
is
None
:
if
char
is
None
:
raise
error
(
"unexpected end of pattern"
)
raise
self
.
error
(
"unexpected end of pattern"
)
if
char
==
"P"
:
if
char
==
"P"
:
# python extensions
# python extensions
if
sourcematch
(
"<"
):
if
sourcematch
(
"<"
):
...
@@ -626,28 +636,32 @@ def _parse(source, state):
...
@@ -626,28 +636,32 @@ def _parse(source, state):
name
=
source
.
getuntil
(
">"
)
name
=
source
.
getuntil
(
">"
)
group
=
1
group
=
1
if
not
name
:
if
not
name
:
raise
error
(
"missing group name"
)
raise
source
.
error
(
"missing group name"
,
1
)
if
not
name
.
isidentifier
():
if
not
name
.
isidentifier
():
raise
error
(
"bad character in group name %r"
%
name
)
raise
source
.
error
(
"bad character in group name "
"%r"
%
name
,
len
(
name
)
+
1
)
elif
sourcematch
(
"="
):
elif
sourcematch
(
"="
):
# named backreference
# named backreference
name
=
source
.
getuntil
(
")"
)
name
=
source
.
getuntil
(
")"
)
if
not
name
:
if
not
name
:
raise
error
(
"missing group name"
)
raise
source
.
error
(
"missing group name"
,
1
)
if
not
name
.
isidentifier
():
if
not
name
.
isidentifier
():
raise
error
(
"bad character in backref group name "
raise
source
.
error
(
"bad character in backref "
"%r"
%
name
)
"group name %r"
%
name
,
len
(
name
)
+
1
)
gid
=
state
.
groupdict
.
get
(
name
)
gid
=
state
.
groupdict
.
get
(
name
)
if
gid
is
None
:
if
gid
is
None
:
msg
=
"unknown group name: {0!r}"
.
format
(
name
)
msg
=
"unknown group name: {0!r}"
.
format
(
name
)
raise
error
(
msg
)
raise
source
.
error
(
msg
,
len
(
name
)
+
1
)
subpatternappend
((
GROUPREF
,
gid
))
subpatternappend
((
GROUPREF
,
gid
))
continue
continue
else
:
else
:
char
=
sourceget
()
char
=
sourceget
()
if
char
is
None
:
if
char
is
None
:
raise
error
(
"unexpected end of pattern"
)
raise
source
.
error
(
"unexpected end of pattern"
)
raise
error
(
"unknown specifier: ?P%s"
%
char
)
raise
source
.
error
(
"unknown specifier: ?P%s"
%
char
,
len
(
char
))
elif
char
==
":"
:
elif
char
==
":"
:
# non-capturing group
# non-capturing group
group
=
2
group
=
2
...
@@ -655,7 +669,7 @@ def _parse(source, state):
...
@@ -655,7 +669,7 @@ def _parse(source, state):
# comment
# comment
while
True
:
while
True
:
if
source
.
next
is
None
:
if
source
.
next
is
None
:
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
if
sourceget
()
==
")"
:
if
sourceget
()
==
")"
:
break
break
continue
continue
...
@@ -665,11 +679,11 @@ def _parse(source, state):
...
@@ -665,11 +679,11 @@ def _parse(source, state):
if
char
==
"<"
:
if
char
==
"<"
:
char
=
sourceget
()
char
=
sourceget
()
if
char
is
None
or
char
not
in
"=!"
:
if
char
is
None
or
char
not
in
"=!"
:
raise
error
(
"syntax error"
)
raise
source
.
error
(
"syntax error"
)
dir
=
-
1
# lookbehind
dir
=
-
1
# lookbehind
p
=
_parse_sub
(
source
,
state
)
p
=
_parse_sub
(
source
,
state
)
if
not
sourcematch
(
")"
):
if
not
sourcematch
(
")"
):
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
if
char
==
"="
:
if
char
==
"="
:
subpatternappend
((
ASSERT
,
(
dir
,
p
)))
subpatternappend
((
ASSERT
,
(
dir
,
p
)))
else
:
else
:
...
@@ -680,23 +694,26 @@ def _parse(source, state):
...
@@ -680,23 +694,26 @@ def _parse(source, state):
condname
=
source
.
getuntil
(
")"
)
condname
=
source
.
getuntil
(
")"
)
group
=
2
group
=
2
if
not
condname
:
if
not
condname
:
raise
error
(
"missing group name"
)
raise
source
.
error
(
"missing group name"
,
1
)
if
condname
.
isidentifier
():
if
condname
.
isidentifier
():
condgroup
=
state
.
groupdict
.
get
(
condname
)
condgroup
=
state
.
groupdict
.
get
(
condname
)
if
condgroup
is
None
:
if
condgroup
is
None
:
msg
=
"unknown group name: {0!r}"
.
format
(
condname
)
msg
=
"unknown group name: {0!r}"
.
format
(
condname
)
raise
error
(
msg
)
raise
source
.
error
(
msg
,
len
(
condname
)
+
1
)
else
:
else
:
try
:
try
:
condgroup
=
int
(
condname
)
condgroup
=
int
(
condname
)
if
condgroup
<
0
:
if
condgroup
<
0
:
raise
ValueError
raise
ValueError
except
ValueError
:
except
ValueError
:
raise
error
(
"bad character in group name"
)
raise
source
.
error
(
"bad character in group name"
,
len
(
condname
)
+
1
)
if
not
condgroup
:
if
not
condgroup
:
raise
error
(
"bad group number"
)
raise
source
.
error
(
"bad group number"
,
len
(
condname
)
+
1
)
if
condgroup
>=
MAXGROUPS
:
if
condgroup
>=
MAXGROUPS
:
raise
error
(
"the group number is too large"
)
raise
source
.
error
(
"the group number is too large"
,
len
(
condname
)
+
1
)
elif
char
in
FLAGS
:
elif
char
in
FLAGS
:
# flags
# flags
state
.
flags
|=
FLAGS
[
char
]
state
.
flags
|=
FLAGS
[
char
]
...
@@ -704,20 +721,23 @@ def _parse(source, state):
...
@@ -704,20 +721,23 @@ def _parse(source, state):
state
.
flags
|=
FLAGS
[
sourceget
()]
state
.
flags
|=
FLAGS
[
sourceget
()]
verbose
=
state
.
flags
&
SRE_FLAG_VERBOSE
verbose
=
state
.
flags
&
SRE_FLAG_VERBOSE
else
:
else
:
raise
error
(
"unexpected end of pattern "
+
char
)
raise
source
.
error
(
"unexpected end of pattern"
)
if
group
:
if
group
:
# parse group contents
# parse group contents
if
group
==
2
:
if
group
==
2
:
# anonymous group
# anonymous group
group
=
None
group
=
None
else
:
else
:
try
:
group
=
state
.
opengroup
(
name
)
group
=
state
.
opengroup
(
name
)
except
error
as
err
:
raise
source
.
error
(
err
.
msg
,
len
(
name
)
+
1
)
if
condgroup
:
if
condgroup
:
p
=
_parse_sub_cond
(
source
,
state
,
condgroup
)
p
=
_parse_sub_cond
(
source
,
state
,
condgroup
)
else
:
else
:
p
=
_parse_sub
(
source
,
state
)
p
=
_parse_sub
(
source
,
state
)
if
not
sourcematch
(
")"
):
if
not
sourcematch
(
")"
):
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
if
group
is
not
None
:
if
group
is
not
None
:
state
.
closegroup
(
group
,
p
)
state
.
closegroup
(
group
,
p
)
subpatternappend
((
SUBPATTERN
,
(
group
,
p
)))
subpatternappend
((
SUBPATTERN
,
(
group
,
p
)))
...
@@ -725,10 +745,10 @@ def _parse(source, state):
...
@@ -725,10 +745,10 @@ def _parse(source, state):
while
True
:
while
True
:
char
=
sourceget
()
char
=
sourceget
()
if
char
is
None
:
if
char
is
None
:
raise
error
(
"unexpected end of pattern"
)
raise
source
.
error
(
"unexpected end of pattern"
)
if
char
==
")"
:
if
char
==
")"
:
break
break
raise
error
(
"unknown extension"
)
raise
source
.
error
(
"unknown extension"
,
len
(
char
)
)
elif
this
==
"^"
:
elif
this
==
"^"
:
subpatternappend
((
AT
,
AT_BEGINNING
))
subpatternappend
((
AT
,
AT_BEGINNING
))
...
@@ -737,7 +757,7 @@ def _parse(source, state):
...
@@ -737,7 +757,7 @@ def _parse(source, state):
subpattern
.
append
((
AT
,
AT_END
))
subpattern
.
append
((
AT
,
AT_END
))
else
:
else
:
raise
error
(
"parser error"
)
raise
source
.
error
(
"parser error"
,
len
(
this
)
)
return
subpattern
return
subpattern
...
@@ -768,9 +788,10 @@ def parse(str, flags=0, pattern=None):
...
@@ -768,9 +788,10 @@ def parse(str, flags=0, pattern=None):
if
source
.
next
is
not
None
:
if
source
.
next
is
not
None
:
if
source
.
next
==
")"
:
if
source
.
next
==
")"
:
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
else
:
else
:
raise
error
(
"bogus characters at end of regular expression"
)
raise
source
.
error
(
"bogus characters at end of regular expression"
,
len
(
tail
))
if
flags
&
SRE_FLAG_DEBUG
:
if
flags
&
SRE_FLAG_DEBUG
:
p
.
dump
()
p
.
dump
()
...
@@ -809,16 +830,18 @@ def parse_template(source, pattern):
...
@@ -809,16 +830,18 @@ def parse_template(source, pattern):
if
s
.
match
(
"<"
):
if
s
.
match
(
"<"
):
name
=
s
.
getuntil
(
">"
)
name
=
s
.
getuntil
(
">"
)
if
not
name
:
if
not
name
:
raise
error
(
"missing group name"
)
raise
s
.
error
(
"missing group name"
,
1
)
try
:
try
:
index
=
int
(
name
)
index
=
int
(
name
)
if
index
<
0
:
if
index
<
0
:
raise
error
(
"negative group number"
)
raise
s
.
error
(
"negative group number"
,
len
(
name
)
+
1
)
if
index
>=
MAXGROUPS
:
if
index
>=
MAXGROUPS
:
raise
error
(
"the group number is too large"
)
raise
s
.
error
(
"the group number is too large"
,
len
(
name
)
+
1
)
except
ValueError
:
except
ValueError
:
if
not
name
.
isidentifier
():
if
not
name
.
isidentifier
():
raise
error
(
"bad character in group name"
)
raise
s
.
error
(
"bad character in group name"
,
len
(
name
)
+
1
)
try
:
try
:
index
=
pattern
.
groupindex
[
name
]
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
except
KeyError
:
...
@@ -841,8 +864,8 @@ def parse_template(source, pattern):
...
@@ -841,8 +864,8 @@ def parse_template(source, pattern):
isoctal
=
True
isoctal
=
True
c
=
int
(
this
[
1
:],
8
)
c
=
int
(
this
[
1
:],
8
)
if
c
>
0o377
:
if
c
>
0o377
:
raise
error
(
'octal escape value %r outside of '
raise
s
.
error
(
'octal escape value %r outside of '
'range 0-0o377'
%
this
)
'range 0-0o377'
%
this
,
len
(
this
)
)
lappend
(
chr
(
c
))
lappend
(
chr
(
c
))
if
not
isoctal
:
if
not
isoctal
:
addgroup
(
int
(
this
[
1
:]))
addgroup
(
int
(
this
[
1
:]))
...
...
Lib/test/test_re.py
View file @
ad446d57
...
@@ -1419,6 +1419,42 @@ SUBPATTERN None
...
@@ -1419,6 +1419,42 @@ SUBPATTERN None
self.assertIsNone(re.match(b'(?Li)
\
xc5
', b'
\
xe5
'))
self.assertIsNone(re.match(b'(?Li)
\
xc5
', b'
\
xe5
'))
self.assertIsNone(re.match(b'(?Li)
\
xe5
', b'
\
xc5
'))
self.assertIsNone(re.match(b'(?Li)
\
xe5
', b'
\
xc5
'))
def test_error(self):
with self.assertRaises(re.error) as cm:
re.compile('(
\
u20ac
))')
err = cm.exception
self.assertIsInstance(err.pattern, str)
self.assertEqual(err.pattern, '(
\
u20ac
))')
self.assertEqual(err.pos, 3)
self.assertEqual(err.lineno, 1)
self.assertEqual(err.colno, 4)
self.assertIn(err.msg, str(err))
self.assertIn(' at position 3', str(err))
self.assertNotIn(' at position 3', err.msg)
# Bytes pattern
with self.assertRaises(re.error) as cm:
re.compile(b'(
\
xa4
))')
err = cm.exception
self.assertIsInstance(err.pattern, bytes)
self.assertEqual(err.pattern, b'(
\
xa4
))')
self.assertEqual(err.pos, 3)
# Multiline pattern
with self.assertRaises(re.error) as cm:
re.compile("""
(
abc
)
)
(
""", re.VERBOSE)
err = cm.exception
self.assertEqual(err.pos, 77)
self.assertEqual(err.lineno, 5)
self.assertEqual(err.colno, 17)
self.assertIn(err.msg, str(err))
self.assertIn(' at position 77', str(err))
self.assertIn('(line 5, column 17)', str(err))
class PatternReprTests(unittest.TestCase):
class PatternReprTests(unittest.TestCase):
def check(self, pattern, expected):
def check(self, pattern, expected):
...
...
Misc/NEWS
View file @
ad446d57
...
@@ -183,6 +183,8 @@ Core and Builtins
...
@@ -183,6 +183,8 @@ Core and Builtins
Library
Library
-------
-------
-
Issue
#
22578
:
Added
attributes
to
the
re
.
error
class
.
-
Issue
#
12728
:
Different
Unicode
characters
having
the
same
uppercase
but
-
Issue
#
12728
:
Different
Unicode
characters
having
the
same
uppercase
but
different
lowercase
are
now
matched
in
case
-
insensitive
regular
expressions
.
different
lowercase
are
now
matched
in
case
-
insensitive
regular
expressions
.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment