Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
ad446d57
Commit
ad446d57
authored
Nov 10, 2014
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #22578: Added attributes to the re.error class.
parent
eb99e515
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
174 additions
and
62 deletions
+174
-62
Doc/library/re.rst
Doc/library/re.rst
+25
-2
Lib/sre_constants.py
Lib/sre_constants.py
+29
-1
Lib/sre_parse.py
Lib/sre_parse.py
+82
-59
Lib/test/test_re.py
Lib/test/test_re.py
+36
-0
Misc/NEWS
Misc/NEWS
+2
-0
No files found.
Doc/library/re.rst
View file @
ad446d57
...
...
@@ -733,13 +733,36 @@ form.
Clear the regular expression cache.
.. exception:: error(msg, pattern=None, pos=None)
Exception raised when a string passed to one of the functions here is not a
valid regular expression (for example, it might contain unmatched parentheses)
or when some other error occurs during compilation or matching. It is never an
error if a string contains no match for a pattern. The error instance has
the following additional attributes:
.. attribute:: msg
The unformatted error message.
.. attribute:: pattern
The regular expression pattern.
.. attribute:: pos
The index in *pattern* where compilation failed (may be ``None``).
.. attribute:: lineno
The line corresponding to *pos* (may be ``None``).
.. attribute:: colno
The column corresponding to *pos* (may be ``None``).
.. versionchanged:: 3.5
Added additional attributes.
.. _re-objects:
...
...
Lib/sre_constants.py
View file @
ad446d57
...
...
@@ -21,7 +21,35 @@ from _sre import MAXREPEAT, MAXGROUPS
# should this really be here?
class error(Exception):
    """Exception raised when a regular expression is invalid.

    Attributes:
        msg: The unformatted error message.
        pattern: The regular expression pattern (str or bytes), or None.
        pos: The index in *pattern* where compilation failed, or None.
        lineno: The 1-based line corresponding to *pos*, or None when
            *pattern* or *pos* was not supplied.
        colno: The 1-based column corresponding to *pos*, or None when
            *pattern* or *pos* was not supplied.

    ``str(exc)`` is *msg* followed by `` at position N`` when both *pattern*
    and *pos* are given, plus `` (line L, column C)`` when the pattern
    contains a newline.
    """

    def __init__(self, msg, pattern=None, pos=None):
        self.msg = msg
        self.pattern = pattern
        self.pos = pos
        if pattern is not None and pos is not None:
            msg = '%s at position %d' % (msg, pos)
            # Pick a newline of the same type as the pattern so that
            # count()/rfind() accept it for both str and bytes patterns.
            if isinstance(pattern, str):
                newline = '\n'
            else:
                newline = b'\n'
            self.lineno = pattern.count(newline, 0, pos) + 1
            # rfind() yields -1 when no newline precedes pos, which turns
            # the 0-based index into a 1-based column on the first line.
            self.colno = pos - pattern.rfind(newline, 0, pos)
            # Line/column information is only worth showing for
            # multi-line patterns.
            if newline in pattern:
                msg = '%s (line %d, column %d)' % (msg, self.lineno, self.colno)
        else:
            self.lineno = self.colno = None
        super().__init__(msg)
def linecol(doc, pos):
    """Return the 1-based ``(lineno, colno)`` of index *pos* in *doc*.

    *doc* may be a str or a bytes-like object providing ``count`` and
    ``rfind``; *pos* is a 0-based index into it.  Lines are delimited by
    ``'\\n'`` (or ``b'\\n'`` for non-str input).
    """
    # Use a newline of the same type as doc so the searches below work for
    # both text and bytes.
    if isinstance(doc, str):
        newline = '\n'
    else:
        newline = b'\n'
    lineno = doc.count(newline, 0, pos) + 1
    # rfind() yields -1 when pos is on the first line, giving pos + 1;
    # otherwise it is the index of the newline that starts the current line.
    colno = pos - doc.rfind(newline, 0, pos)
    return lineno, colno
class
_NamedIntConstant
(
int
):
...
...
Lib/sre_parse.py
View file @
ad446d57
...
...
@@ -81,8 +81,8 @@ class Pattern:
if name is not None:
ogid = self.groupdict.get(name, None)
if ogid is not None:
raise error("
redefinition
of
group
name
%
s
as
group
%
d
;
"
"
was
group
%
d
" % (
repr(name)
, gid, ogid))
raise error("
redefinition
of
group
name
%
r
as
group
%
d
;
"
"
was
group
%
d
" % (
name
, gid, ogid))
self.groupdict[name] = gid
return gid
def closegroup(self, gid, p):
...
...
@@ -206,24 +206,25 @@ class SubPattern:
class Tokenizer:
def __init__(self, string):
self.istext = isinstance(string, str)
self.string = string
if not self.istext:
string = str(string, 'latin1')
self.string = string
self.
decoded_
string = string
self.index = 0
self.__next()
def __next(self):
index = self.index
try:
char = self.string[index]
char = self.
decoded_
string[index]
except IndexError:
self.next = None
return
if char == "
\\
":
index += 1
try:
char += self.string[index]
char += self.
decoded_
string[index]
except IndexError:
raise
error("
bogus
escape
(
end
of
line
)
")
raise
self.error("
bogus
escape
(
end
of
line
)
") from None
self.index = index + 1
self.next = char
def match(self, char):
...
...
@@ -250,15 +251,19 @@ class Tokenizer:
c = self.next
self.__next()
if c is None:
raise error("
unterminated
name
")
raise
self.
error("
unterminated
name
")
if c == terminator:
break
result += c
return result
def tell(self):
return self.index
, self.next
return self.index
- len(self.next or '')
def seek(self, index):
self.index, self.next = index
self.index = index
self.__next()
def error(self, msg, offset=0):
return error(msg, self.string, self.tell() - offset)
# The following three functions are not used in this module anymore, but we keep
# them here (with DeprecationWarnings) for backwards compatibility.
...
...
@@ -322,8 +327,8 @@ def _class_escape(source, escape):
escape += source.getwhile(2, OCTDIGITS)
c = int(escape[1:], 8)
if c > 0o377:
raise error('octal escape value %r outside of '
'range 0-0o377' % escape
)
raise
source.
error('octal escape value %r outside of '
'range 0-0o377' % escape, len(escape)
)
return LITERAL, c
elif c in DIGITS:
raise ValueError
...
...
@@ -331,7 +336,7 @@ def _class_escape(source, escape):
return LITERAL, ord(escape[1])
except ValueError:
pass
raise
error("
bogus
escape
:
%
s
" % repr
(escape))
raise
source.error("
bogus
escape
:
%
r" % escape, len
(escape))
def _escape(source, escape, state):
# handle escape code in expression
...
...
@@ -377,21 +382,23 @@ def _escape(source, escape, state):
escape += source.get()
c = int(escape[1:], 8)
if c > 0o377:
raise error('octal escape value %r outside of '
'range 0-0o377' % escape)
raise source.error('octal escape value %r outside of '
'range 0-0o377' % escape,
len(escape))
return LITERAL, c
# not an octal escape, so this is a group reference
group = int(escape[1:])
if group < state.groups:
if not state.checkgroup(group):
raise error("
cannot
refer
to
open
group
")
raise source.error("
cannot
refer
to
open
group
",
len(escape))
return GROUPREF, group
raise ValueError
if len(escape) == 2:
return LITERAL, ord(escape[1])
except ValueError:
pass
raise
error("
bogus
escape
:
%
s
" % repr
(escape))
raise
source.error("
bogus
escape
:
%
r" % escape, len
(escape))
def _parse_sub(source, state, nested=True):
# parse an alternation: a|b|c
...
...
@@ -404,7 +411,7 @@ def _parse_sub(source, state, nested=True):
if not sourcematch("
|
"):
break
if nested and source.next is not None and source.next != "
)
":
raise error("
pattern
not
properly
closed
")
raise
source.
error("
pattern
not
properly
closed
")
if len(items) == 1:
return items[0]
...
...
@@ -449,11 +456,11 @@ def _parse_sub_cond(source, state, condgroup):
if source.match("
|
"):
item_no = _parse(source, state)
if source.next == "
|
":
raise error("
conditional
backref
with
more
than
two
branches
")
raise
source.
error("
conditional
backref
with
more
than
two
branches
")
else:
item_no = None
if source.next is not None and source.next != "
)
":
raise error("
pattern
not
properly
closed
")
raise
source.
error("
pattern
not
properly
closed
")
subpattern = SubPattern(state)
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
return subpattern
...
...
@@ -510,7 +517,7 @@ def _parse(source, state):
while
True
:
this
=
sourceget
()
if
this
is
None
:
raise
error
(
"unexpected end of regular expression"
)
raise
source
.
error
(
"unexpected end of regular expression"
)
if
this
==
"]"
and
set
!=
start
:
break
elif
this
[
0
]
==
"
\
\
"
:
...
...
@@ -521,7 +528,7 @@ def _parse(source, state):
# potential range
this
=
sourceget
()
if
this
is
None
:
raise
error
(
"unexpected end of regular expression"
)
raise
source
.
error
(
"unexpected end of regular expression"
)
if
this
==
"]"
:
if
code1
[
0
]
is
IN
:
code1
=
code1
[
1
][
0
]
...
...
@@ -533,11 +540,11 @@ def _parse(source, state):
else
:
code2
=
LITERAL
,
_ord
(
this
)
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
raise
error
(
"bad character range"
)
raise
source
.
error
(
"bad character range"
,
len
(
this
)
)
lo
=
code1
[
1
]
hi
=
code2
[
1
]
if
hi
<
lo
:
raise
error
(
"bad character range"
)
raise
source
.
error
(
"bad character range"
,
len
(
this
)
)
setappend
((
RANGE
,
(
lo
,
hi
)))
else
:
if
code1
[
0
]
is
IN
:
...
...
@@ -555,6 +562,7 @@ def _parse(source, state):
elif
this
in
REPEAT_CHARS
:
# repeat previous item
here
=
source
.
tell
()
if
this
==
"?"
:
min
,
max
=
0
,
1
elif
this
==
"*"
:
...
...
@@ -566,7 +574,6 @@ def _parse(source, state):
if
source
.
next
==
"}"
:
subpatternappend
((
LITERAL
,
_ord
(
this
)))
continue
here
=
source
.
tell
()
min
,
max
=
0
,
MAXREPEAT
lo
=
hi
=
""
while
source
.
next
in
DIGITS
:
...
...
@@ -589,18 +596,21 @@ def _parse(source, state):
if
max
>=
MAXREPEAT
:
raise
OverflowError
(
"the repetition number is too large"
)
if
max
<
min
:
raise
error
(
"bad repeat interval"
)
raise
source
.
error
(
"bad repeat interval"
,
source
.
tell
()
-
here
)
else
:
raise
error
(
"not supported"
)
raise
source
.
error
(
"not supported"
,
len
(
this
)
)
# figure out which item to repeat
if
subpattern
:
item
=
subpattern
[
-
1
:]
else
:
item
=
None
if
not
item
or
(
_len
(
item
)
==
1
and
item
[
0
][
0
]
==
AT
):
raise
error
(
"nothing to repeat"
)
raise
source
.
error
(
"nothing to repeat"
,
source
.
tell
()
-
here
+
len
(
this
))
if
item
[
0
][
0
]
in
_REPEATCODES
:
raise
error
(
"multiple repeat"
)
raise
source
.
error
(
"multiple repeat"
,
source
.
tell
()
-
here
+
len
(
this
))
if
sourcematch
(
"?"
):
subpattern
[
-
1
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
else
:
...
...
@@ -618,7 +628,7 @@ def _parse(source, state):
# options
char
=
sourceget
()
if
char
is
None
:
raise
error
(
"unexpected end of pattern"
)
raise
self
.
error
(
"unexpected end of pattern"
)
if
char
==
"P"
:
# python extensions
if
sourcematch
(
"<"
):
...
...
@@ -626,28 +636,32 @@ def _parse(source, state):
name
=
source
.
getuntil
(
">"
)
group
=
1
if
not
name
:
raise
error
(
"missing group name"
)
raise
source
.
error
(
"missing group name"
,
1
)
if
not
name
.
isidentifier
():
raise
error
(
"bad character in group name %r"
%
name
)
raise
source
.
error
(
"bad character in group name "
"%r"
%
name
,
len
(
name
)
+
1
)
elif
sourcematch
(
"="
):
# named backreference
name
=
source
.
getuntil
(
")"
)
if
not
name
:
raise
error
(
"missing group name"
)
raise
source
.
error
(
"missing group name"
,
1
)
if
not
name
.
isidentifier
():
raise
error
(
"bad character in backref group name "
"%r"
%
name
)
raise
source
.
error
(
"bad character in backref "
"group name %r"
%
name
,
len
(
name
)
+
1
)
gid
=
state
.
groupdict
.
get
(
name
)
if
gid
is
None
:
msg
=
"unknown group name: {0!r}"
.
format
(
name
)
raise
error
(
msg
)
raise
source
.
error
(
msg
,
len
(
name
)
+
1
)
subpatternappend
((
GROUPREF
,
gid
))
continue
else
:
char
=
sourceget
()
if
char
is
None
:
raise
error
(
"unexpected end of pattern"
)
raise
error
(
"unknown specifier: ?P%s"
%
char
)
raise
source
.
error
(
"unexpected end of pattern"
)
raise
source
.
error
(
"unknown specifier: ?P%s"
%
char
,
len
(
char
))
elif
char
==
":"
:
# non-capturing group
group
=
2
...
...
@@ -655,7 +669,7 @@ def _parse(source, state):
# comment
while
True
:
if
source
.
next
is
None
:
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
if
sourceget
()
==
")"
:
break
continue
...
...
@@ -665,11 +679,11 @@ def _parse(source, state):
if
char
==
"<"
:
char
=
sourceget
()
if
char
is
None
or
char
not
in
"=!"
:
raise
error
(
"syntax error"
)
raise
source
.
error
(
"syntax error"
)
dir
=
-
1
# lookbehind
p
=
_parse_sub
(
source
,
state
)
if
not
sourcematch
(
")"
):
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
if
char
==
"="
:
subpatternappend
((
ASSERT
,
(
dir
,
p
)))
else
:
...
...
@@ -680,23 +694,26 @@ def _parse(source, state):
condname
=
source
.
getuntil
(
")"
)
group
=
2
if
not
condname
:
raise
error
(
"missing group name"
)
raise
source
.
error
(
"missing group name"
,
1
)
if
condname
.
isidentifier
():
condgroup
=
state
.
groupdict
.
get
(
condname
)
if
condgroup
is
None
:
msg
=
"unknown group name: {0!r}"
.
format
(
condname
)
raise
error
(
msg
)
raise
source
.
error
(
msg
,
len
(
condname
)
+
1
)
else
:
try
:
condgroup
=
int
(
condname
)
if
condgroup
<
0
:
raise
ValueError
except
ValueError
:
raise
error
(
"bad character in group name"
)
raise
source
.
error
(
"bad character in group name"
,
len
(
condname
)
+
1
)
if
not
condgroup
:
raise
error
(
"bad group number"
)
raise
source
.
error
(
"bad group number"
,
len
(
condname
)
+
1
)
if
condgroup
>=
MAXGROUPS
:
raise
error
(
"the group number is too large"
)
raise
source
.
error
(
"the group number is too large"
,
len
(
condname
)
+
1
)
elif
char
in
FLAGS
:
# flags
state
.
flags
|=
FLAGS
[
char
]
...
...
@@ -704,20 +721,23 @@ def _parse(source, state):
state
.
flags
|=
FLAGS
[
sourceget
()]
verbose
=
state
.
flags
&
SRE_FLAG_VERBOSE
else
:
raise
error
(
"unexpected end of pattern "
+
char
)
raise
source
.
error
(
"unexpected end of pattern"
)
if
group
:
# parse group contents
if
group
==
2
:
# anonymous group
group
=
None
else
:
group
=
state
.
opengroup
(
name
)
try
:
group
=
state
.
opengroup
(
name
)
except
error
as
err
:
raise
source
.
error
(
err
.
msg
,
len
(
name
)
+
1
)
if
condgroup
:
p
=
_parse_sub_cond
(
source
,
state
,
condgroup
)
else
:
p
=
_parse_sub
(
source
,
state
)
if
not
sourcematch
(
")"
):
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
if
group
is
not
None
:
state
.
closegroup
(
group
,
p
)
subpatternappend
((
SUBPATTERN
,
(
group
,
p
)))
...
...
@@ -725,10 +745,10 @@ def _parse(source, state):
while
True
:
char
=
sourceget
()
if
char
is
None
:
raise
error
(
"unexpected end of pattern"
)
raise
source
.
error
(
"unexpected end of pattern"
)
if
char
==
")"
:
break
raise
error
(
"unknown extension"
)
raise
source
.
error
(
"unknown extension"
,
len
(
char
)
)
elif
this
==
"^"
:
subpatternappend
((
AT
,
AT_BEGINNING
))
...
...
@@ -737,7 +757,7 @@ def _parse(source, state):
subpattern
.
append
((
AT
,
AT_END
))
else
:
raise
error
(
"parser error"
)
raise
source
.
error
(
"parser error"
,
len
(
this
)
)
return
subpattern
...
...
@@ -768,9 +788,10 @@ def parse(str, flags=0, pattern=None):
if
source
.
next
is
not
None
:
if
source
.
next
==
")"
:
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
else
:
raise
error
(
"bogus characters at end of regular expression"
)
raise
source
.
error
(
"bogus characters at end of regular expression"
,
len
(
tail
))
if
flags
&
SRE_FLAG_DEBUG
:
p
.
dump
()
...
...
@@ -809,16 +830,18 @@ def parse_template(source, pattern):
if
s
.
match
(
"<"
):
name
=
s
.
getuntil
(
">"
)
if
not
name
:
raise
error
(
"missing group name"
)
raise
s
.
error
(
"missing group name"
,
1
)
try
:
index
=
int
(
name
)
if
index
<
0
:
raise
error
(
"negative group number"
)
raise
s
.
error
(
"negative group number"
,
len
(
name
)
+
1
)
if
index
>=
MAXGROUPS
:
raise
error
(
"the group number is too large"
)
raise
s
.
error
(
"the group number is too large"
,
len
(
name
)
+
1
)
except
ValueError
:
if
not
name
.
isidentifier
():
raise
error
(
"bad character in group name"
)
raise
s
.
error
(
"bad character in group name"
,
len
(
name
)
+
1
)
try
:
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
...
...
@@ -841,8 +864,8 @@ def parse_template(source, pattern):
isoctal
=
True
c
=
int
(
this
[
1
:],
8
)
if
c
>
0o377
:
raise
error
(
'octal escape value %r outside of '
'range 0-0o377'
%
this
)
raise
s
.
error
(
'octal escape value %r outside of '
'range 0-0o377'
%
this
,
len
(
this
)
)
lappend
(
chr
(
c
))
if
not
isoctal
:
addgroup
(
int
(
this
[
1
:]))
...
...
Lib/test/test_re.py
View file @
ad446d57
...
...
@@ -1419,6 +1419,42 @@ SUBPATTERN None
self.assertIsNone(re.match(b'(?Li)
\
xc5
', b'
\
xe5
'))
self.assertIsNone(re.match(b'(?Li)
\
xe5
', b'
\
xc5
'))
def test_error(self):
with self.assertRaises(re.error) as cm:
re.compile('(
\
u20ac
))')
err = cm.exception
self.assertIsInstance(err.pattern, str)
self.assertEqual(err.pattern, '(
\
u20ac
))')
self.assertEqual(err.pos, 3)
self.assertEqual(err.lineno, 1)
self.assertEqual(err.colno, 4)
self.assertIn(err.msg, str(err))
self.assertIn(' at position 3', str(err))
self.assertNotIn(' at position 3', err.msg)
# Bytes pattern
with self.assertRaises(re.error) as cm:
re.compile(b'(
\
xa4
))')
err = cm.exception
self.assertIsInstance(err.pattern, bytes)
self.assertEqual(err.pattern, b'(
\
xa4
))')
self.assertEqual(err.pos, 3)
# Multiline pattern
with self.assertRaises(re.error) as cm:
re.compile("""
(
abc
)
)
(
""", re.VERBOSE)
err = cm.exception
self.assertEqual(err.pos, 77)
self.assertEqual(err.lineno, 5)
self.assertEqual(err.colno, 17)
self.assertIn(err.msg, str(err))
self.assertIn(' at position 77', str(err))
self.assertIn('(line 5, column 17)', str(err))
class PatternReprTests(unittest.TestCase):
def check(self, pattern, expected):
...
...
Misc/NEWS
View file @
ad446d57
...
...
@@ -183,6 +183,8 @@ Core and Builtins
Library
-------
- Issue #22578: Added attributes to the re.error class.
Issue
#
12728
:
Different
Unicode
characters
having
the
same
uppercase
but
different
lowercase
are
now
matched
in
case
-
insensitive
regular
expressions
.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment