Commit ad446d57 authored by Serhiy Storchaka

Issue #22578: Added attributes to the re.error class.

parent eb99e515
...@@ -733,13 +733,36 @@ form. ...@@ -733,13 +733,36 @@ form.
Clear the regular expression cache. Clear the regular expression cache.
.. exception:: error .. exception:: error(msg, pattern=None, pos=None)
Exception raised when a string passed to one of the functions here is not a Exception raised when a string passed to one of the functions here is not a
valid regular expression (for example, it might contain unmatched parentheses) valid regular expression (for example, it might contain unmatched parentheses)
or when some other error occurs during compilation or matching. It is never an or when some other error occurs during compilation or matching. It is never an
error if a string contains no match for a pattern. error if a string contains no match for a pattern. The error instance has
the following additional attributes:
.. attribute:: msg
The unformatted error message.
.. attribute:: pattern
The regular expression pattern.
.. attribute:: pos
The index in *pattern* where compilation failed (may be ``None``).
.. attribute:: lineno
The line corresponding to *pos* (may be ``None``).
.. attribute:: colno
The column corresponding to *pos* (may be ``None``).
.. versionchanged:: 3.5
Added additional attributes.
.. _re-objects: .. _re-objects:
......
...@@ -21,7 +21,35 @@ from _sre import MAXREPEAT, MAXGROUPS ...@@ -21,7 +21,35 @@ from _sre import MAXREPEAT, MAXGROUPS
# should this really be here? # should this really be here?
class error(Exception):
    """Exception raised for an invalid regular expression.

    Attributes:
        msg: the unformatted error message
        pattern: the regular expression pattern, or None
        pos: the index in *pattern* where compilation failed, or None
        lineno: the 1-based line corresponding to *pos*, or None
        colno: the 1-based column corresponding to *pos*, or None

    *lineno* and *colno* are only computed when both *pattern* and *pos*
    are given; otherwise both are None.
    """

    def __init__(self, msg, pattern=None, pos=None):
        self.msg = msg
        self.pattern = pattern
        self.pos = pos
        if pattern is None or pos is None:
            # No location information available: keep the bare message.
            self.lineno = self.colno = None
            super().__init__(msg)
            return
        text = '%s at position %d' % (msg, pos)
        # The pattern may be str or bytes; search with a matching literal.
        nl = '\n' if isinstance(pattern, str) else b'\n'
        self.lineno = pattern.count(nl, 0, pos) + 1
        # rfind() yields -1 when no newline precedes pos, giving pos + 1.
        self.colno = pos - pattern.rfind(nl, 0, pos)
        if nl in pattern:
            # Line/column details are only shown for multi-line patterns.
            text = '%s (line %d, column %d)' % (text, self.lineno, self.colno)
        super().__init__(text)
def linecol(doc, pos):
    """Return the 1-based ``(lineno, colno)`` of offset *pos* in *doc*.

    *doc* may be a str or a bytes object; lines are separated by a
    newline character of the matching type.  *pos* is a 0-based index
    into *doc*.
    """
    # Use the parameter `doc` throughout; the previous body referred to an
    # undefined name `pattern` and raised NameError on every call.
    newline = '\n' if isinstance(doc, str) else b'\n'
    lineno = doc.count(newline, 0, pos) + 1
    # rfind() returns -1 when no newline precedes pos, so the first line
    # yields pos + 1 without needing a separate branch.
    colno = pos - doc.rfind(newline, 0, pos)
    return lineno, colno
class _NamedIntConstant(int): class _NamedIntConstant(int):
......
This diff is collapsed.
...@@ -1419,6 +1419,42 @@ SUBPATTERN None ...@@ -1419,6 +1419,42 @@ SUBPATTERN None
self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5')) self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5')) self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
def test_error(self):
# Check the attributes (pattern, pos, lineno, colno, msg) and the str()
# formatting of re.error for str, bytes and multi-line patterns.

# Str pattern with an extra closing parenthesis at index 3.
with self.assertRaises(re.error) as cm:
re.compile('(\u20ac))')
err = cm.exception
self.assertIsInstance(err.pattern, str)
self.assertEqual(err.pattern, '(\u20ac))')
self.assertEqual(err.pos, 3)
self.assertEqual(err.lineno, 1)
self.assertEqual(err.colno, 4)
# msg stays unformatted; the position only appears in str(err).
self.assertIn(err.msg, str(err))
self.assertIn(' at position 3', str(err))
self.assertNotIn(' at position 3', err.msg)
# Bytes pattern
with self.assertRaises(re.error) as cm:
re.compile(b'(\xa4))')
err = cm.exception
self.assertIsInstance(err.pattern, bytes)
self.assertEqual(err.pattern, b'(\xa4))')
self.assertEqual(err.pos, 3)
# Multiline pattern
# NOTE(review): the expected pos/colno below depend on the exact leading
# whitespace inside the triple-quoted pattern, which is not preserved in
# this view -- confirm against the original file before editing.
with self.assertRaises(re.error) as cm:
re.compile("""
(
abc
)
)
(
""", re.VERBOSE)
err = cm.exception
self.assertEqual(err.pos, 77)
self.assertEqual(err.lineno, 5)
self.assertEqual(err.colno, 17)
# For a pattern containing newlines, str(err) also carries line/column.
self.assertIn(err.msg, str(err))
self.assertIn(' at position 77', str(err))
self.assertIn('(line 5, column 17)', str(err))
class PatternReprTests(unittest.TestCase): class PatternReprTests(unittest.TestCase):
def check(self, pattern, expected): def check(self, pattern, expected):
......
...@@ -183,6 +183,8 @@ Core and Builtins ...@@ -183,6 +183,8 @@ Core and Builtins
Library Library
------- -------
- Issue #22578: Added attributes to the re.error class.
- Issue #12728: Different Unicode characters having the same uppercase but - Issue #12728: Different Unicode characters having the same uppercase but
different lowercase are now matched in case-insensitive regular expressions. different lowercase are now matched in case-insensitive regular expressions.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment