Commit b275210a authored by Serhiy Storchaka

Issue #25788: fileinput.hook_encoded() now supports an "errors" argument
for passing to open.  Original patch by Joseph Hackman.
parent 258a5d4d
...@@ -193,10 +193,14 @@ The two following opening hooks are provided by this module: ...@@ -193,10 +193,14 @@ The two following opening hooks are provided by this module:
Usage example: ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed)`` Usage example: ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed)``
.. function:: hook_encoded(encoding) .. function:: hook_encoded(encoding, errors=None)
Returns a hook which opens each file with :func:`open`, using the given Returns a hook which opens each file with :func:`open`, using the given
*encoding* to read the file. *encoding* and *errors* to read the file.
Usage example: ``fi = Usage example: ``fi =
fileinput.FileInput(openhook=fileinput.hook_encoded("iso-8859-1"))`` fileinput.FileInput(openhook=fileinput.hook_encoded("utf-8",
"surrogateescape"))``
.. versionchanged:: 3.6
Added the optional *errors* parameter.
...@@ -358,6 +358,13 @@ The :func:`~zlib.compress` function now accepts keyword arguments. ...@@ -358,6 +358,13 @@ The :func:`~zlib.compress` function now accepts keyword arguments.
(Contributed by Aviv Palivoda in :issue:`26243`.) (Contributed by Aviv Palivoda in :issue:`26243`.)
fileinput
---------
:func:`~fileinput.hook_encoded` now supports the *errors* argument.
(Contributed by Joseph Hackman in :issue:`25788`.)
Optimizations Optimizations
============= =============
......
...@@ -400,9 +400,9 @@ def hook_compressed(filename, mode): ...@@ -400,9 +400,9 @@ def hook_compressed(filename, mode):
return open(filename, mode) return open(filename, mode)
def hook_encoded(encoding): def hook_encoded(encoding, errors=None):
def openhook(filename, mode): def openhook(filename, mode):
return open(filename, mode, encoding=encoding) return open(filename, mode, encoding=encoding, errors=errors)
return openhook return openhook
......
...@@ -945,7 +945,8 @@ class Test_hook_encoded(unittest.TestCase): ...@@ -945,7 +945,8 @@ class Test_hook_encoded(unittest.TestCase):
def test(self): def test(self):
encoding = object() encoding = object()
result = fileinput.hook_encoded(encoding) errors = object()
result = fileinput.hook_encoded(encoding, errors=errors)
fake_open = InvocationRecorder() fake_open = InvocationRecorder()
original_open = builtins.open original_open = builtins.open
...@@ -963,8 +964,26 @@ class Test_hook_encoded(unittest.TestCase): ...@@ -963,8 +964,26 @@ class Test_hook_encoded(unittest.TestCase):
self.assertIs(args[0], filename) self.assertIs(args[0], filename)
self.assertIs(args[1], mode) self.assertIs(args[1], mode)
self.assertIs(kwargs.pop('encoding'), encoding) self.assertIs(kwargs.pop('encoding'), encoding)
self.assertIs(kwargs.pop('errors'), errors)
self.assertFalse(kwargs) self.assertFalse(kwargs)
def test_errors(self):
with open(TESTFN, 'wb') as f:
f.write(b'\x80abc')
self.addCleanup(safe_unlink, TESTFN)
def check(errors, expected_lines):
with FileInput(files=TESTFN, mode='r',
openhook=hook_encoded('utf-8', errors=errors)) as fi:
lines = list(fi)
self.assertEqual(lines, expected_lines)
check('ignore', ['abc'])
with self.assertRaises(UnicodeDecodeError):
check('strict', ['abc'])
check('replace', ['\ufffdabc'])
check('backslashreplace', ['\\x80abc'])
def test_modes(self): def test_modes(self):
with open(TESTFN, 'wb') as f: with open(TESTFN, 'wb') as f:
# UTF-7 is a convenient, seldom used encoding # UTF-7 is a convenient, seldom used encoding
......
...@@ -538,6 +538,7 @@ Michael Guravage ...@@ -538,6 +538,7 @@ Michael Guravage
Lars Gustäbel Lars Gustäbel
Thomas Güttler Thomas Güttler
Jonas H. Jonas H.
Joseph Hackman
Barry Haddow Barry Haddow
Philipp Hagemeister Philipp Hagemeister
Paul ten Hagen Paul ten Hagen
......
...@@ -256,6 +256,9 @@ Core and Builtins ...@@ -256,6 +256,9 @@ Core and Builtins
Library Library
------- -------
- Issue #25788: fileinput.hook_encoded() now supports an "errors" argument
for passing to open. Original patch by Joseph Hackman.
- Issue #26634: recursive_repr() now sets __qualname__ of wrapper. Patch by - Issue #26634: recursive_repr() now sets __qualname__ of wrapper. Patch by
Xiang Zhang. Xiang Zhang.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment