Merge pull request #1735 from pypa/bugfix/1702-utf8-config

When reading config files, require them to be encoded with UTF-8.

Merge pull request #1735 from pypa/bugfix/1702-utf8-config
When reading config files, require them to be encoded with UTF-8.
4edd0d57 · Jason R. Coombs · GitHub · 393809a0 · f3678108 · 4edd0d57
Commit 4edd0d57, authored Apr 05, 2019 by Jason R. Coombs; committed by GitHub Apr 05, 2019
5 changed files
--- a/changelog.d/1735.breaking.rst
+++ b/changelog.d/1735.breaking.rst
+When parsing setup.cfg files, setuptools now requires the files to be encoded as UTF-8. Any other encoding will lead to a UnicodeDecodeError. This change removes support for specifying an encoding using a 'coding: ' directive in the header of the file, a feature that was introduced in 40.7. Given the recent release of the aforementioned feature, it is assumed that few if any projects are utilizing the feature to specify an encoding other than UTF-8.
--- a/setuptools/dist.py
+++ b/setuptools/dist.py
@@ -35,7 +35,6 @@ from setuptools.depends import Require
 from setuptools import windows_support
 from setuptools.monkey import get_unpatched
 from setuptools.config import parse_configuration
-from .unicode_utils import detect_encoding
 import pkg_resources

 __import__('setuptools.extern.packaging.specifiers')
@@ -587,13 +586,9 @@ class Distribution(_Distribution):

        parser = ConfigParser()
        for filename in filenames:
-            with io.open(filename, 'rb') as fp:
-                encoding = detect_encoding(fp)
+            with io.open(filename, encoding='utf-8') as reader:
                if DEBUG:
-                    self.announce("  reading %s [%s]" % (
-                        filename, encoding or 'locale')
-                    )
-                reader = io.TextIOWrapper(fp, encoding=encoding)
+                    self.announce("  reading {filename}".format(**locals()))
                (parser.read_file if six.PY3 else parser.readfp)(reader)
            for section in parser.sections():
                options = parser.options(section)

--- a/setuptools/tests/test_config.py
+++ b/setuptools/tests/test_config.py
@@ -9,7 +9,6 @@ from mock import patch
 from setuptools.dist import Distribution, _Distribution
 from setuptools.config import ConfigHandler, read_configuration
 from setuptools.extern.six.moves import configparser
-from setuptools.tests import is_ascii
 from . import py2_only, py3_only
 from .textwrap import DALS

@@ -446,10 +445,6 @@ class TestMetadata:
            with get_dist(tmpdir):
                pass

-    skip_if_not_ascii = pytest.mark.skipif(
-        not is_ascii, reason='Test not supported with this locale')
-
-    @skip_if_not_ascii
    def test_non_ascii_1(self, tmpdir):
        fake_env(
            tmpdir,
@@ -457,18 +452,8 @@ class TestMetadata:
            'description = éàïôñ\n',
            encoding='utf-8'
        )
-        with pytest.raises(UnicodeDecodeError):
-            with get_dist(tmpdir):
-                pass
-
-    def test_non_ascii_2(self, tmpdir):
-        fake_env(
-            tmpdir,
-            '# -*- coding: invalid\n'
-        )
-        with pytest.raises(LookupError):
-            with get_dist(tmpdir):
-                pass
+        with get_dist(tmpdir):
+            pass

    def test_non_ascii_3(self, tmpdir):
        fake_env(
@@ -479,7 +464,6 @@ class TestMetadata:
        with get_dist(tmpdir):
            pass

-    @skip_if_not_ascii
    def test_non_ascii_4(self, tmpdir):
        fake_env(
            tmpdir,
@@ -491,8 +475,10 @@ class TestMetadata:
        with get_dist(tmpdir) as dist:
            assert dist.metadata.description == 'éàïôñ'

-    @skip_if_not_ascii
-    def test_non_ascii_5(self, tmpdir):
+    def test_not_utf8(self, tmpdir):
+        """
+        Config files encoded not in UTF-8 will fail
+        """
        fake_env(
            tmpdir,
            '# vim: set fileencoding=iso-8859-15 :\n'
@@ -500,8 +486,9 @@ class TestMetadata:
            'description = éàïôñ\n',
            encoding='iso-8859-15'
        )
-        with get_dist(tmpdir) as dist:
-            assert dist.metadata.description == 'éàïôñ'
+        with pytest.raises(UnicodeDecodeError):
+            with get_dist(tmpdir):
+                pass


 class TestOptions:

--- a/setuptools/tests/test_setopt.py
+++ b/setuptools/tests/test_setopt.py
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import io
+
+import six
+
+from setuptools.command import setopt
+from setuptools.extern.six.moves import configparser
+
+
+class TestEdit:
+    @staticmethod
+    def parse_config(filename):
+        parser = configparser.ConfigParser()
+        with io.open(filename, encoding='utf-8') as reader:
+            (parser.read_file if six.PY3 else parser.readfp)(reader)
+        return parser
+
+    @staticmethod
+    def write_text(file, content):
+        with io.open(file, 'wb') as strm:
+            strm.write(content.encode('utf-8'))
+
+    def test_utf8_encoding_retained(self, tmpdir):
+        """
+        When editing a file, non-ASCII characters encoded in
+        UTF-8 should be retained.
+        """
+        config = tmpdir.join('setup.cfg')
+        self.write_text(str(config), '[names]\njaraco=джарако')
+        setopt.edit_config(str(config), dict(names=dict(other='yes')))
+        parser = self.parse_config(str(config))
+        assert parser.get('names', 'jaraco') == 'джарако'
+        assert parser.get('names', 'other') == 'yes'
--- a/setuptools/unicode_utils.py
+++ b/setuptools/unicode_utils.py
 import unicodedata
 import sys
-import re

 from setuptools.extern import six

@@ -43,15 +42,3 @@ def try_encode(string, enc):
        return string.encode(enc)
    except UnicodeEncodeError:
        return None
-
-
-CODING_RE = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
-
-
-def detect_encoding(fp):
-    first_line = fp.readline()
-    fp.seek(0)
-    m = CODING_RE.match(first_line)
-    if m is None:
-        return None
-    return m.group(1).decode('ascii')