Commit 4edd0d57 authored by Jason R. Coombs's avatar Jason R. Coombs Committed by GitHub

Merge pull request #1735 from pypa/bugfix/1702-utf8-config

When reading config files, require them to be encoded with UTF-8.
parents 393809a0 f3678108
When parsing setup.cfg files, setuptools now requires the files to be encoded as UTF-8. Any other encoding will lead to a UnicodeDecodeError. This change removes support for specifying an encoding using a 'coding: ' directive in the header of the file, a feature that was introduced in 40.7. Given the recent release of the aforementioned feature, it is assumed that few if any projects are utilizing the feature to specify an encoding other than UTF-8.
...@@ -35,7 +35,6 @@ from setuptools.depends import Require ...@@ -35,7 +35,6 @@ from setuptools.depends import Require
from setuptools import windows_support from setuptools import windows_support
from setuptools.monkey import get_unpatched from setuptools.monkey import get_unpatched
from setuptools.config import parse_configuration from setuptools.config import parse_configuration
from .unicode_utils import detect_encoding
import pkg_resources import pkg_resources
__import__('setuptools.extern.packaging.specifiers') __import__('setuptools.extern.packaging.specifiers')
...@@ -587,13 +586,9 @@ class Distribution(_Distribution): ...@@ -587,13 +586,9 @@ class Distribution(_Distribution):
parser = ConfigParser() parser = ConfigParser()
for filename in filenames: for filename in filenames:
with io.open(filename, 'rb') as fp: with io.open(filename, encoding='utf-8') as reader:
encoding = detect_encoding(fp)
if DEBUG: if DEBUG:
self.announce(" reading %s [%s]" % ( self.announce(" reading {filename}".format(**locals()))
filename, encoding or 'locale')
)
reader = io.TextIOWrapper(fp, encoding=encoding)
(parser.read_file if six.PY3 else parser.readfp)(reader) (parser.read_file if six.PY3 else parser.readfp)(reader)
for section in parser.sections(): for section in parser.sections():
options = parser.options(section) options = parser.options(section)
......
...@@ -9,7 +9,6 @@ from mock import patch ...@@ -9,7 +9,6 @@ from mock import patch
from setuptools.dist import Distribution, _Distribution from setuptools.dist import Distribution, _Distribution
from setuptools.config import ConfigHandler, read_configuration from setuptools.config import ConfigHandler, read_configuration
from setuptools.extern.six.moves import configparser from setuptools.extern.six.moves import configparser
from setuptools.tests import is_ascii
from . import py2_only, py3_only from . import py2_only, py3_only
from .textwrap import DALS from .textwrap import DALS
...@@ -446,10 +445,6 @@ class TestMetadata: ...@@ -446,10 +445,6 @@ class TestMetadata:
with get_dist(tmpdir): with get_dist(tmpdir):
pass pass
skip_if_not_ascii = pytest.mark.skipif(
not is_ascii, reason='Test not supported with this locale')
@skip_if_not_ascii
def test_non_ascii_1(self, tmpdir): def test_non_ascii_1(self, tmpdir):
fake_env( fake_env(
tmpdir, tmpdir,
...@@ -457,18 +452,8 @@ class TestMetadata: ...@@ -457,18 +452,8 @@ class TestMetadata:
'description = éàïôñ\n', 'description = éàïôñ\n',
encoding='utf-8' encoding='utf-8'
) )
with pytest.raises(UnicodeDecodeError): with get_dist(tmpdir):
with get_dist(tmpdir): pass
pass
def test_non_ascii_2(self, tmpdir):
fake_env(
tmpdir,
'# -*- coding: invalid\n'
)
with pytest.raises(LookupError):
with get_dist(tmpdir):
pass
def test_non_ascii_3(self, tmpdir): def test_non_ascii_3(self, tmpdir):
fake_env( fake_env(
...@@ -479,7 +464,6 @@ class TestMetadata: ...@@ -479,7 +464,6 @@ class TestMetadata:
with get_dist(tmpdir): with get_dist(tmpdir):
pass pass
@skip_if_not_ascii
def test_non_ascii_4(self, tmpdir): def test_non_ascii_4(self, tmpdir):
fake_env( fake_env(
tmpdir, tmpdir,
...@@ -491,8 +475,10 @@ class TestMetadata: ...@@ -491,8 +475,10 @@ class TestMetadata:
with get_dist(tmpdir) as dist: with get_dist(tmpdir) as dist:
assert dist.metadata.description == 'éàïôñ' assert dist.metadata.description == 'éàïôñ'
@skip_if_not_ascii def test_not_utf8(self, tmpdir):
def test_non_ascii_5(self, tmpdir): """
Config files encoded not in UTF-8 will fail
"""
fake_env( fake_env(
tmpdir, tmpdir,
'# vim: set fileencoding=iso-8859-15 :\n' '# vim: set fileencoding=iso-8859-15 :\n'
...@@ -500,8 +486,9 @@ class TestMetadata: ...@@ -500,8 +486,9 @@ class TestMetadata:
'description = éàïôñ\n', 'description = éàïôñ\n',
encoding='iso-8859-15' encoding='iso-8859-15'
) )
with get_dist(tmpdir) as dist: with pytest.raises(UnicodeDecodeError):
assert dist.metadata.description == 'éàïôñ' with get_dist(tmpdir):
pass
class TestOptions: class TestOptions:
......
# coding: utf-8
from __future__ import unicode_literals
import io
import six
from setuptools.command import setopt
from setuptools.extern.six.moves import configparser
class TestEdit:
    """Checks that ``setopt.edit_config`` keeps UTF-8 content intact."""

    @staticmethod
    def parse_config(filename):
        """Read ``filename`` as UTF-8 text and return the loaded parser."""
        parser = configparser.ConfigParser()
        with io.open(filename, encoding='utf-8') as reader:
            # ``readfp`` is the spelling used when ``six.PY3`` is false.
            load = parser.read_file if six.PY3 else parser.readfp
            load(reader)
        return parser

    @staticmethod
    def write_text(file, content):
        """Write ``content`` to ``file`` as UTF-8 encoded bytes."""
        payload = content.encode('utf-8')
        with io.open(file, 'wb') as strm:
            strm.write(payload)

    def test_utf8_encoding_retained(self, tmpdir):
        """
        Editing a config file must leave its existing non-ASCII,
        UTF-8 encoded values unchanged while adding the new option.
        """
        config = tmpdir.join('setup.cfg')
        path = str(config)
        self.write_text(path, '[names]\njaraco=джарако')
        setopt.edit_config(path, dict(names=dict(other='yes')))
        parser = self.parse_config(path)
        assert parser.get('names', 'jaraco') == 'джарако'
        assert parser.get('names', 'other') == 'yes'
import unicodedata import unicodedata
import sys import sys
import re
from setuptools.extern import six from setuptools.extern import six
...@@ -43,15 +42,3 @@ def try_encode(string, enc): ...@@ -43,15 +42,3 @@ def try_encode(string, enc):
return string.encode(enc) return string.encode(enc)
except UnicodeEncodeError: except UnicodeEncodeError:
return None return None
# Matches a ``coding:`` / ``coding=`` declaration inside a ``#`` comment and
# captures the declared encoding name.
CODING_RE = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')


def detect_encoding(fp):
    """
    Return the encoding declared on the first line read from ``fp``.

    ``fp`` is a seekable binary stream. One line is read from it and the
    stream is then rewound to offset 0. The result is the declared
    encoding name as text, or ``None`` when that line carries no coding
    declaration.
    """
    header = fp.readline()
    fp.seek(0)
    found = CODING_RE.match(header)
    return found.group(1).decode('ascii') if found is not None else None
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment