Commit 4edd0d57 authored by Jason R. Coombs's avatar Jason R. Coombs Committed by GitHub

Merge pull request #1735 from pypa/bugfix/1702-utf8-config

When reading config files, require them to be encoded with UTF-8.
parents 393809a0 f3678108
When parsing setup.cfg files, setuptools now requires the files to be encoded as UTF-8. Any other encoding will lead to a UnicodeDecodeError. This change removes support for specifying an encoding using a 'coding: ' directive in the header of the file, a feature that was introduces in 40.7. Given the recent release of the aforementioned feature, it is assumed that few if any projects are utilizing the feature to specify an encoding other than UTF-8.
......@@ -35,7 +35,6 @@ from setuptools.depends import Require
from setuptools import windows_support
from setuptools.monkey import get_unpatched
from setuptools.config import parse_configuration
from .unicode_utils import detect_encoding
import pkg_resources
__import__('setuptools.extern.packaging.specifiers')
......@@ -587,13 +586,9 @@ class Distribution(_Distribution):
parser = ConfigParser()
for filename in filenames:
with io.open(filename, 'rb') as fp:
encoding = detect_encoding(fp)
with io.open(filename, encoding='utf-8') as reader:
if DEBUG:
self.announce(" reading %s [%s]" % (
filename, encoding or 'locale')
)
reader = io.TextIOWrapper(fp, encoding=encoding)
self.announce(" reading {filename}".format(**locals()))
(parser.read_file if six.PY3 else parser.readfp)(reader)
for section in parser.sections():
options = parser.options(section)
......
......@@ -9,7 +9,6 @@ from mock import patch
from setuptools.dist import Distribution, _Distribution
from setuptools.config import ConfigHandler, read_configuration
from setuptools.extern.six.moves import configparser
from setuptools.tests import is_ascii
from . import py2_only, py3_only
from .textwrap import DALS
......@@ -446,10 +445,6 @@ class TestMetadata:
with get_dist(tmpdir):
pass
skip_if_not_ascii = pytest.mark.skipif(
not is_ascii, reason='Test not supported with this locale')
@skip_if_not_ascii
def test_non_ascii_1(self, tmpdir):
fake_env(
tmpdir,
......@@ -457,18 +452,8 @@ class TestMetadata:
'description = éàïôñ\n',
encoding='utf-8'
)
with pytest.raises(UnicodeDecodeError):
with get_dist(tmpdir):
pass
def test_non_ascii_2(self, tmpdir):
fake_env(
tmpdir,
'# -*- coding: invalid\n'
)
with pytest.raises(LookupError):
with get_dist(tmpdir):
pass
with get_dist(tmpdir):
pass
def test_non_ascii_3(self, tmpdir):
fake_env(
......@@ -479,7 +464,6 @@ class TestMetadata:
with get_dist(tmpdir):
pass
@skip_if_not_ascii
def test_non_ascii_4(self, tmpdir):
fake_env(
tmpdir,
......@@ -491,8 +475,10 @@ class TestMetadata:
with get_dist(tmpdir) as dist:
assert dist.metadata.description == 'éàïôñ'
@skip_if_not_ascii
def test_non_ascii_5(self, tmpdir):
def test_not_utf8(self, tmpdir):
"""
Config files encoded not in UTF-8 will fail
"""
fake_env(
tmpdir,
'# vim: set fileencoding=iso-8859-15 :\n'
......@@ -500,8 +486,9 @@ class TestMetadata:
'description = éàïôñ\n',
encoding='iso-8859-15'
)
with get_dist(tmpdir) as dist:
assert dist.metadata.description == 'éàïôñ'
with pytest.raises(UnicodeDecodeError):
with get_dist(tmpdir):
pass
class TestOptions:
......
# coding: utf-8
from __future__ import unicode_literals
import io
import six
from setuptools.command import setopt
from setuptools.extern.six.moves import configparser
class TestEdit:
@staticmethod
def parse_config(filename):
parser = configparser.ConfigParser()
with io.open(filename, encoding='utf-8') as reader:
(parser.read_file if six.PY3 else parser.readfp)(reader)
return parser
@staticmethod
def write_text(file, content):
with io.open(file, 'wb') as strm:
strm.write(content.encode('utf-8'))
def test_utf8_encoding_retained(self, tmpdir):
"""
When editing a file, non-ASCII characters encoded in
UTF-8 should be retained.
"""
config = tmpdir.join('setup.cfg')
self.write_text(str(config), '[names]\njaraco=джарако')
setopt.edit_config(str(config), dict(names=dict(other='yes')))
parser = self.parse_config(str(config))
assert parser.get('names', 'jaraco') == 'джарако'
assert parser.get('names', 'other') == 'yes'
import unicodedata
import sys
import re
from setuptools.extern import six
......@@ -43,15 +42,3 @@ def try_encode(string, enc):
return string.encode(enc)
except UnicodeEncodeError:
return None
CODING_RE = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
def detect_encoding(fp):
first_line = fp.readline()
fp.seek(0)
m = CODING_RE.match(first_line)
if m is None:
return None
return m.group(1).decode('ascii')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment