Rely on stdlib to decode entities.

6fba91c1 · Jason R. Coombs · e141c08e · 6fba91c1 · 6fba91c1
Commit 6fba91c1 authored Mar 17, 2018 by Jason R. Coombs
Hide whitespace changes
Inline Side-by-side

Showing with 11 additions and 15 deletions

setuptools/package_index.py setuptools/package_index.py +2 -15

setuptools/py33compat.py setuptools/py33compat.py +9 -0

No files found.
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -23,6 +23,7 @@ from distutils import log
 from distutils.errors import DistutilsError
 from fnmatch import translate
 from setuptools.py27compat import get_all_headers
+from setuptools.py33compat import unescape
 from setuptools.wheel import Wheel

 EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
@@ -931,23 +932,9 @@ class PackageIndex(Environment):
 entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub


-def uchr(c):
-    if not isinstance(c, int):
-        return c
-    if c > 255:
-        return six.unichr(c)
-    return chr(c)
-
-
 def decode_entity(match):
+    """Decode the single HTML entity reference captured by ``match``.
+
+    The whole matched text (group 0) is handed to ``unescape``.  Group 1
+    of a pattern such as ``entity_sub`` omits the leading ``&``, and
+    ``html.unescape`` returns text without that ``&`` unchanged, so
+    passing group 1 would never decode anything.
+    """
-    what = match.group(1)
-    if what.startswith('#x'):
-        what = int(what[2:], 16)
-    elif what.startswith('#'):
-        what = int(what[1:])
-    else:
-        what = six.moves.html_entities.name2codepoint.get(what, match.group(0))
-    return uchr(what)
+    what = match.group(0)
+    return unescape(what)


 def htmldecode(text):

--- a/setuptools/py33compat.py
+++ b/setuptools/py33compat.py
@@ -2,7 +2,13 @@ import dis
 import array
 import collections

+try:
+    import html
+except ImportError:
+    html = None
+
 from setuptools.extern import six
+from setuptools.extern.six.moves import html_parser


 OpArg = collections.namedtuple('OpArg', 'opcode arg')
@@ -43,3 +49,6 @@ class Bytecode_compat(object):


 Bytecode = getattr(dis, 'Bytecode', Bytecode_compat)
+
+
+# Prefer the stdlib ``html.unescape`` when the ``html`` module provides it.
+unescape = getattr(html, 'unescape', None)
+if unescape is None:
+    # Build the fallback lazily: HTMLParser.unescape is deprecated since
+    # Python 3.4 and removed in 3.9, so looking it up unconditionally
+    # would raise AttributeError at import time on newer interpreters.
+    unescape = html_parser.HTMLParser().unescape