Commit 1f7fffb3 authored by Georg Brandl

#2830: add html.escape() helper and move cgi.escape() uses in the standard...

#2830: add html.escape() helper and move cgi.escape() uses in the standard library to it.  It defaults to quote=True and also escapes single quotes, which makes casual use safer.  The cgi.escape() interface is not touched, but emits a (silent) PendingDeprecationWarning.
parent 70543acf
...@@ -293,7 +293,7 @@ following WSGI-application:: ...@@ -293,7 +293,7 @@ following WSGI-application::
# -*- coding: UTF-8 -*- # -*- coding: UTF-8 -*-
import sys, os import sys, os
from cgi import escape from html import escape
from flup.server.fcgi import WSGIServer from flup.server.fcgi import WSGIServer
def app(environ, start_response): def app(environ, start_response):
......
...@@ -328,9 +328,9 @@ algorithms implemented in this module in other circumstances. ...@@ -328,9 +328,9 @@ algorithms implemented in this module in other circumstances.
attribute value delimited by double quotes, as in ``<a href="...">``. Note attribute value delimited by double quotes, as in ``<a href="...">``. Note
that single quotes are never translated. that single quotes are never translated.
If the value to be quoted might include single- or double-quote characters, .. deprecated:: 3.2
or both, consider using the :func:`~xml.sax.saxutils.quoteattr` function in the This function is unsafe because *quote* is false by default, and therefore
:mod:`xml.sax.saxutils` module instead. deprecated. Use :func:`html.escape` instead.
.. _cgi-security: .. _cgi-security:
...@@ -508,8 +508,8 @@ Common problems and solutions ...@@ -508,8 +508,8 @@ Common problems and solutions
.. rubric:: Footnotes .. rubric:: Footnotes
.. [#] Note that some recent versions of the HTML specification do state what order the .. [#] Note that some recent versions of the HTML specification do state what
field values should be supplied in, but knowing whether a request was order the field values should be supplied in, but knowing whether a request
received from a conforming browser, or even from a browser at all, is tedious was received from a conforming browser, or even from a browser at all, is
and error-prone. tedious and error-prone.
:mod:`html` --- HyperText Markup Language support
=================================================
.. module:: html
:synopsis: Helpers for manipulating HTML.
.. versionadded:: 3.2
This module defines utilities to manipulate HTML.
.. function:: escape(s, quote=True)
Convert the characters ``&``, ``<`` and ``>`` in string *s* to HTML-safe
sequences. Use this if you need to display text that might contain such
characters in HTML. If the optional flag *quote* is true, the characters
(``"``) and (``'``) are also translated; this helps for inclusion in an HTML
attribute value delimited by quotes, as in ``<a href="...">``.
...@@ -20,6 +20,7 @@ definition of the Python bindings for the DOM and SAX interfaces. ...@@ -20,6 +20,7 @@ definition of the Python bindings for the DOM and SAX interfaces.
.. toctree:: .. toctree::
html.rst
html.parser.rst html.parser.rst
html.entities.rst html.entities.rst
pyexpat.rst pyexpat.rst
......
...@@ -31,13 +31,13 @@ __version__ = "2.6" ...@@ -31,13 +31,13 @@ __version__ = "2.6"
# Imports # Imports
# ======= # =======
from operator import attrgetter
from io import StringIO from io import StringIO
import sys import sys
import os import os
import urllib.parse import urllib.parse
import email.parser import email.parser
from warnings import warn from warnings import warn
import html
__all__ = ["MiniFieldStorage", "FieldStorage", __all__ = ["MiniFieldStorage", "FieldStorage",
"parse", "parse_qs", "parse_qsl", "parse_multipart", "parse", "parse_qs", "parse_qsl", "parse_multipart",
...@@ -800,8 +800,8 @@ def print_exception(type=None, value=None, tb=None, limit=None): ...@@ -800,8 +800,8 @@ def print_exception(type=None, value=None, tb=None, limit=None):
list = traceback.format_tb(tb, limit) + \ list = traceback.format_tb(tb, limit) + \
traceback.format_exception_only(type, value) traceback.format_exception_only(type, value)
print("<PRE>%s<B>%s</B></PRE>" % ( print("<PRE>%s<B>%s</B></PRE>" % (
escape("".join(list[:-1])), html.escape("".join(list[:-1])),
escape(list[-1]), html.escape(list[-1]),
)) ))
del tb del tb
...@@ -812,7 +812,7 @@ def print_environ(environ=os.environ): ...@@ -812,7 +812,7 @@ def print_environ(environ=os.environ):
print("<H3>Shell Environment:</H3>") print("<H3>Shell Environment:</H3>")
print("<DL>") print("<DL>")
for key in keys: for key in keys:
print("<DT>", escape(key), "<DD>", escape(environ[key])) print("<DT>", html.escape(key), "<DD>", html.escape(environ[key]))
print("</DL>") print("</DL>")
print() print()
...@@ -825,10 +825,10 @@ def print_form(form): ...@@ -825,10 +825,10 @@ def print_form(form):
print("<P>No form fields.") print("<P>No form fields.")
print("<DL>") print("<DL>")
for key in keys: for key in keys:
print("<DT>" + escape(key) + ":", end=' ') print("<DT>" + html.escape(key) + ":", end=' ')
value = form[key] value = form[key]
print("<i>" + escape(repr(type(value))) + "</i>") print("<i>" + html.escape(repr(type(value))) + "</i>")
print("<DD>" + escape(repr(value))) print("<DD>" + html.escape(repr(value)))
print("</DL>") print("</DL>")
print() print()
...@@ -839,9 +839,9 @@ def print_directory(): ...@@ -839,9 +839,9 @@ def print_directory():
try: try:
pwd = os.getcwd() pwd = os.getcwd()
except os.error as msg: except os.error as msg:
print("os.error:", escape(str(msg))) print("os.error:", html.escape(str(msg)))
else: else:
print(escape(pwd)) print(html.escape(pwd))
print() print()
def print_arguments(): def print_arguments():
...@@ -899,9 +899,9 @@ environment as well. Here are some common variable names: ...@@ -899,9 +899,9 @@ environment as well. Here are some common variable names:
# ========= # =========
def escape(s, quote=None): def escape(s, quote=None):
'''Replace special characters "&", "<" and ">" to HTML-safe sequences. """Deprecated API."""
If the optional flag quote is true, the quotation mark character (") warn("cgi.escape is deprecated, use html.escape instead",
is also translated.''' PendingDeprecationWarning, stacklevel=2)
s = s.replace("&", "&amp;") # Must be done first! s = s.replace("&", "&amp;") # Must be done first!
s = s.replace("<", "&lt;") s = s.replace("<", "&lt;")
s = s.replace(">", "&gt;") s = s.replace(">", "&gt;")
...@@ -909,6 +909,7 @@ def escape(s, quote=None): ...@@ -909,6 +909,7 @@ def escape(s, quote=None):
s = s.replace('"', "&quot;") s = s.replace('"', "&quot;")
return s return s
def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"): def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"):
import re import re
return re.match(_vb_pattern, s) return re.match(_vb_pattern, s)
......
# This directory is a Python package. """
General functions for HTML manipulation.
"""
# Translation tables for str.translate(), keyed by Unicode code point.
# _escape_map covers only the markup characters; _escape_map_full adds
# both quote characters for use inside attribute values.
_escape_map = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;'}
_escape_map_full = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;',
                    ord('"'): '&quot;', ord('\''): '&#x27;'}

# NB: this is a candidate for a bytes/string polymorphic interface

def escape(s, quote=True):
    """
    Replace special characters "&", "<" and ">" to HTML-safe sequences.
    If the optional flag quote is true (the default), the quotation mark
    characters, both double quote (") and single quote ('), are also
    translated.

    s is the string to escape; a new string is returned.
    """
    if quote:
        return s.translate(_escape_map_full)
    return s.translate(_escape_map)
...@@ -84,7 +84,7 @@ __version__ = "0.6" ...@@ -84,7 +84,7 @@ __version__ = "0.6"
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"] __all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
import cgi import html
import email.message import email.message
import email.parser import email.parser
import http.client import http.client
...@@ -705,7 +705,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): ...@@ -705,7 +705,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
return None return None
list.sort(key=lambda a: a.lower()) list.sort(key=lambda a: a.lower())
r = [] r = []
displaypath = cgi.escape(urllib.parse.unquote(self.path)) displaypath = html.escape(urllib.parse.unquote(self.path))
r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">') r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath) r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath) r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
...@@ -721,7 +721,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): ...@@ -721,7 +721,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
displayname = name + "@" displayname = name + "@"
# Note: a link to a directory displays with @ and links with / # Note: a link to a directory displays with @ and links with /
r.append('<li><a href="%s">%s</a>\n' r.append('<li><a href="%s">%s</a>\n'
% (urllib.parse.quote(linkname), cgi.escape(displayname))) % (urllib.parse.quote(linkname), html.escape(displayname)))
r.append("</ul>\n<hr>\n</body>\n</html>\n") r.append("</ul>\n<hr>\n</body>\n</html>\n")
enc = sys.getfilesystemencoding() enc = sys.getfilesystemencoding()
encoded = ''.join(r).encode(enc) encoded = ''.join(r).encode(enc)
......
...@@ -568,8 +568,8 @@ class Test_touch_import(support.TestCase): ...@@ -568,8 +568,8 @@ class Test_touch_import(support.TestCase):
def test_from_import(self): def test_from_import(self):
node = parse('bar()') node = parse('bar()')
fixer_util.touch_import("cgi", "escape", node) fixer_util.touch_import("html", "escape", node)
self.assertEqual(str(node), 'from cgi import escape\nbar()\n\n') self.assertEqual(str(node), 'from html import escape\nbar()\n\n')
def test_name_import(self): def test_name_import(self):
node = parse('bar()') node = parse('bar()')
......
"""
Tests for the html module functions.
"""
import html
import unittest
from test.support import run_unittest
class HtmlTests(unittest.TestCase):
    """Checks for the helper functions exposed by the html module."""

    def test_escape(self):
        """escape() converts markup characters, and quotes unless disabled."""
        sample = '\'<script>"&foo;"</script>\''
        # Each entry pairs the extra positional arguments passed after the
        # sample with the output expected for that call.
        expectations = (
            ((),
             '&#x27;&lt;script&gt;&quot;&amp;foo;&quot;&lt;/script&gt;&#x27;'),
            ((False,),
             '\'&lt;script&gt;"&amp;foo;"&lt;/script&gt;\''),
        )
        for extra_args, expected in expectations:
            self.assertEqual(html.escape(sample, *extra_args), expected)
def test_main():
    """Run the HtmlTests case through the regression-test helper."""
    run_unittest(HtmlTests)


# Allow the test file to be executed directly as a script.
if __name__ == '__main__':
    test_main()
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# except if the test is specific to the Python implementation. # except if the test is specific to the Python implementation.
import sys import sys
import cgi import html
import unittest import unittest
from test import support from test import support
...@@ -1328,7 +1328,7 @@ XINCLUDE["default.xml"] = """\ ...@@ -1328,7 +1328,7 @@ XINCLUDE["default.xml"] = """\
<p>Example.</p> <p>Example.</p>
<xi:include href="{}"/> <xi:include href="{}"/>
</document> </document>
""".format(cgi.escape(SIMPLE_XMLFILE, True)) """.format(html.escape(SIMPLE_XMLFILE, True))
def xinclude_loader(href, parse="xml", encoding=None): def xinclude_loader(href, parse="xml", encoding=None):
try: try:
......
...@@ -24,6 +24,9 @@ Core and Builtins ...@@ -24,6 +24,9 @@ Core and Builtins
Library Library
------- -------
- Issue #2830: Add the ``html.escape()`` function, which quotes all problematic
characters by default. Deprecate ``cgi.escape()``.
- Issue 9409: Fix the regex to match all kind of filenames, for interactive - Issue 9409: Fix the regex to match all kind of filenames, for interactive
debugging in doctests. debugging in doctests.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment