Commit 1f7fffb3 authored by Georg Brandl

#2830: add html.escape() helper and move cgi.escape() uses in the standard...

#2830: add html.escape() helper and move cgi.escape() uses in the standard library to it.  It defaults to quote=True and also escapes single quotes, which makes casual use safer.  The cgi.escape() interface is not touched, but emits a (silent) PendingDeprecationWarning.
parent 70543acf
......@@ -293,7 +293,7 @@ following WSGI-application::
# -*- coding: UTF-8 -*-
import sys, os
from cgi import escape
from html import escape
from flup.server.fcgi import WSGIServer
def app(environ, start_response):
......
......@@ -328,9 +328,9 @@ algorithms implemented in this module in other circumstances.
attribute value delimited by double quotes, as in ``<a href="...">``. Note
that single quotes are never translated.
If the value to be quoted might include single- or double-quote characters,
or both, consider using the :func:`~xml.sax.saxutils.quoteattr` function in the
:mod:`xml.sax.saxutils` module instead.
.. deprecated:: 3.2
This function is unsafe because *quote* is false by default, and therefore
deprecated. Use :func:`html.escape` instead.
.. _cgi-security:
......@@ -508,8 +508,8 @@ Common problems and solutions
.. rubric:: Footnotes
.. [#] Note that some recent versions of the HTML specification do state what order the
field values should be supplied in, but knowing whether a request was
received from a conforming browser, or even from a browser at all, is tedious
and error-prone.
.. [#] Note that some recent versions of the HTML specification do state what
order the field values should be supplied in, but knowing whether a request
was received from a conforming browser, or even from a browser at all, is
tedious and error-prone.
:mod:`html` --- HyperText Markup Language support
=================================================
.. module:: html
:synopsis: Helpers for manipulating HTML.
.. versionadded:: 3.2
This module defines utilities to manipulate HTML.
.. function:: escape(s, quote=True)
Convert the characters ``&``, ``<`` and ``>`` in string *s* to HTML-safe
sequences. Use this if you need to display text that might contain such
characters in HTML. If the optional flag *quote* is true, the characters
(``"``) and (``'``) are also translated; this helps for inclusion in an HTML
attribute value delimited by quotes, as in ``<a href="...">``.
......@@ -20,6 +20,7 @@ definition of the Python bindings for the DOM and SAX interfaces.
.. toctree::
html.rst
html.parser.rst
html.entities.rst
pyexpat.rst
......
......@@ -31,13 +31,13 @@ __version__ = "2.6"
# Imports
# =======
from operator import attrgetter
from io import StringIO
import sys
import os
import urllib.parse
import email.parser
from warnings import warn
import html
__all__ = ["MiniFieldStorage", "FieldStorage",
"parse", "parse_qs", "parse_qsl", "parse_multipart",
......@@ -800,8 +800,8 @@ def print_exception(type=None, value=None, tb=None, limit=None):
list = traceback.format_tb(tb, limit) + \
traceback.format_exception_only(type, value)
print("<PRE>%s<B>%s</B></PRE>" % (
escape("".join(list[:-1])),
escape(list[-1]),
html.escape("".join(list[:-1])),
html.escape(list[-1]),
))
del tb
......@@ -812,7 +812,7 @@ def print_environ(environ=os.environ):
print("<H3>Shell Environment:</H3>")
print("<DL>")
for key in keys:
print("<DT>", escape(key), "<DD>", escape(environ[key]))
print("<DT>", html.escape(key), "<DD>", html.escape(environ[key]))
print("</DL>")
print()
......@@ -825,10 +825,10 @@ def print_form(form):
print("<P>No form fields.")
print("<DL>")
for key in keys:
print("<DT>" + escape(key) + ":", end=' ')
print("<DT>" + html.escape(key) + ":", end=' ')
value = form[key]
print("<i>" + escape(repr(type(value))) + "</i>")
print("<DD>" + escape(repr(value)))
print("<i>" + html.escape(repr(type(value))) + "</i>")
print("<DD>" + html.escape(repr(value)))
print("</DL>")
print()
......@@ -839,9 +839,9 @@ def print_directory():
try:
pwd = os.getcwd()
except os.error as msg:
print("os.error:", escape(str(msg)))
print("os.error:", html.escape(str(msg)))
else:
print(escape(pwd))
print(html.escape(pwd))
print()
def print_arguments():
......@@ -899,9 +899,9 @@ environment as well. Here are some common variable names:
# =========
def escape(s, quote=None):
'''Replace special characters "&", "<" and ">" to HTML-safe sequences.
If the optional flag quote is true, the quotation mark character (")
is also translated.'''
"""Deprecated API."""
warn("cgi.escape is deprecated, use html.escape instead",
PendingDeprecationWarning, stacklevel=2)
s = s.replace("&", "&amp;") # Must be done first!
s = s.replace("<", "&lt;")
s = s.replace(">", "&gt;")
......@@ -909,6 +909,7 @@ def escape(s, quote=None):
s = s.replace('"', "&quot;")
return s
def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"):
import re
return re.match(_vb_pattern, s)
......
# This directory is a Python package.
"""
General functions for HTML manipulation.
"""
# Translation table for the characters that are always escaped.
_escape_map = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;'}

# Table used when quote characters must be escaped too.  It is derived from
# the basic table so the two can never drift apart.
_escape_map_full = dict(_escape_map)
_escape_map_full.update({ord('"'): '&quot;', ord('\''): '&#x27;'})

# NB: this is a candidate for a bytes/string polymorphic interface

def escape(s, quote=True):
    """
    Replace special characters "&", "<" and ">" with HTML-safe sequences.

    If the optional flag quote is true (the default), the quotation mark
    characters, both double quote (") and single quote ('), are also
    translated, which makes the result safe to place inside a quoted
    HTML attribute value.

    s must be a str; the escaped copy is returned and s is left unchanged.
    """
    if quote:
        return s.translate(_escape_map_full)
    return s.translate(_escape_map)
......@@ -84,7 +84,7 @@ __version__ = "0.6"
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
import cgi
import html
import email.message
import email.parser
import http.client
......@@ -705,7 +705,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
return None
list.sort(key=lambda a: a.lower())
r = []
displaypath = cgi.escape(urllib.parse.unquote(self.path))
displaypath = html.escape(urllib.parse.unquote(self.path))
r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
......@@ -721,7 +721,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
displayname = name + "@"
# Note: a link to a directory displays with @ and links with /
r.append('<li><a href="%s">%s</a>\n'
% (urllib.parse.quote(linkname), cgi.escape(displayname)))
% (urllib.parse.quote(linkname), html.escape(displayname)))
r.append("</ul>\n<hr>\n</body>\n</html>\n")
enc = sys.getfilesystemencoding()
encoded = ''.join(r).encode(enc)
......
......@@ -568,8 +568,8 @@ class Test_touch_import(support.TestCase):
def test_from_import(self):
node = parse('bar()')
fixer_util.touch_import("cgi", "escape", node)
self.assertEqual(str(node), 'from cgi import escape\nbar()\n\n')
fixer_util.touch_import("html", "escape", node)
self.assertEqual(str(node), 'from html import escape\nbar()\n\n')
def test_name_import(self):
node = parse('bar()')
......
"""
Tests for the html module functions.
"""
import html
import unittest
from test.support import run_unittest
class HtmlTests(unittest.TestCase):
    """Unit tests for the functions provided by the html module."""

    def test_escape(self):
        # One sample containing every character escape() knows about:
        # both quote styles, angle brackets and an ampersand.
        sample = '\'<script>"&foo;"</script>\''

        # Default call: quotes are escaped along with the markup characters.
        fully_escaped = html.escape(sample)
        self.assertEqual(
            fully_escaped,
            '&#x27;&lt;script&gt;&quot;&amp;foo;&quot;&lt;/script&gt;&#x27;')

        # quote=False (passed positionally): quote characters pass through.
        markup_only = html.escape(sample, False)
        self.assertEqual(
            markup_only,
            '\'&lt;script&gt;"&amp;foo;"&lt;/script&gt;\'')
# Run the HtmlTests case through the run_unittest helper from test.support.
def test_main():
run_unittest(HtmlTests)
# When this file is executed as a script, run the tests directly.
if __name__ == '__main__':
test_main()
......@@ -12,7 +12,7 @@
# except if the test is specific to the Python implementation.
import sys
import cgi
import html
import unittest
from test import support
......@@ -1328,7 +1328,7 @@ XINCLUDE["default.xml"] = """\
<p>Example.</p>
<xi:include href="{}"/>
</document>
""".format(cgi.escape(SIMPLE_XMLFILE, True))
""".format(html.escape(SIMPLE_XMLFILE, True))
def xinclude_loader(href, parse="xml", encoding=None):
try:
......
......@@ -24,6 +24,9 @@ Core and Builtins
Library
-------
- Issue #2830: Add the ``html.escape()`` function, which quotes all problematic
characters by default. Deprecate ``cgi.escape()``.
- Issue #9409: Fix the regex to match all kinds of filenames, for interactive
debugging in doctests.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment