Get rid of Products.CMFDefault.utils

dde46fed · Bryton Lacquement · c21014cd · dde46fed · dde46fed · dde46fed
Commit dde46fed authored Sep 24, 2019 by Bryton Lacquement 🚪
9 changed files
--- a/product/ERP5/Document/Coordinate.py
+++ b/product/ERP5/Document/Coordinate.py
@@ -32,8 +32,7 @@ from zExceptions import Forbidden

 from Products.ERP5Type import Permissions, PropertySheet, interfaces
 from Products.ERP5Type.Base import Base
-from Products.ERP5Type.Utils import deprecated
-from Products.CMFDefault.utils import formatRFC822Headers
+from Products.ERP5Type.Utils import deprecated, formatRFC822Headers
 import re

 _marker = object()

--- a/product/ERP5Type/Utils.py
+++ b/product/ERP5Type/Utils.py
@@ -1767,3 +1767,26 @@ def reencodeUrlEscapes(url):
      url += [_reencodeUrlEscapes_map[c] for c in part]
  except StopIteration:
    return ''.join(url)
+
+#####################################################
+# Replacement for Products.CMFDefault
+#####################################################
+
+def formatRFC822Headers(headers):
+  """ Convert the key-value pairs in 'headers' to valid RFC822-style
+      headers, including adding leading whitespace to elements which
+      contain newlines in order to preserve continuation-line semantics.
+
+      This code is taken from Products.CMFDefault.utils and modified
+      for ERP5 purpose
+  """
+  munged = []
+  linesplit = re.compile(r'[\n\r]+?')
+  for key, value in headers:
+    if value is not None:
+      if type(value) in (list, tuple):
+        vallines = map(str, value)
+      else:
+        vallines = linesplit.split(str(value))
+      munged.append('%s: %s' % (key, '\r\n  '.join(vallines)))
+  return '\r\n'.join(munged)
--- a/product/ERP5Type/WebDAVSupport.py
+++ b/product/ERP5Type/WebDAVSupport.py
@@ -13,67 +13,23 @@
 #
 ##############################################################################

+import collections
+import email
 import re
 import transaction
+from lxml import html
+from Products.ERP5Type.Utils import formatRFC822Headers
 from Acquisition import aq_parent, aq_inner, aq_base
 from AccessControl import ClassSecurityInfo, ModuleSecurityInfo
 from Products.ERP5Type.Globals import InitializeClass
 from Products.ERP5Type import Permissions, PropertySheet, Constraint
 from Products.CMFCore.PortalContent import ResourceLockedError
 from Products.CMFCore.utils import getToolByName
-from Products.CMFDefault.utils import parseHeadersBody
-from Products.CMFDefault.utils import html_headcheck
-from Products.CMFDefault.utils import bodyfinder
-from Products.CMFDefault.utils import SimpleHTMLParser as CMFSimpleHTMLParser
 from zLOG import LOG
 from zExceptions import Forbidden

 security = ModuleSecurityInfo( 'Products.ERP5Type.WebDAVSupport' )

-class SimpleHTMLParser(CMFSimpleHTMLParser):
-
-  def do_meta( self, attrs ):
-
-    name = ''
-    content = ''
-
-    for attrname, value in attrs:
-      value = value.strip()
-      if attrname == "name":
-          name = value.capitalize()
-      if attrname == "content":
-          content = value
-
-    if name:
-      if not self.metatags.has_key(name):
-        self.metatags[ name ] = content
-      elif type(self.metatags[ name ]) is type([]):
-        self.metatags[ name ].append(content)
-      else:
-        self.metatags[ name ] = [self.metatags[ name ], content]
-
-security.declarePublic('formatRFC822Headers')
-def formatRFC822Headers( headers ):
-
-  """ Convert the key-value pairs in 'headers' to valid RFC822-style
-      headers, including adding leading whitespace to elements which
-      contain newlines in order to preserve continuation-line semantics.
-
-      This code is taken from Products.CMFDefault.utils and modified
-      for ERP5 purpose
-  """
-  munged = []
-  linesplit = re.compile( r'[\n\r]+?' )
-
-  for key, value in headers:
-    if value is not None:
-      if type(value) in (type([]), type(())):
-        vallines = map(lambda x: str(x), value)
-      else:
-        vallines = linesplit.split( str(value) )
-      munged.append( '%s: %s' % ( key, '\r\n  '.join( vallines ) ) )
-
-  return '\r\n'.join( munged )

 class TextContent:
  """
@@ -87,30 +43,28 @@ class TextContent:

  security = ClassSecurityInfo()

-  security.declarePrivate('guessFormat')
-  def guessFormat(self, text):
-    """ Simple stab at guessing the inner format of the text """
-    if html_headcheck(text):
-      return 'text/html'
-    else:
-      return 'text/structured'
-
-  security.declarePrivate('handleText')
-  def handleText(self, text, format=None):
-    """ Handles the raw text, returning headers, body, format """
-    headers = {}
-    if not format:
-      format = self.guessFormat(text)
-    if format == 'text/html':
-      parser = SimpleHTMLParser()
-      parser.feed(text)
-      headers.update(parser.metatags)
-      if parser.title:
-        headers['title'] = parser.title
-      body = bodyfinder(text)
-    else:
-      headers, body = parseHeadersBody(text, headers)
-    return headers, text, format
+  security.declarePrivate('parseHeadersFromText')
+  def parseHeadersFromText(self, text):
+    """ Handles the raw text, returning headers """
+    try:
+      tree = html.fromstring(text)
+      if tree.tag != "html":
+        raise Exception
+    except Exception:
+      # this is probably not html code, try rfc822 parsing
+      message = email.message_from_string(text)
+      return {k.capitalize(): '\n'.join(message.get_all(k))
+              for k in message.keys()}
+
+    headers = collections.defaultdict(list)
+    for meta in tree.iterfind(".//meta"):
+      name = meta.get("name")
+      if name:
+        headers[name.capitalize()].append(meta.get("content"))
+    title = tree.find("head/title")
+    if title is not None:
+      headers["title"] = title.text
+    return {k: v if len(v) > 1 else v[0] for k, v in headers.iteritems()}

  ## FTP handlers
  security.declareProtected(Permissions.ModifyPortalContent, 'PUT')
@@ -123,18 +77,11 @@ class TextContent:
    body = REQUEST.get('BODY', '')

    try:
-      headers, body, format = self.handleText(text=body)
+      headers = self.parseHeadersFromText(body)
      content_type = REQUEST.get_header('Content-Type', '')
      headers.setdefault('content_type', content_type)
      headers['file'] = body
      self._edit(**headers)
-    except 'EditingConflict', msg:
-      # XXX Can we get an error msg through?  Should we be raising an
-      #     exception, to be handled in the FTP mechanism?  Inquiring
-      #     minds...
-      transaction.abort()
-      RESPONSE.setStatus(450)
-      return RESPONSE
    except ResourceLockedError, msg:
      transaction.abort()
      RESPONSE.setStatus(423)

--- a/product/PloneHotfix20121106/safe_html.py
+++ b/product/PloneHotfix20121106/safe_html.py
 import re
 from HTMLParser import HTMLParser, HTMLParseError
-from Products.CMFDefault.utils import IllegalHTML
+from Products.PortalTransforms.libtransforms.utils import IllegalHTML
 from Products.PortalTransforms.transforms import safe_html

 CHARREF_RE = re.compile(r"&(?:amp;)?#([xX]?[0-9a-fA-F]+);?")

--- a/product/PortalTransforms/transforms/html_body.py
+++ b/product/PortalTransforms/transforms/html_body.py
 from Products.PortalTransforms.interfaces import ITransform
 from zope.interface import implements
-from Products.CMFDefault.utils import bodyfinder
+from Products.PortalTransforms.libtransforms.utils import bodyfinder

 class HTMLBody:
    """Simple transform which extracts the content of the body tag"""

--- a/product/PortalTransforms/transforms/pdf_to_html.py
+++ b/product/PortalTransforms/transforms/pdf_to_html.py
@@ -4,10 +4,10 @@ Uses the http://sf.net/projects/pdftohtml bin to do its handy work
 """
 from Products.PortalTransforms.interfaces import ITransform
 from zope.interface import implements
-from Products.PortalTransforms.libtransforms.utils import bin_search, sansext
+from Products.PortalTransforms.libtransforms.utils import bin_search, \
+  bodyfinder, sansext
 from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
 from Products.PortalTransforms.libtransforms.commandtransform import popentransform
-from Products.CMFDefault.utils import bodyfinder
 import os

 class popen_pdf_to_html(popentransform):

--- a/product/PortalTransforms/transforms/rtf_to_html.py
+++ b/product/PortalTransforms/transforms/rtf_to_html.py
@@ -4,9 +4,9 @@ Uses the http://freshmeat.net/projects/rtfconverter/ bin to do its handy work

 from Products.PortalTransforms.interfaces import ITransform
 from zope.interface import implements
-from Products.PortalTransforms.libtransforms.utils import bin_search, sansext
+from Products.PortalTransforms.libtransforms.utils import bin_search, \
+  bodyfinder, sansext
 from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
-from Products.CMFDefault.utils import bodyfinder
 import os

 class rtf_to_html(commandtransform):

--- a/product/PortalTransforms/transforms/safe_html.py
+++ b/product/PortalTransforms/transforms/safe_html.py
@@ -8,10 +8,7 @@ import codecs
 from Products.PortalTransforms.interfaces import ITransform
 from zope.interface import implements
 from Products.PortalTransforms.utils import log
-from Products.CMFDefault.utils import IllegalHTML
-from Products.CMFDefault.utils import SimpleHTMLParser
-from Products.CMFDefault.utils import VALID_TAGS
-from Products.CMFDefault.utils import NASTY_TAGS
+from Products.PortalTransforms.libtransforms.utils import IllegalHTML
 from Products.PortalTransforms.utils import safeToInt

 from lxml import etree
@@ -24,10 +21,64 @@ except ImportError:
  # Means BeautifulSoup module is not installed
  soupfromstring = None
 # tag mapping: tag -> short or long tag
-VALID_TAGS = VALID_TAGS.copy()
-NASTY_TAGS = NASTY_TAGS.copy()
-
-# add some tags to allowed types. These should be backported to CMFDefault.
+VALID_TAGS = {
+  'a': 1,
+  'b': 1,
+  'base': 0,
+  'big': 1,
+  'blockquote': 1,
+  'body': 1,
+  'br': 0,
+  'caption': 1,
+  'cite': 1,
+  'code': 1,
+  'dd': 1,
+  'div': 1,
+  'dl': 1,
+  'dt': 1,
+  'em': 1,
+  'h1': 1,
+  'h2': 1,
+  'h3': 1,
+  'h4': 1,
+  'h5': 1,
+  'h6': 1,
+  'head': 1,
+  'hr': 0,
+  'html': 1,
+  'i': 1,
+  'img': 0,
+  'kbd': 1,
+  'li': 1,
+#  'link': 1, type="script" hoses us
+  'meta': 0,
+  'ol': 1,
+  'p': 1,
+  'pre': 1,
+  'small': 1,
+  'span': 1,
+  'strong': 1,
+  'sub': 1,
+  'sup': 1,
+  'table': 1,
+  'tbody': 1,
+  'td': 1,
+  'th': 1,
+  'title': 1,
+  'tr': 1,
+  'tt': 1,
+  'u': 1,
+  'ul': 1,
+  }
+
+NASTY_TAGS = {
+  'script': 1,
+  'object': 1,
+  'embed': 1,
+  'applet': 1,
+  }
+
+# add some tags to allowed types.
 VALID_TAGS['ins'] = 1
 VALID_TAGS['del'] = 1
 VALID_TAGS['q'] = 1
@@ -72,7 +123,7 @@ VALID_TAGS['source'] = 1
 VALID_TAGS['time'] = 1
 VALID_TAGS['video'] = 1

-# add some tags to nasty. These should also probably be backported to CMFDefault.
+# add some tags to NASTY_TAGS.
 NASTY_TAGS['style'] = 1  # this helps improve Word HTML cleanup.
 NASTY_TAGS['meta'] = 1  # allowed by parsers, but can cause unexpected behavior


--- a/product/PortalTransforms/transforms/textile_to_html.py
+++ b/product/PortalTransforms/transforms/textile_to_html.py
@@ -8,8 +8,6 @@ import os

 from zope.interface import implements

-from Products.CMFDefault.utils import bodyfinder
-
 from Products.PortalTransforms.interfaces import ITransform
 from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
 from Products.PortalTransforms.libtransforms.utils import bin_search