Commit dde46fed authored by Bryton Lacquement's avatar Bryton Lacquement 🚪

Get rid of Products.CMFDefault.utils

parent c21014cd
...@@ -32,8 +32,7 @@ from zExceptions import Forbidden ...@@ -32,8 +32,7 @@ from zExceptions import Forbidden
from Products.ERP5Type import Permissions, PropertySheet, interfaces from Products.ERP5Type import Permissions, PropertySheet, interfaces
from Products.ERP5Type.Base import Base from Products.ERP5Type.Base import Base
from Products.ERP5Type.Utils import deprecated from Products.ERP5Type.Utils import deprecated, formatRFC822Headers
from Products.CMFDefault.utils import formatRFC822Headers
import re import re
_marker = object() _marker = object()
......
...@@ -1767,3 +1767,26 @@ def reencodeUrlEscapes(url): ...@@ -1767,3 +1767,26 @@ def reencodeUrlEscapes(url):
url += [_reencodeUrlEscapes_map[c] for c in part] url += [_reencodeUrlEscapes_map[c] for c in part]
except StopIteration: except StopIteration:
return ''.join(url) return ''.join(url)
#####################################################
# Replacement for Products.CMFDefault
#####################################################
def formatRFC822Headers(headers):
    """Convert the key-value pairs in 'headers' to RFC822-style headers.

    Values that span several lines are folded: every continuation line is
    prefixed with a single space so that it is still read as part of the
    same header.

    This code is taken from Products.CMFDefault.utils and modified
    for ERP5 purpose.

    headers -- iterable of (key, value) pairs. Pairs whose value is None
               are skipped. A list or tuple value contributes one line per
               item (each item is passed through str()); any other value
               is passed through str() and split on line breaks.

    Returns the header lines joined with CRLF, without a trailing CRLF.
    """
    # Treat CRLF as ONE line break. The former pattern, r'[\n\r]+?', is
    # non-greedy, so it matched '\r' and '\n' separately and produced a
    # spurious whitespace-only continuation line for every CRLF in a value.
    # Blank lines inside a value (two consecutive breaks) are still kept.
    linesplit = re.compile(r'\r\n|\r|\n')
    munged = []
    for key, value in headers:
        if value is None:
            continue
        if isinstance(value, (list, tuple)):
            # Items of a sequence are emitted as-is, one per line.
            vallines = [str(item) for item in value]
        else:
            vallines = linesplit.split(str(value))
        munged.append('%s: %s' % (key, '\r\n '.join(vallines)))
    return '\r\n'.join(munged)
...@@ -13,67 +13,23 @@ ...@@ -13,67 +13,23 @@
# #
############################################################################## ##############################################################################
import collections
import email
import re import re
import transaction import transaction
from lxml import html
from Products.ERP5Type.Utils import formatRFC822Headers
from Acquisition import aq_parent, aq_inner, aq_base from Acquisition import aq_parent, aq_inner, aq_base
from AccessControl import ClassSecurityInfo, ModuleSecurityInfo from AccessControl import ClassSecurityInfo, ModuleSecurityInfo
from Products.ERP5Type.Globals import InitializeClass from Products.ERP5Type.Globals import InitializeClass
from Products.ERP5Type import Permissions, PropertySheet, Constraint from Products.ERP5Type import Permissions, PropertySheet, Constraint
from Products.CMFCore.PortalContent import ResourceLockedError from Products.CMFCore.PortalContent import ResourceLockedError
from Products.CMFCore.utils import getToolByName from Products.CMFCore.utils import getToolByName
from Products.CMFDefault.utils import parseHeadersBody
from Products.CMFDefault.utils import html_headcheck
from Products.CMFDefault.utils import bodyfinder
from Products.CMFDefault.utils import SimpleHTMLParser as CMFSimpleHTMLParser
from zLOG import LOG from zLOG import LOG
from zExceptions import Forbidden from zExceptions import Forbidden
security = ModuleSecurityInfo( 'Products.ERP5Type.WebDAVSupport' ) security = ModuleSecurityInfo( 'Products.ERP5Type.WebDAVSupport' )
class SimpleHTMLParser(CMFSimpleHTMLParser):
    """CMF HTML parser variant that keeps every value of a repeated meta tag.

    NOTE(review): 'self.metatags' is not created here; it is presumably a
    dict set up by the CMFDefault base class -- confirm.
    """

    def do_meta(self, attrs):
        """Record the name/content pair of one <meta> tag in self.metatags.

        attrs -- iterable of (attribute name, attribute value) pairs.

        The meta name is capitalized before being used as the key. The first
        content seen for a name is stored as a plain value; further contents
        for the same name turn the entry into a list holding all of them.
        Tags without a (non-empty) name attribute are ignored.
        """
        name = ''
        content = ''
        for attrname, value in attrs:
            value = value.strip()
            if attrname == "name":
                name = value.capitalize()
            if attrname == "content":
                content = value
        if not name:
            return
        if name not in self.metatags:
            self.metatags[name] = content
        elif isinstance(self.metatags[name], list):
            self.metatags[name].append(content)
        else:
            # Second occurrence: promote the single value to a list.
            self.metatags[name] = [self.metatags[name], content]
security.declarePublic('formatRFC822Headers')
def formatRFC822Headers(headers):
    """Convert the key-value pairs in 'headers' to RFC822-style headers.

    Values that span several lines are folded: every continuation line is
    prefixed with a single space so that it is still read as part of the
    same header.

    This code is taken from Products.CMFDefault.utils and modified
    for ERP5 purpose.

    headers -- iterable of (key, value) pairs. Pairs whose value is None
               are skipped. A list or tuple value contributes one line per
               item (each item is passed through str()); any other value
               is passed through str() and split on line breaks.

    Returns the header lines joined with CRLF, without a trailing CRLF.
    """
    # Treat CRLF as ONE line break. The former pattern, r'[\n\r]+?', is
    # non-greedy, so it matched '\r' and '\n' separately and produced a
    # spurious whitespace-only continuation line for every CRLF in a value.
    # Blank lines inside a value (two consecutive breaks) are still kept.
    linesplit = re.compile(r'\r\n|\r|\n')
    munged = []
    for key, value in headers:
        if value is None:
            continue
        if isinstance(value, (list, tuple)):
            # Items of a sequence are emitted as-is, one per line.
            vallines = [str(item) for item in value]
        else:
            vallines = linesplit.split(str(value))
        munged.append('%s: %s' % (key, '\r\n '.join(vallines)))
    return '\r\n'.join(munged)
class TextContent: class TextContent:
""" """
...@@ -87,30 +43,28 @@ class TextContent: ...@@ -87,30 +43,28 @@ class TextContent:
security = ClassSecurityInfo() security = ClassSecurityInfo()
security.declarePrivate('guessFormat') security.declarePrivate('parseHeadersFromText')
def guessFormat(self, text): def parseHeadersFromText(self, text):
""" Simple stab at guessing the inner format of the text """ """ Handles the raw text, returning headers """
if html_headcheck(text): try:
return 'text/html' tree = html.fromstring(text)
else: if tree.tag != "html":
return 'text/structured' raise Exception
except Exception:
security.declarePrivate('handleText') # this is probably not html code, try rfc822 parsing
def handleText(self, text, format=None): message = email.message_from_string(text)
""" Handles the raw text, returning headers, body, format """ return {k.capitalize(): '\n'.join(message.get_all(k))
headers = {} for k in message.keys()}
if not format:
format = self.guessFormat(text) headers = collections.defaultdict(list)
if format == 'text/html': for meta in tree.iterfind(".//meta"):
parser = SimpleHTMLParser() name = meta.get("name")
parser.feed(text) if name:
headers.update(parser.metatags) headers[name.capitalize()].append(meta.get("content"))
if parser.title: title = tree.find("head/title")
headers['title'] = parser.title if title is not None:
body = bodyfinder(text) headers["title"] = title.text
else: return {k: v if len(v) > 1 else v[0] for k, v in headers.iteritems()}
headers, body = parseHeadersBody(text, headers)
return headers, text, format
## FTP handlers ## FTP handlers
security.declareProtected(Permissions.ModifyPortalContent, 'PUT') security.declareProtected(Permissions.ModifyPortalContent, 'PUT')
...@@ -123,18 +77,11 @@ class TextContent: ...@@ -123,18 +77,11 @@ class TextContent:
body = REQUEST.get('BODY', '') body = REQUEST.get('BODY', '')
try: try:
headers, body, format = self.handleText(text=body) headers = self.parseHeadersFromText(body)
content_type = REQUEST.get_header('Content-Type', '') content_type = REQUEST.get_header('Content-Type', '')
headers.setdefault('content_type', content_type) headers.setdefault('content_type', content_type)
headers['file'] = body headers['file'] = body
self._edit(**headers) self._edit(**headers)
except 'EditingConflict', msg:
# XXX Can we get an error msg through? Should we be raising an
# exception, to be handled in the FTP mechanism? Inquiring
# minds...
transaction.abort()
RESPONSE.setStatus(450)
return RESPONSE
except ResourceLockedError, msg: except ResourceLockedError, msg:
transaction.abort() transaction.abort()
RESPONSE.setStatus(423) RESPONSE.setStatus(423)
......
import re import re
from HTMLParser import HTMLParser, HTMLParseError from HTMLParser import HTMLParser, HTMLParseError
from Products.CMFDefault.utils import IllegalHTML from Products.PortalTransforms.libtransforms.utils import IllegalHTML
from Products.PortalTransforms.transforms import safe_html from Products.PortalTransforms.transforms import safe_html
CHARREF_RE = re.compile(r"&(?:amp;)?#([xX]?[0-9a-fA-F]+);?") CHARREF_RE = re.compile(r"&(?:amp;)?#([xX]?[0-9a-fA-F]+);?")
......
from Products.PortalTransforms.interfaces import ITransform from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implements from zope.interface import implements
from Products.CMFDefault.utils import bodyfinder from Products.PortalTransforms.libtransforms.utils import bodyfinder
class HTMLBody: class HTMLBody:
"""Simple transform which extracts the content of the body tag""" """Simple transform which extracts the content of the body tag"""
......
...@@ -4,10 +4,10 @@ Uses the http://sf.net/projects/pdftohtml bin to do its handy work ...@@ -4,10 +4,10 @@ Uses the http://sf.net/projects/pdftohtml bin to do its handy work
""" """
from Products.PortalTransforms.interfaces import ITransform from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implements from zope.interface import implements
from Products.PortalTransforms.libtransforms.utils import bin_search, sansext from Products.PortalTransforms.libtransforms.utils import bin_search, \
bodyfinder, sansext
from Products.PortalTransforms.libtransforms.commandtransform import commandtransform from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
from Products.PortalTransforms.libtransforms.commandtransform import popentransform from Products.PortalTransforms.libtransforms.commandtransform import popentransform
from Products.CMFDefault.utils import bodyfinder
import os import os
class popen_pdf_to_html(popentransform): class popen_pdf_to_html(popentransform):
......
...@@ -4,9 +4,9 @@ Uses the http://freshmeat.net/projects/rtfconverter/ bin to do its handy work ...@@ -4,9 +4,9 @@ Uses the http://freshmeat.net/projects/rtfconverter/ bin to do its handy work
from Products.PortalTransforms.interfaces import ITransform from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implements from zope.interface import implements
from Products.PortalTransforms.libtransforms.utils import bin_search, sansext from Products.PortalTransforms.libtransforms.utils import bin_search, \
bodyfinder, sansext
from Products.PortalTransforms.libtransforms.commandtransform import commandtransform from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
from Products.CMFDefault.utils import bodyfinder
import os import os
class rtf_to_html(commandtransform): class rtf_to_html(commandtransform):
......
...@@ -8,10 +8,7 @@ import codecs ...@@ -8,10 +8,7 @@ import codecs
from Products.PortalTransforms.interfaces import ITransform from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implements from zope.interface import implements
from Products.PortalTransforms.utils import log from Products.PortalTransforms.utils import log
from Products.CMFDefault.utils import IllegalHTML from Products.PortalTransforms.libtransforms.utils import IllegalHTML
from Products.CMFDefault.utils import SimpleHTMLParser
from Products.CMFDefault.utils import VALID_TAGS
from Products.CMFDefault.utils import NASTY_TAGS
from Products.PortalTransforms.utils import safeToInt from Products.PortalTransforms.utils import safeToInt
from lxml import etree from lxml import etree
...@@ -24,10 +21,64 @@ except ImportError: ...@@ -24,10 +21,64 @@ except ImportError:
# Means BeautifulSoup module is not installed # Means BeautifulSoup module is not installed
soupfromstring = None soupfromstring = None
# tag mapping: tag -> short or long tag # tag mapping: tag -> short or long tag
VALID_TAGS = VALID_TAGS.copy() VALID_TAGS = {
NASTY_TAGS = NASTY_TAGS.copy() 'a': 1,
'b': 1,
# add some tags to allowed types. These should be backported to CMFDefault. 'base': 0,
'big': 1,
'blockquote': 1,
'body': 1,
'br': 0,
'caption': 1,
'cite': 1,
'code': 1,
'dd': 1,
'div': 1,
'dl': 1,
'dt': 1,
'em': 1,
'h1': 1,
'h2': 1,
'h3': 1,
'h4': 1,
'h5': 1,
'h6': 1,
'head': 1,
'hr': 0,
'html': 1,
'i': 1,
'img': 0,
'kbd': 1,
'li': 1,
# 'link': 1, type="script" hoses us
'meta': 0,
'ol': 1,
'p': 1,
'pre': 1,
'small': 1,
'span': 1,
'strong': 1,
'sub': 1,
'sup': 1,
'table': 1,
'tbody': 1,
'td': 1,
'th': 1,
'title': 1,
'tr': 1,
'tt': 1,
'u': 1,
'ul': 1,
}
NASTY_TAGS = {
'script': 1,
'object': 1,
'embed': 1,
'applet': 1,
}
# add some tags to allowed types.
VALID_TAGS['ins'] = 1 VALID_TAGS['ins'] = 1
VALID_TAGS['del'] = 1 VALID_TAGS['del'] = 1
VALID_TAGS['q'] = 1 VALID_TAGS['q'] = 1
...@@ -72,7 +123,7 @@ VALID_TAGS['source'] = 1 ...@@ -72,7 +123,7 @@ VALID_TAGS['source'] = 1
VALID_TAGS['time'] = 1 VALID_TAGS['time'] = 1
VALID_TAGS['video'] = 1 VALID_TAGS['video'] = 1
# add some tags to nasty. These should also probably be backported to CMFDefault. # add some tags to nasty.
NASTY_TAGS['style'] = 1 # this helps improve Word HTML cleanup. NASTY_TAGS['style'] = 1 # this helps improve Word HTML cleanup.
NASTY_TAGS['meta'] = 1 # allowed by parsers, but can cause unexpected behavior NASTY_TAGS['meta'] = 1 # allowed by parsers, but can cause unexpected behavior
......
...@@ -8,8 +8,6 @@ import os ...@@ -8,8 +8,6 @@ import os
from zope.interface import implements from zope.interface import implements
from Products.CMFDefault.utils import bodyfinder
from Products.PortalTransforms.interfaces import ITransform from Products.PortalTransforms.interfaces import ITransform
from Products.PortalTransforms.libtransforms.commandtransform import commandtransform from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
from Products.PortalTransforms.libtransforms.utils import bin_search from Products.PortalTransforms.libtransforms.utils import bin_search
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment