Commit dde46fed by Bryton Lacquement

Get rid of Products.CMFDefault.utils

parent c21014cd
......@@ -32,8 +32,7 @@ from zExceptions import Forbidden
from Products.ERP5Type import Permissions, PropertySheet, interfaces
from Products.ERP5Type.Base import Base
from Products.ERP5Type.Utils import deprecated
from Products.CMFDefault.utils import formatRFC822Headers
from Products.ERP5Type.Utils import deprecated, formatRFC822Headers
import re
_marker = object()
......
......@@ -1767,3 +1767,26 @@ def reencodeUrlEscapes(url):
url += [_reencodeUrlEscapes_map[c] for c in part]
except StopIteration:
return ''.join(url)
#####################################################
# Replacement for Products.CMFDefault
#####################################################
def formatRFC822Headers(headers):
    """Convert the key-value pairs in 'headers' to RFC822-style headers.

    This code is taken from Products.CMFDefault.utils and modified
    for ERP5 purpose.

    headers -- iterable of (key, value) pairs. Pairs whose value is None
               are skipped. A list or tuple value (or a subclass of one)
               contributes one line per item; any other value is
               converted with str() and split on its line breaks.

    Returns a single string: one 'key: value' header per pair, joined
    with CRLF. Every extra line of a value is emitted as a continuation
    line (CRLF followed by one space) so that continuation-line
    semantics are preserved.
    """
    # A CRLF pair, a bare LF and a bare CR each count as exactly one line
    # break.  Splitting one character at a time would turn every CRLF into
    # two breaks and emit a spurious whitespace-only continuation line.
    linesplit = re.compile(r'\r\n|\n|\r')
    munged = []
    for key, value in headers:
        if value is None:
            continue
        if isinstance(value, (list, tuple)):
            vallines = [str(item) for item in value]
        else:
            vallines = linesplit.split(str(value))
        munged.append('%s: %s' % (key, '\r\n '.join(vallines)))
    return '\r\n'.join(munged)
......@@ -13,67 +13,23 @@
#
##############################################################################
import collections
import email
import re
import transaction
from lxml import html
from Products.ERP5Type.Utils import formatRFC822Headers
from Acquisition import aq_parent, aq_inner, aq_base
from AccessControl import ClassSecurityInfo, ModuleSecurityInfo
from Products.ERP5Type.Globals import InitializeClass
from Products.ERP5Type import Permissions, PropertySheet, Constraint
from Products.CMFCore.PortalContent import ResourceLockedError
from Products.CMFCore.utils import getToolByName
from Products.CMFDefault.utils import parseHeadersBody
from Products.CMFDefault.utils import html_headcheck
from Products.CMFDefault.utils import bodyfinder
from Products.CMFDefault.utils import SimpleHTMLParser as CMFSimpleHTMLParser
from zLOG import LOG
from zExceptions import Forbidden
security = ModuleSecurityInfo( 'Products.ERP5Type.WebDAVSupport' )
class SimpleHTMLParser(CMFSimpleHTMLParser):
    """HTML parser whose <meta> handling keeps repeated names as lists."""

    def do_meta(self, attrs):
        """Store one <meta> tag's name/content pair in self.metatags.

        attrs -- iterable of (attribute name, attribute value) pairs.

        The meta name is capitalized before being used as key. The first
        content seen for a name is stored as is; further contents for the
        same name are gathered in a list. Tags without a name are ignored.
        """
        meta_name = ''
        meta_content = ''
        for attrname, value in attrs:
            stripped = value.strip()
            if attrname == "name":
                meta_name = stripped.capitalize()
            elif attrname == "content":
                meta_content = stripped

        if not meta_name:
            return

        metatags = self.metatags
        if meta_name not in metatags:
            metatags[meta_name] = meta_content
            return
        existing = metatags[meta_name]
        if type(existing) is list:
            existing.append(meta_content)
        else:
            # Second occurrence: promote the single value to a list.
            metatags[meta_name] = [existing, meta_content]
security.declarePublic('formatRFC822Headers')
def formatRFC822Headers(headers):
    """Convert the key-value pairs in 'headers' to RFC822-style headers.

    This code is taken from Products.CMFDefault.utils and modified
    for ERP5 purpose.

    headers -- iterable of (key, value) pairs. Pairs whose value is None
               are skipped. A list or tuple value (or a subclass of one)
               contributes one line per item; any other value is
               converted with str() and split on its line breaks.

    Returns a single string: one 'key: value' header per pair, joined
    with CRLF. Every extra line of a value is emitted as a continuation
    line (CRLF followed by one space) so that continuation-line
    semantics are preserved.
    """
    # A CRLF pair, a bare LF and a bare CR each count as exactly one line
    # break.  Splitting one character at a time would turn every CRLF into
    # two breaks and emit a spurious whitespace-only continuation line.
    linesplit = re.compile(r'\r\n|\n|\r')
    munged = []
    for key, value in headers:
        if value is None:
            continue
        if isinstance(value, (list, tuple)):
            vallines = [str(item) for item in value]
        else:
            vallines = linesplit.split(str(value))
        munged.append('%s: %s' % (key, '\r\n '.join(vallines)))
    return '\r\n'.join(munged)
class TextContent:
"""
......@@ -87,30 +43,28 @@ class TextContent:
security = ClassSecurityInfo()
security.declarePrivate('guessFormat')
def guessFormat(self, text):
    """Guess the inner format of 'text'.

    Returns 'text/html' when html_headcheck(text) is true and
    'text/structured' otherwise.
    """
    return 'text/html' if html_headcheck(text) else 'text/structured'
security.declarePrivate('handleText')
def handleText(self, text, format=None):
    """Extract headers from raw text.

    text   -- the raw text to analyse.
    format -- optional format; when empty it is guessed with
              self.guessFormat(text).

    Returns a (headers, text, format) tuple. Note that the second item
    is the raw 'text' exactly as received: the body extracted by the
    helpers is computed but not returned.
    """
    if not format:
        format = self.guessFormat(text)

    headers = {}
    if format != 'text/html':
        headers, body = parseHeadersBody(text, headers)
        return headers, text, format

    # HTML: headers come from the <meta> tags and the document title.
    html_parser = SimpleHTMLParser()
    html_parser.feed(text)
    headers.update(html_parser.metatags)
    if html_parser.title:
        headers['title'] = html_parser.title
    body = bodyfinder(text)
    return headers, text, format
security.declarePrivate('parseHeadersFromText')
def parseHeadersFromText(self, text):
    """Extract headers from raw text and return them as a dict.

    When 'text' parses as a complete HTML document (root tag "html"),
    the headers are taken from its <meta name=... content=...> tags,
    keyed by the capitalized meta name: a name seen once maps to its
    content, a name seen several times maps to the list of contents.
    A non-empty <head><title> is stored under the 'title' key.

    Otherwise 'text' is parsed as an RFC822 message and every header is
    returned under its capitalized name, repeated headers being joined
    with a newline.
    """
    try:
        tree = html.fromstring(text)
    except Exception:
        # Best effort: whatever the parser rejects is treated as non-HTML
        # input and handed to the RFC822 parser below.
        tree = None

    if tree is None or tree.tag != "html":
        # this is probably not html code, try rfc822 parsing
        message = email.message_from_string(text)
        return {k.capitalize(): '\n'.join(message.get_all(k))
                for k in message.keys()}

    metas = collections.defaultdict(list)
    for meta in tree.iterfind(".//meta"):
        name = meta.get("name")
        if name:
            metas[name.capitalize()].append(meta.get("content"))
    headers = {k: v if len(v) > 1 else v[0] for k, v in metas.items()}

    # The title is a plain string, so it is added after the meta lists
    # have been collapsed.  An empty <title> has no text (None) and is
    # skipped instead of breaking the len() based collapsing above.
    title = tree.find("head/title")
    if title is not None and title.text:
        headers["title"] = title.text
    return headers
## FTP handlers
security.declareProtected(Permissions.ModifyPortalContent, 'PUT')
......@@ -123,18 +77,11 @@ class TextContent:
body = REQUEST.get('BODY', '')
try:
headers, body, format = self.handleText(text=body)
headers = self.parseHeadersFromText(body)
content_type = REQUEST.get_header('Content-Type', '')
headers.setdefault('content_type', content_type)
headers['file'] = body
self._edit(**headers)
except 'EditingConflict', msg:
# XXX Can we get an error msg through? Should we be raising an
# exception, to be handled in the FTP mechanism? Inquiring
# minds...
transaction.abort()
RESPONSE.setStatus(450)
return RESPONSE
except ResourceLockedError, msg:
transaction.abort()
RESPONSE.setStatus(423)
......
import re
from HTMLParser import HTMLParser, HTMLParseError
from Products.CMFDefault.utils import IllegalHTML
from Products.PortalTransforms.libtransforms.utils import IllegalHTML
from Products.PortalTransforms.transforms import safe_html
CHARREF_RE = re.compile(r"&(?:amp;)?#([xX]?[0-9a-fA-F]+);?")
......
from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implements
from Products.CMFDefault.utils import bodyfinder
from Products.PortalTransforms.libtransforms.utils import bodyfinder
class HTMLBody:
"""Simple transform which extracts the content of the body tag"""
......
......@@ -4,10 +4,10 @@ Uses the http://sf.net/projects/pdftohtml bin to do its handy work
"""
from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implements
from Products.PortalTransforms.libtransforms.utils import bin_search, sansext
from Products.PortalTransforms.libtransforms.utils import bin_search, \
bodyfinder, sansext
from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
from Products.PortalTransforms.libtransforms.commandtransform import popentransform
from Products.CMFDefault.utils import bodyfinder
import os
class popen_pdf_to_html(popentransform):
......
......@@ -4,9 +4,9 @@ Uses the http://freshmeat.net/projects/rtfconverter/ bin to do its handy work
from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implements
from Products.PortalTransforms.libtransforms.utils import bin_search, sansext
from Products.PortalTransforms.libtransforms.utils import bin_search, \
bodyfinder, sansext
from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
from Products.CMFDefault.utils import bodyfinder
import os
class rtf_to_html(commandtransform):
......
......@@ -8,10 +8,7 @@ import codecs
from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implements
from Products.PortalTransforms.utils import log
from Products.CMFDefault.utils import IllegalHTML
from Products.CMFDefault.utils import SimpleHTMLParser
from Products.CMFDefault.utils import VALID_TAGS
from Products.CMFDefault.utils import NASTY_TAGS
from Products.PortalTransforms.libtransforms.utils import IllegalHTML
from Products.PortalTransforms.utils import safeToInt
from lxml import etree
......@@ -24,10 +21,64 @@ except ImportError:
# Means BeautifulSoup module is not installed
soupfromstring = None
# tag mapping: tag -> short or long tag
VALID_TAGS = VALID_TAGS.copy()
NASTY_TAGS = NASTY_TAGS.copy()
# add some tags to allowed types. These should be backported to CMFDefault.
# Allowed tags, each mapped to a 0/1 flag.  The flag presumably tells a
# long tag (1) from a short one (0) -- confirm against the code reading it.
VALID_TAGS = dict.fromkeys((
    'a', 'b', 'big', 'blockquote', 'body', 'caption', 'cite', 'code',
    'dd', 'div', 'dl', 'dt', 'em',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'html',
    'i', 'kbd', 'li', 'ol', 'p', 'pre',
    'small', 'span', 'strong', 'sub', 'sup',
    'table', 'tbody', 'td', 'th', 'title', 'tr', 'tt', 'u', 'ul',
), 1)
# 'link' is left out on purpose: type="script" hoses us
VALID_TAGS.update(dict.fromkeys(('base', 'br', 'hr', 'img', 'meta'), 0))

# Tags listed as nasty, all flagged 1.
NASTY_TAGS = dict.fromkeys(('script', 'object', 'embed', 'applet'), 1)
# add some tags to allowed types.
VALID_TAGS['ins'] = 1
VALID_TAGS['del'] = 1
VALID_TAGS['q'] = 1
......@@ -72,7 +123,7 @@ VALID_TAGS['source'] = 1
VALID_TAGS['time'] = 1
VALID_TAGS['video'] = 1
# add some tags to nasty. These should also probably be backported to CMFDefault.
# add some tags to nasty.
NASTY_TAGS['style'] = 1 # this helps improve Word HTML cleanup.
NASTY_TAGS['meta'] = 1 # allowed by parsers, but can cause unexpected behavior
......
......@@ -8,8 +8,6 @@ import os
from zope.interface import implements
from Products.CMFDefault.utils import bodyfinder
from Products.PortalTransforms.interfaces import ITransform
from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
from Products.PortalTransforms.libtransforms.utils import bin_search
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment