1.4.1-nexedi - 2009-03-03

========================= * Allowed the abbr, acronym, var, dfn, samp, address, bdo, thead, tfoot, col, and colgroup tags by default, since they are harmless, valid XHTML and shouldn't be filtered. Fixes: http://dev.plone.org/plone/ticket/6712 and http://dev.plone.org/plone/ticket/7251 [limi] (backport from 1.5.5-final) * Add another XSS fix from for handling extraneous brackets. [dunny] (backport from 1.5.3-final) * Add XSS fixes from Anton Stonor to safe_html transform. [alecm, stonor] (backport from 1.5.3-final) * casting to int is evil without previous check of the type. so we assume as in CMFPlone just zero for non-int-castable values. [jensens] (backport from 1.5.0-a1) * the values in the safe_html valid tag dictionary can become strings when modifying them via the ZMI. Explicitly convert them to integers before testing their value. [wichert] (backport from 1.5.0-a1) git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@25842 20353a03-c40f-0410-a6d1-a30d3c3de9de

1.4.1-nexedi - 2009-03-03
========================= * Allowed the abbr, acronym, var, dfn, samp, address, bdo, thead, tfoot, col, and colgroup tags by default, since they are harmless, valid XHTML and shouldn't be filtered. Fixes: http://dev.plone.org/plone/ticket/6712 and http://dev.plone.org/plone/ticket/7251 [limi] (backport from 1.5.5-final) * Add another XSS fix from for handling extraneous brackets. [dunny] (backport from 1.5.3-final) * Add XSS fixes from Anton Stonor to safe_html transform. [alecm, stonor] (backport from 1.5.3-final) * casting to int is evil without previous check of the type. so we assume as in CMFPlone just zero for non-int-castable values. [jensens] (backport from 1.5.0-a1) * the values in the safe_html valid tag dictionary can become strings when modifying them via the ZMI. Explicitly convert them to integers before testing their value. [wichert] (backport from 1.5.0-a1) git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@25842 20353a03-c40f-0410-a6d1-a30d3c3de9de
5c5173aa · Kazuhiko Shiozaki · aea8feb7 · 5c5173aa · 5c5173aa · 5c5173aa
Commit 5c5173aa authored Mar 03, 2009 by Kazuhiko Shiozaki
3 changed files
--- a/product/PortalTransforms/HISTORY.txt
+++ b/product/PortalTransforms/HISTORY.txt
+1.4.1-nexedi - 2009-03-03
+=========================
+  * Allowed the abbr, acronym, var, dfn, samp, address, bdo, thead, tfoot,
+    col, and colgroup tags by default, since they are harmless, valid XHTML
+    and shouldn't be filtered. Fixes:
+    http://dev.plone.org/plone/ticket/6712 and
+    http://dev.plone.org/plone/ticket/7251
+    [limi] (backport from 1.5.5-final)
+  * Add another XSS fix from for handling extraneous brackets.
+  [dunny] (backport from 1.5.3-final)
+  * Add XSS fixes from Anton Stonor to safe_html transform.
+  [alecm, stonor] (backport from 1.5.3-final)
+  * casting to int is evil without previous check of the type. so we assume as
+    in CMFPlone just zero for non-int-castable values.
+    [jensens] (backport from 1.5.0-a1)
+  * the values in the safe_html valid tag dictionary can become strings when
+    modifying them via the ZMI. Explicitly convert them to integers before
+    testing their value.
+    [wichert] (backport from 1.5.0-a1)
 1.4.0-nexedi - 2008-10-24
-========================
+=========================
  * fixed an infinite loop bug.
  * remove PortalTransforms/configure.zcml that is not compatible with
-    Zope-2.8's five.
+    Zope-2.8's Five (1.0.2).
  * let the user configure 'initial_header_level' (cf
    'rest-header-level' directive).
@@ -16,7 +41,7 @@
    transform tool will be copied and this transform will be removed
    from copied one.
-1.4.0-final - 2006-06-16
+1.4.1-final - 2006-09-08
 ========================
  * Shut down a noisy logging message to DEBUG level.

--- a/product/PortalTransforms/transforms/safe_html.py
+++ b/product/PortalTransforms/transforms/safe_html.py
 import logging
 from sgmllib import SGMLParser
+import re
+from cgi import escape
 from Products.PortalTransforms.interfaces import itransform
 from Products.PortalTransforms.utils import log
@@ -8,17 +10,29 @@ from Products.CMFDefault.utils import IllegalHTML
 from Products.CMFDefault.utils import SimpleHTMLParser
 from Products.CMFDefault.utils import VALID_TAGS
 from Products.CMFDefault.utils import NASTY_TAGS
+from Products.PortalTransforms.utils import safeToInt
 # tag mapping: tag -> short or long tag
 VALID_TAGS = VALID_TAGS.copy()
 NASTY_TAGS = NASTY_TAGS.copy()
-# add some tags to allowed types. This should be fixed in CMFDefault
+# add some tags to allowed types. These should be backported to CMFDefault.
 VALID_TAGS['ins'] = 1
 VALID_TAGS['del'] = 1
 VALID_TAGS['q'] = 1
-VALID_TAGS['map']=1
+VALID_TAGS['map'] = 1
-VALID_TAGS['area']=1
+VALID_TAGS['area'] = 1
+VALID_TAGS['abbr'] = 1
+VALID_TAGS['acronym'] = 1
+VALID_TAGS['var'] = 1
+VALID_TAGS['dfn'] = 1
+VALID_TAGS['samp'] = 1
+VALID_TAGS['address'] = 1
+VALID_TAGS['bdo'] = 1
+VALID_TAGS['thead'] = 1
+VALID_TAGS['tfoot'] = 1
+VALID_TAGS['col'] = 1
+VALID_TAGS['colgroup'] = 1
 msg_pat = """
 <div class="system-message">
@@ -26,6 +40,33 @@ msg_pat = """
 %s</d>
 """
+def hasScript(s):
+   """ Dig out evil Java/VB script inside an HTML attribute """
+   # look for "script" and "expression"
+   javascript_pattern = re.compile("([\s\n]*?s[\s\n]*?c[\s\n]*?r[\s\n]*?i[\s\n]*?p[\s\n]*?t[\s\n]*?:)|([\s\n]*?e[\s\n]*?x[\s\n]*?p[\s\n]*?r[\s\n]*?e[\s\n]*?s[\s\n]*?s[\s\n]*?i[\s\n]*?o[\s\n]*?n)", re.DOTALL|re.IGNORECASE)
+   s = decode_htmlentities(s)
+   return javascript_pattern.findall(s)
+def decode_htmlentities(s):
+   """ XSS code can be hidden with htmlentities """
+   entity_pattern = re.compile("&#(?P<htmlentity>x?\w+)?;?")
+   s = entity_pattern.sub(decode_htmlentity,s)
+   return s
+def decode_htmlentity(m):
+   entity_value = m.groupdict()['htmlentity']
+   if entity_value.lower().startswith('x'):
+      try:
+          return chr(int('0'+entity_value,16))
+      except ValueError:
+          return entity_value
+   try:
+      return chr(int(entity_value))
+   except ValueError:
+      return entity_value
 class StrippingParser(SGMLParser):
    """Pass only allowed tags;  raise exception for known-bad.
@@ -47,7 +88,7 @@ class StrippingParser(SGMLParser):
    def handle_data(self, data):
        if self.suppress: return
        if data:
-            self.result.append(data)
+            self.result.append(escape(data))
    def handle_charref(self, name):
        if self.suppress: return
@@ -74,23 +115,24 @@ class StrippingParser(SGMLParser):
        """
        if self.suppress: return
-        #if self.remove_javascript and tag == script and :
        if self.valid.has_key(tag):
            self.result.append('<' + tag)
+            remove_script = getattr(self,'remove_javascript',True)
            for k, v in attrs:
-                if self.remove_javascript and k.strip().lower().startswith('on'):
+                if remove_script and k.strip().lower().startswith('on'):
                    if not self.raise_error: continue
-                    else: raise IllegalHTML, 'Javascript event "%s" not allowed.' % k
+                    else: raise IllegalHTML, 'Script event "%s" not allowed.' % k
-                elif self.remove_javascript and v.strip().lower().startswith('javascript:' ):
+                elif remove_script and hasScript(v):
                    if not self.raise_error: continue
-                    else: raise IllegalHTML, 'Javascript URI "%s" not allowed.' % v
+                    else: raise IllegalHTML, 'Script URI "%s" not allowed.' % v
                else:
                    self.result.append(' %s="%s"' % (k, v))
            #UNUSED endTag = '</%s>' % tag
-            if self.valid.get(tag):
+            if safeToInt(self.valid.get(tag)):
                self.result.append('>')
            else:
                self.result.append(' />')
@@ -103,10 +145,10 @@ class StrippingParser(SGMLParser):
            pass
    def unknown_endtag(self, tag):
-        if self.nasty.has_key(tag):
+        if self.nasty.has_key(tag) and not self.valid.has_key(tag):
            self.suppress = False
        if self.suppress: return
-        if self.valid.get(tag):
+        if safeToInt(self.valid.get(tag)):
            self.result.append('</%s>' % tag)
            #remTag = '</%s>' % tag
@@ -127,13 +169,19 @@ def scrubHTML(html, valid=VALID_TAGS, nasty=NASTY_TAGS,
 class SafeHTML:
    """Simple transform which uses CMFDefault functions to
-    clean potentially bad tags.
+    clean potentially bad tags.   
-    Tags must explicit be allowed in valid_tags to pass. Only the tags
-    themself are removed, not their contents. If tags are
+    Tags must explicit be allowed in valid_tags to pass. Only
-    removed and in nasty_tags, they are removed with
+    the tags themself are removed, not their contents. If tags
-    all of their contents.
+    are removed and in nasty_tags, they are removed with
-    Be aware that you may need to clean the cache to let a Object
+    all of their contents.         
-    call the transform again.
+    Objects will not be transformed again with changed settings.
+    You need to clear the cache by e.g.
+    1.) restarting your zope or
+    2.) empty the zodb-cache via ZMI -> Control_Panel
+        -> Database Management -> main || other_used_database
+        -> Flush Cache.
    """
    __implements__ = itransform

--- a/product/PortalTransforms/utils.py
+++ b/product/PortalTransforms/utils.py
@@ -28,3 +28,12 @@ from zExceptions import BadRequest
 import os.path
 _www = os.path.join(os.path.dirname(__file__), 'www')
 skins_dir = None
+def safeToInt(value):
+    """Convert value to integer or just return 0 if we can't"""
+    try:
+        return int(value)
+    except ValueError:
+        return 0
+    except TypeError:
+        return 0