Commit f17e3d22 authored by Mike Samuel

exp/template/html: handle custom attrs and HTML5 embedded elements.

HTML5 allows embedded SVG and MathML.
Code searches show SVG is used for graphing.

This changes transition to deal with constructs like
   <svg xmlns:xlink="http://www.w3.org/1999/xlink">
It changes attr and clients to call a single function that combines
the name lookup and "on" prefix check to determine an attribute
value type given an attribute name.

That function uses heuristics to recognize that
     xlink:href and svg:href
have URL content, and that data-url likely contains URL content,
since "javascript:" injection is such a problem.

I did a code search over a closure templates codebase to determine
patterns of custom attribute usage.  I did something like

$ find . -name \*.soy | \
    xargs egrep perl -ne 'while (s/\b((data-|\w+:)\w+)\s*=//) { print "$1\n"; }' | \
    sort | uniq

to produce the list at the bottom.

Filtering that by egrep -i 'src|url|uri' produces

data-docConsumptionUri
data-docIconUrl
data-launchUrl
data-lazySrc
data-pageUrl
data-shareurl
data-suggestServerUrl
data-tweetUrl
g:secondaryurls
g:url

which seem to match all the ones that are likely URL content.
There are some short words that match that heuristic, but I still think it is decent since
any custom attribute that has a numeric or enumerated keyword value will be unaffected by
the URL assumption.
Counterexamples from /usr/share/dict:
during, hourly, maturity, nourish, purloin, security, surly

Custom attributes present in existing closure templates codebase:
buzz:aid
data-a
data-action
data-actor
data-allowEqualityOps
data-analyticsId
data-bid
data-c
data-cartId
data-categoryId
data-cid
data-command
data-count
data-country
data-creativeId
data-cssToken
data-dest
data-docAttribution
data-docConsumptionUri
data-docCurrencyCode
data-docIconUrl
data-docId
data-docPrice
data-docPriceMicros
data-docTitle
data-docType
data-docid
data-email
data-entityid
data-errorindex
data-f
data-feature
data-fgid
data-filter
data-fireEvent
data-followable
data-followed
data-hashChange
data-height
data-hover
data-href
data-id
data-index
data-invitable
data-isFree
data-isPurchased
data-jid
data-jumpid
data-launchUrl
data-lazySrc
data-listType
data-maxVisiblePages
data-name
data-nid
data-nodeid
data-numItems
data-numPerPage
data-offerType
data-oid
data-opUsesEquality
data-overflowclass
data-packageName
data-pageId
data-pageUrl
data-pos
data-priceBrief
data-profileIds
data-query
data-rating
data-ref
data-rentalGrantPeriodDays
data-rentalactivePeriodHours
data-reviewId
data-role
data-score
data-shareurl
data-showGeLe
data-showLineInclude
data-size
data-sortval
data-suggestServerType
data-suggestServerUrl
data-suggestionIndex
data-tabBarId
data-tabBarIndex
data-tags
data-target
data-textColor
data-theme
data-title
data-toggletarget
data-tooltip
data-trailerId
data-transactionId
data-transition
data-ts
data-tweetContent
data-tweetUrl
data-type
data-useAjax
data-value
data-width
data-x
dm:index
dm:type
g:aspects
g:decorateusingsecondary
g:em
g:entity
g:groups
g:id
g:istoplevel
g:li
g:numresults
g:oid
g:parentId
g:pl
g:pt
g:rating_override
g:secondaryurls
g:sortby
g:startindex
g:target
g:type
g:url
g:value
ga:barsize
ga:css
ga:expandAfterCharsExceed
ga:initialNumRows
ga:nocancelicon
ga:numRowsToExpandTo
ga:type
ga:unlockwhenrated
gw:address
gw:businessname
gw:comment
gw:phone
gw:source
ng:controller
xlink:href
xml:lang
xmlns:atom
xmlns:dc
xmlns:jstd
xmlns:ng
xmlns:og
xmlns:webstore
xmlns:xlink

R=nigeltao
CC=golang-dev
https://golang.org/cl/5119041
parent 582bb304
...@@ -4,181 +4,172 @@ ...@@ -4,181 +4,172 @@
package html package html
// attrType[n] describes the value of the given attribute. import (
"strings"
)
// attrTypeMap[n] describes the value of the given attribute.
// If an attribute affects (or can mask) the encoding or interpretation of // If an attribute affects (or can mask) the encoding or interpretation of
// other content, or affects the contents, idempotency, or credentials of a // other content, or affects the contents, idempotency, or credentials of a
// network message, then the value in this map is contentTypeUnsafe. // network message, then the value in this map is contentTypeUnsafe.
// This map is derived from HTML5, specifically // This map is derived from HTML5, specifically
// http://www.w3.org/TR/html5/Overview.html#attributes-1 and // http://www.w3.org/TR/html5/Overview.html#attributes-1
// http://www.w3.org/TR/html5/Overview.html#event-handlers-on-elements-document-objects-and-window-objects
// as well as "%URI"-typed attributes from // as well as "%URI"-typed attributes from
// http://www.w3.org/TR/html4/index/attributes.html // http://www.w3.org/TR/html4/index/attributes.html
var attrType = map[string]contentType{ var attrTypeMap = map[string]contentType{
"accept": contentTypePlain, "accept": contentTypePlain,
"accept-charset": contentTypeUnsafe, "accept-charset": contentTypeUnsafe,
"action": contentTypeURL, "action": contentTypeURL,
"alt": contentTypePlain, "alt": contentTypePlain,
"archive": contentTypeURL, "archive": contentTypeURL,
"async": contentTypeUnsafe, "async": contentTypeUnsafe,
"autocomplete": contentTypePlain, "autocomplete": contentTypePlain,
"autofocus": contentTypePlain, "autofocus": contentTypePlain,
"autoplay": contentTypePlain, "autoplay": contentTypePlain,
"background": contentTypeURL, "background": contentTypeURL,
"border": contentTypePlain, "border": contentTypePlain,
"checked": contentTypePlain, "checked": contentTypePlain,
"cite": contentTypeURL, "cite": contentTypeURL,
"challenge": contentTypeUnsafe, "challenge": contentTypeUnsafe,
"charset": contentTypeUnsafe, "charset": contentTypeUnsafe,
"class": contentTypePlain, "class": contentTypePlain,
"classid": contentTypeURL, "classid": contentTypeURL,
"codebase": contentTypeURL, "codebase": contentTypeURL,
"cols": contentTypePlain, "cols": contentTypePlain,
"colspan": contentTypePlain, "colspan": contentTypePlain,
"content": contentTypeUnsafe, "content": contentTypeUnsafe,
"contenteditable": contentTypePlain, "contenteditable": contentTypePlain,
"contextmenu": contentTypePlain, "contextmenu": contentTypePlain,
"controls": contentTypePlain, "controls": contentTypePlain,
"coords": contentTypePlain, "coords": contentTypePlain,
"crossorigin": contentTypeUnsafe, "crossorigin": contentTypeUnsafe,
"data": contentTypeURL, "data": contentTypeURL,
"datetime": contentTypePlain, "datetime": contentTypePlain,
"default": contentTypePlain, "default": contentTypePlain,
"defer": contentTypeUnsafe, "defer": contentTypeUnsafe,
"dir": contentTypePlain, "dir": contentTypePlain,
"dirname": contentTypePlain, "dirname": contentTypePlain,
"disabled": contentTypePlain, "disabled": contentTypePlain,
"draggable": contentTypePlain, "draggable": contentTypePlain,
"dropzone": contentTypePlain, "dropzone": contentTypePlain,
"enctype": contentTypeUnsafe, "enctype": contentTypeUnsafe,
"for": contentTypePlain, "for": contentTypePlain,
"form": contentTypeUnsafe, "form": contentTypeUnsafe,
"formaction": contentTypeURL, "formaction": contentTypeURL,
"formenctype": contentTypeUnsafe, "formenctype": contentTypeUnsafe,
"formmethod": contentTypeUnsafe, "formmethod": contentTypeUnsafe,
"formnovalidate": contentTypeUnsafe, "formnovalidate": contentTypeUnsafe,
"formtarget": contentTypePlain, "formtarget": contentTypePlain,
"headers": contentTypePlain, "headers": contentTypePlain,
"height": contentTypePlain, "height": contentTypePlain,
"hidden": contentTypePlain, "hidden": contentTypePlain,
"high": contentTypePlain, "high": contentTypePlain,
"href": contentTypeURL, "href": contentTypeURL,
"hreflang": contentTypePlain, "hreflang": contentTypePlain,
"http-equiv": contentTypeUnsafe, "http-equiv": contentTypeUnsafe,
"icon": contentTypeURL, "icon": contentTypeURL,
"id": contentTypePlain, "id": contentTypePlain,
"ismap": contentTypePlain, "ismap": contentTypePlain,
"keytype": contentTypeUnsafe, "keytype": contentTypeUnsafe,
"kind": contentTypePlain, "kind": contentTypePlain,
"label": contentTypePlain, "label": contentTypePlain,
"lang": contentTypePlain, "lang": contentTypePlain,
"language": contentTypeUnsafe, "language": contentTypeUnsafe,
"list": contentTypePlain, "list": contentTypePlain,
"longdesc": contentTypeURL, "longdesc": contentTypeURL,
"loop": contentTypePlain, "loop": contentTypePlain,
"low": contentTypePlain, "low": contentTypePlain,
"manifest": contentTypeURL, "manifest": contentTypeURL,
"max": contentTypePlain, "max": contentTypePlain,
"maxlength": contentTypePlain, "maxlength": contentTypePlain,
"media": contentTypePlain, "media": contentTypePlain,
"mediagroup": contentTypePlain, "mediagroup": contentTypePlain,
"method": contentTypeUnsafe, "method": contentTypeUnsafe,
"min": contentTypePlain, "min": contentTypePlain,
"multiple": contentTypePlain, "multiple": contentTypePlain,
"name": contentTypePlain, "name": contentTypePlain,
"novalidate": contentTypeUnsafe, "novalidate": contentTypeUnsafe,
"onabort": contentTypeJS, // Skip handler names from
"onblur": contentTypeJS, // http://www.w3.org/TR/html5/Overview.html#event-handlers-on-elements-document-objects-and-window-objects
"oncanplay": contentTypeJS, // since we have special handling in attrType.
"oncanplaythrough": contentTypeJS, "open": contentTypePlain,
"onchange": contentTypeJS, "optimum": contentTypePlain,
"onclick": contentTypeJS, "pattern": contentTypeUnsafe,
"oncontextmenu": contentTypeJS, "placeholder": contentTypePlain,
"oncuechange": contentTypeJS, "poster": contentTypeURL,
"ondblclick": contentTypeJS, "profile": contentTypeURL,
"ondrag": contentTypeJS, "preload": contentTypePlain,
"ondragend": contentTypeJS, "pubdate": contentTypePlain,
"ondragenter": contentTypeJS, "radiogroup": contentTypePlain,
"ondragleave": contentTypeJS, "readonly": contentTypePlain,
"ondragover": contentTypeJS, "rel": contentTypeUnsafe,
"ondragstart": contentTypeJS, "required": contentTypePlain,
"ondrop": contentTypeJS, "reversed": contentTypePlain,
"ondurationchange": contentTypeJS, "rows": contentTypePlain,
"onemptied": contentTypeJS, "rowspan": contentTypePlain,
"onended": contentTypeJS, "sandbox": contentTypeUnsafe,
"onerror": contentTypeJS, "spellcheck": contentTypePlain,
"onfocus": contentTypeJS, "scope": contentTypePlain,
"oninput": contentTypeJS, "scoped": contentTypePlain,
"oninvalid": contentTypeJS, "seamless": contentTypePlain,
"onkeydown": contentTypeJS, "selected": contentTypePlain,
"onkeypress": contentTypeJS, "shape": contentTypePlain,
"onkeyup": contentTypeJS, "size": contentTypePlain,
"onload": contentTypeJS, "sizes": contentTypePlain,
"onloadeddata": contentTypeJS, "span": contentTypePlain,
"onloadedmetadata": contentTypeJS, "src": contentTypeURL,
"onloadstart": contentTypeJS, "srcdoc": contentTypeHTML,
"onmousedown": contentTypeJS, "srclang": contentTypePlain,
"onmousemove": contentTypeJS, "start": contentTypePlain,
"onmouseout": contentTypeJS, "step": contentTypePlain,
"onmouseover": contentTypeJS, "style": contentTypeCSS,
"onmouseup": contentTypeJS, "tabindex": contentTypePlain,
"onmousewheel": contentTypeJS, "target": contentTypePlain,
"onpause": contentTypeJS, "title": contentTypePlain,
"onplay": contentTypeJS, "type": contentTypeUnsafe,
"onplaying": contentTypeJS, "usemap": contentTypeURL,
"onprogress": contentTypeJS, "value": contentTypeUnsafe,
"onratechange": contentTypeJS, "width": contentTypePlain,
"onreadystatechange": contentTypeJS, "wrap": contentTypePlain,
"onreset": contentTypeJS, "xmlns": contentTypeURL,
"onscroll": contentTypeJS, }
"onseeked": contentTypeJS,
"onseeking": contentTypeJS, // attrType returns a conservative (upper-bound on authority) guess at the
"onselect": contentTypeJS, // type of the named attribute.
"onshow": contentTypeJS, func attrType(name string) contentType {
"onstalled": contentTypeJS, name = strings.ToLower(name)
"onsubmit": contentTypeJS, if strings.HasPrefix(name, "data-") {
"onsuspend": contentTypeJS, // Strip data- so that custom attribute heuristics below are
"ontimeupdate": contentTypeJS, // widely applied.
"onvolumechange": contentTypeJS, // Treat data-action as URL below.
"onwaiting": contentTypeJS, name = name[5:]
"open": contentTypePlain, } else if colon := strings.IndexRune(name, ':'); colon != -1 {
"optimum": contentTypePlain, if name[:colon] == "xmlns" {
"pattern": contentTypeUnsafe, return contentTypeURL
"placeholder": contentTypePlain, }
"poster": contentTypeURL, // Treat svg:href and xlink:href as href below.
"profile": contentTypeURL, name = name[colon+1:]
"preload": contentTypePlain, }
"pubdate": contentTypePlain, if t, ok := attrTypeMap[name]; ok {
"radiogroup": contentTypePlain, return t
"readonly": contentTypePlain, }
"rel": contentTypeUnsafe, // Treat partial event handler names as script.
"required": contentTypePlain, if strings.HasPrefix(name, "on") {
"reversed": contentTypePlain, return contentTypeJS
"rows": contentTypePlain, }
"rowspan": contentTypePlain,
"sandbox": contentTypeUnsafe,
"spellcheck": contentTypePlain,
"scope": contentTypePlain,
"scoped": contentTypePlain,
"seamless": contentTypePlain,
"selected": contentTypePlain,
"shape": contentTypePlain,
"size": contentTypePlain,
"sizes": contentTypePlain,
"span": contentTypePlain,
"src": contentTypeURL,
"srcdoc": contentTypeHTML,
"srclang": contentTypePlain,
"start": contentTypePlain,
"step": contentTypePlain,
"style": contentTypeCSS,
"tabindex": contentTypePlain,
"target": contentTypePlain,
"title": contentTypePlain,
"type": contentTypeUnsafe,
"usemap": contentTypeURL,
"value": contentTypeUnsafe,
"width": contentTypePlain,
"wrap": contentTypePlain,
// TODO: data-* attrs? Recognize data-foo-url and similar. // Heuristics to prevent "javascript:..." injection in custom
// data attributes and custom attributes like g:tweetUrl.
// http://www.w3.org/TR/html5/elements.html#embedding-custom-non-visible-data-with-the-data-attributes:
// "Custom data attributes are intended to store custom data
// private to the page or application, for which there are no
// more appropriate attributes or elements."
// Developers seem to store URL content in data URLs that start
// or end with "URI" or "URL".
if strings.Contains(name, "src") ||
strings.Contains(name, "uri") ||
strings.Contains(name, "url") {
return contentTypeURL
}
return contentTypePlain
} }
...@@ -1400,6 +1400,66 @@ func TestEscapeText(t *testing.T) { ...@@ -1400,6 +1400,66 @@ func TestEscapeText(t *testing.T) {
`<style>value`, `<style>value`,
context{state: stateCSS, element: elementStyle}, context{state: stateCSS, element: elementStyle},
}, },
{
`<a xlink:href`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a xmlns`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a xmlns:foo`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a xmlnsxyz`,
context{state: stateAttrName},
},
{
`<a data-url`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a data-iconUri`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a data-urlItem`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a g:`,
context{state: stateAttrName},
},
{
`<a g:url`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a g:iconUri`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a g:urlItem`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a g:value`,
context{state: stateAttrName},
},
{
`<a svg:style='`,
context{state: stateCSS, delim: delimSingleQuote},
},
{
`<svg:font-face`,
context{state: stateTag},
},
{
`<svg:a svg:onclick="`,
context{state: stateJS, delim: delimDoubleQuote},
},
} }
for _, test := range tests { for _, test := range tests {
......
...@@ -230,7 +230,7 @@ func htmlNameFilter(args ...interface{}) string { ...@@ -230,7 +230,7 @@ func htmlNameFilter(args ...interface{}) string {
return filterFailsafe return filterFailsafe
} }
s = strings.ToLower(s) s = strings.ToLower(s)
if t := attrType[s]; t != contentTypePlain && attrType["on"+s] != contentTypeJS { if t := attrType(s); t != contentTypePlain {
// TODO: Split attr and element name part filters so we can whitelist // TODO: Split attr and element name part filters so we can whitelist
// attributes. // attributes.
return filterFailsafe return filterFailsafe
......
...@@ -106,18 +106,13 @@ func tTag(c context, s []byte) (context, int) { ...@@ -106,18 +106,13 @@ func tTag(c context, s []byte) (context, int) {
err: errorf(ErrBadHTML, 0, "expected space, attr name, or end of tag, but got %q", s[i:]), err: errorf(ErrBadHTML, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
}, len(s) }, len(s)
} }
canonAttrName := strings.ToLower(string(s[i:j])) switch attrType(string(s[i:j])) {
switch attrType[canonAttrName] {
case contentTypeURL: case contentTypeURL:
attr = attrURL attr = attrURL
case contentTypeCSS: case contentTypeCSS:
attr = attrStyle attr = attrStyle
case contentTypeJS: case contentTypeJS:
attr = attrScript attr = attrScript
default:
if strings.HasPrefix(canonAttrName, "on") {
attr = attrScript
}
} }
if j == len(s) { if j == len(s) {
state = stateAttrName state = stateAttrName
...@@ -512,16 +507,34 @@ var elementNameMap = map[string]element{ ...@@ -512,16 +507,34 @@ var elementNameMap = map[string]element{
"title": elementTitle, "title": elementTitle,
} }
// asciiAlpha reports whether c is one of the ASCII letters A-Z or a-z.
func asciiAlpha(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
// asciiAlphaNum reports whether c is an ASCII letter or an ASCII decimal
// digit (0-9).
func asciiAlphaNum(c byte) bool {
	if '0' <= c && c <= '9' {
		return true
	}
	return asciiAlpha(c)
}
// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type. // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
func eatTagName(s []byte, i int) (int, element) { func eatTagName(s []byte, i int) (int, element) {
j := i if i == len(s) || !asciiAlpha(s[i]) {
for ; j < len(s); j++ { return i, elementNone
}
j := i + 1
for j < len(s) {
x := s[j] x := s[j]
if !(('a' <= x && x <= 'z') || if asciiAlphaNum(x) {
('A' <= x && x <= 'Z') || j++
('0' <= x && x <= '9' && i != j)) { continue
break }
// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
j += 2
continue
} }
break
} }
return j, elementNameMap[strings.ToLower(string(s[i:j]))] return j, elementNameMap[strings.ToLower(string(s[i:j]))]
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment