Commit 3bd5082f authored by Andrew Balholm's avatar Andrew Balholm Committed by Nigel Tao

html: parse and render <plaintext> elements

Pass tests2.dat, test 10:
<table><plaintext><td>

| <html>
|   <head>
|   <body>
|     <plaintext>
|       "<td>"
|     <table>

Also pass tests through test 25:
<!doctypehtml><p><dd>

R=nigeltao
CC=golang-dev
https://golang.org/cl/5369109
parent f5cf0a48
...@@ -655,6 +655,9 @@ func inBodyIM(p *parser) bool { ...@@ -655,6 +655,9 @@ func inBodyIM(p *parser) bool {
} }
p.popUntil(buttonScopeStopTags, "p") p.popUntil(buttonScopeStopTags, "p")
p.addElement(p.tok.Data, p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
case "plaintext":
p.popUntil(buttonScopeStopTags, "p")
p.addElement(p.tok.Data, p.tok.Attr)
case "optgroup", "option": case "optgroup", "option":
if p.top().Data == "option" { if p.top().Data == "option" {
p.oe.pop() p.oe.pop()
......
...@@ -134,7 +134,7 @@ func TestParser(t *testing.T) { ...@@ -134,7 +134,7 @@ func TestParser(t *testing.T) {
}{ }{
// TODO(nigeltao): Process all the test cases from all the .dat files. // TODO(nigeltao): Process all the test cases from all the .dat files.
{"tests1.dat", -1}, {"tests1.dat", -1},
{"tests2.dat", 10}, {"tests2.dat", 26},
{"tests3.dat", 0}, {"tests3.dat", 0},
} }
for _, tf := range testFiles { for _, tf := range testFiles {
...@@ -214,4 +214,7 @@ var renderTestBlacklist = map[string]bool{ ...@@ -214,4 +214,7 @@ var renderTestBlacklist = map[string]bool{
`<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true, `<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
`<a><table><a></table><p><a><div><a>`: true, `<a><table><a></table><p><a><div><a>`: true,
`<a><table><td><a><table></table><a></tr><a></table><a>`: true, `<a><table><td><a><table></table><a></tr><a></table><a>`: true,
// A <plaintext> element is reparented, putting it before a table.
// A <plaintext> element can't have anything after it in HTML.
`<table><plaintext><td>`: true,
} }
...@@ -52,7 +52,19 @@ func Render(w io.Writer, n *Node) error { ...@@ -52,7 +52,19 @@ func Render(w io.Writer, n *Node) error {
return buf.Flush() return buf.Flush()
} }
// plaintextAbort is returned from render1 when a <plaintext> element
// has been rendered. No more end tags should be rendered after that.
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
func render(w writer, n *Node) error { func render(w writer, n *Node) error {
err := render1(w, n)
if err == plaintextAbort {
err = nil
}
return err
}
func render1(w writer, n *Node) error {
// Render non-element nodes; these are the easy cases. // Render non-element nodes; these are the easy cases.
switch n.Type { switch n.Type {
case ErrorNode: case ErrorNode:
...@@ -61,7 +73,7 @@ func render(w writer, n *Node) error { ...@@ -61,7 +73,7 @@ func render(w writer, n *Node) error {
return escape(w, n.Data) return escape(w, n.Data)
case DocumentNode: case DocumentNode:
for _, c := range n.Child { for _, c := range n.Child {
if err := render(w, c); err != nil { if err := render1(w, c); err != nil {
return err return err
} }
} }
...@@ -128,7 +140,7 @@ func render(w writer, n *Node) error { ...@@ -128,7 +140,7 @@ func render(w writer, n *Node) error {
// Render any child nodes. // Render any child nodes.
switch n.Data { switch n.Data {
case "noembed", "noframes", "noscript", "script", "style": case "noembed", "noframes", "noscript", "plaintext", "script", "style":
for _, c := range n.Child { for _, c := range n.Child {
if c.Type != TextNode { if c.Type != TextNode {
return fmt.Errorf("html: raw text element <%s> has non-text child node", n.Data) return fmt.Errorf("html: raw text element <%s> has non-text child node", n.Data)
...@@ -137,18 +149,23 @@ func render(w writer, n *Node) error { ...@@ -137,18 +149,23 @@ func render(w writer, n *Node) error {
return err return err
} }
} }
if n.Data == "plaintext" {
// Don't render anything else. <plaintext> must be the
// last element in the file, with no closing tag.
return plaintextAbort
}
case "textarea", "title": case "textarea", "title":
for _, c := range n.Child { for _, c := range n.Child {
if c.Type != TextNode { if c.Type != TextNode {
return fmt.Errorf("html: RCDATA element <%s> has non-text child node", n.Data) return fmt.Errorf("html: RCDATA element <%s> has non-text child node", n.Data)
} }
if err := render(w, c); err != nil { if err := render1(w, c); err != nil {
return err return err
} }
} }
default: default:
for _, c := range n.Child { for _, c := range n.Child {
if err := render(w, c); err != nil { if err := render1(w, c); err != nil {
return err return err
} }
} }
......
...@@ -401,14 +401,14 @@ func (z *Tokenizer) readStartTag() TokenType { ...@@ -401,14 +401,14 @@ func (z *Tokenizer) readStartTag() TokenType {
break break
} }
} }
// Any "<noembed>", "<noframes>", "<noscript>", "<script>", "<style>", // Any "<noembed>", "<noframes>", "<noscript>", "<plaintext", "<script>", "<style>",
// "<textarea>" or "<title>" tag flags the tokenizer's next token as raw. // "<textarea>" or "<title>" tag flags the tokenizer's next token as raw.
// The tag name lengths of these special cases ranges in [5, 8]. // The tag name lengths of these special cases ranges in [5, 9].
if x := z.data.end - z.data.start; 5 <= x && x <= 8 { if x := z.data.end - z.data.start; 5 <= x && x <= 9 {
switch z.buf[z.data.start] { switch z.buf[z.data.start] {
case 'n', 's', 't', 'N', 'S', 'T': case 'n', 'p', 's', 't', 'N', 'P', 'S', 'T':
switch s := strings.ToLower(string(z.buf[z.data.start:z.data.end])); s { switch s := strings.ToLower(string(z.buf[z.data.start:z.data.end])); s {
case "noembed", "noframes", "noscript", "script", "style", "textarea", "title": case "noembed", "noframes", "noscript", "plaintext", "script", "style", "textarea", "title":
z.rawTag = s z.rawTag = s
} }
} }
...@@ -551,7 +551,15 @@ func (z *Tokenizer) Next() TokenType { ...@@ -551,7 +551,15 @@ func (z *Tokenizer) Next() TokenType {
z.data.start = z.raw.end z.data.start = z.raw.end
z.data.end = z.raw.end z.data.end = z.raw.end
if z.rawTag != "" { if z.rawTag != "" {
z.readRawOrRCDATA() if z.rawTag == "plaintext" {
// Read everything up to EOF.
for z.err == nil {
z.readByte()
}
z.textIsRaw = true
} else {
z.readRawOrRCDATA()
}
if z.data.end > z.data.start { if z.data.end > z.data.start {
z.tt = TextToken z.tt = TextToken
return z.tt return z.tt
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment