Commit 750de28d, authored by Andrew Balholm and committed by Nigel Tao

html: ignore whitespace before <head> element

Pass tests2.dat, test 47:
" \n "
(That is, two spaces separated by a newline)

| <html>
|   <head>
|   <body>

Also pass tests through test 49:
<!DOCTYPE html><script>
</script>  <title>x</title>  </head>

R=nigeltao
CC=golang-dev
https://golang.org/cl/5422043
parent 86c08e96
...@@ -319,9 +319,17 @@ func (p *parser) resetInsertionMode() { ...@@ -319,9 +319,17 @@ func (p *parser) resetInsertionMode() {
p.im = inBodyIM p.im = inBodyIM
} }
const whitespace = " \t\r\n\f"
// Section 11.2.5.4.1. // Section 11.2.5.4.1.
func initialIM(p *parser) bool { func initialIM(p *parser) bool {
switch p.tok.Type { switch p.tok.Type {
case TextToken:
p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
if len(p.tok.Data) == 0 {
// It was all whitespace, so ignore it.
return true
}
case CommentToken: case CommentToken:
p.doc.Add(&Node{ p.doc.Add(&Node{
Type: CommentNode, Type: CommentNode,
...@@ -345,6 +353,12 @@ func initialIM(p *parser) bool { ...@@ -345,6 +353,12 @@ func initialIM(p *parser) bool {
// Section 11.2.5.4.2. // Section 11.2.5.4.2.
func beforeHTMLIM(p *parser) bool { func beforeHTMLIM(p *parser) bool {
switch p.tok.Type { switch p.tok.Type {
case TextToken:
p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
if len(p.tok.Data) == 0 {
// It was all whitespace, so ignore it.
return true
}
case StartTagToken: case StartTagToken:
if p.tok.Data == "html" { if p.tok.Data == "html" {
p.addElement(p.tok.Data, p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
...@@ -383,7 +397,11 @@ func beforeHeadIM(p *parser) bool { ...@@ -383,7 +397,11 @@ func beforeHeadIM(p *parser) bool {
case ErrorToken: case ErrorToken:
implied = true implied = true
case TextToken: case TextToken:
// TODO: distinguish whitespace text from others. p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
if len(p.tok.Data) == 0 {
// It was all whitespace, so ignore it.
return true
}
implied = true implied = true
case StartTagToken: case StartTagToken:
switch p.tok.Data { switch p.tok.Data {
...@@ -417,8 +435,6 @@ func beforeHeadIM(p *parser) bool { ...@@ -417,8 +435,6 @@ func beforeHeadIM(p *parser) bool {
return !implied return !implied
} }
const whitespace = " \t\r\n\f"
// Section 11.2.5.4.4. // Section 11.2.5.4.4.
func inHeadIM(p *parser) bool { func inHeadIM(p *parser) bool {
var ( var (
......
...@@ -134,7 +134,7 @@ func TestParser(t *testing.T) { ...@@ -134,7 +134,7 @@ func TestParser(t *testing.T) {
}{ }{
// TODO(nigeltao): Process all the test cases from all the .dat files. // TODO(nigeltao): Process all the test cases from all the .dat files.
{"tests1.dat", -1}, {"tests1.dat", -1},
{"tests2.dat", 47}, {"tests2.dat", 50},
{"tests3.dat", 0}, {"tests3.dat", 0},
} }
for _, tf := range testFiles { for _, tf := range testFiles {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment