Commit e5f3dc8b authored by Nigel Tao

html: refactor the tokenizer; parse "</>" correctly.

Previously, Next would call either nextText or nextTag, but nextTag
could also call nextText. Both nextText and nextTag were responsible
for detecting "</a" end tags and "<!" comments. This change simplifies
the call chain and puts that responsibility in a single place.

R=andybalholm
CC=golang-dev
https://golang.org/cl/5263050
parent d2b73730
This diff is collapsed.
...@@ -57,19 +57,16 @@ var tokenTests = []tokenTest{ ...@@ -57,19 +57,16 @@ var tokenTests = []tokenTest{
"</", "</",
"&lt;/", "&lt;/",
}, },
/* {
// TODO: re-enable these tests when we tokenize them correctly. "not a tag #2",
{ "</>",
"not a tag #2", "",
"</>", },
"", {
}, "not a tag #3",
{ "a</>b",
"not a tag #3", "a$b",
"a</>b", },
"a$b",
},
*/
{ {
"not a tag #4", "not a tag #4",
"</ >", "</ >",
...@@ -77,21 +74,31 @@ var tokenTests = []tokenTest{ ...@@ -77,21 +74,31 @@ var tokenTests = []tokenTest{
}, },
{ {
"not a tag #5", "not a tag #5",
"</.",
"<!--.-->",
},
{
"not a tag #6",
"</.>",
"<!--.-->",
},
{
"not a tag #7",
"a < b", "a < b",
"a &lt; b", "a &lt; b",
}, },
{ {
"not a tag #6", "not a tag #8",
"<.>", "<.>",
"&lt;.&gt;", "&lt;.&gt;",
}, },
{ {
"not a tag #7", "not a tag #9",
"a<<<b>>>c", "a<<<b>>>c",
"a&lt;&lt;$<b>$&gt;&gt;c", "a&lt;&lt;$<b>$&gt;&gt;c",
}, },
{ {
"not a tag #8", "not a tag #10",
"if x<0 and y < 0 then x*y>0", "if x<0 and y < 0 then x*y>0",
"if x&lt;0 and y &lt; 0 then x*y&gt;0", "if x&lt;0 and y &lt; 0 then x*y&gt;0",
}, },
...@@ -345,7 +352,7 @@ var tokenTests = []tokenTest{ ...@@ -345,7 +352,7 @@ var tokenTests = []tokenTest{
func TestTokenizer(t *testing.T) { func TestTokenizer(t *testing.T) {
loop: loop:
for _, tt := range tokenTests { for _, tt := range tokenTests {
z := NewTokenizer(bytes.NewBuffer([]byte(tt.html))) z := NewTokenizer(strings.NewReader(tt.html))
z.ReturnComments = true z.ReturnComments = true
if tt.golden != "" { if tt.golden != "" {
for i, s := range strings.Split(tt.golden, "$") { for i, s := range strings.Split(tt.golden, "$") {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment