Commit 740e589b authored by Brad Fitzpatrick

html: lazily populate Unescape tables

Saves ~105KB of heap for callers who don't use html.UnescapeString.
(EscapeString is much more common).

Also saves 70KB of binary size, because the linker can now do dead
code elimination. (Previously it could not: #2559 is still open, so
global maps always generate init code.)

Fixes #26727
Updates #6853

Change-Id: I18fe9a273097e2c7e0cb7f88205cae1bb60fa89b
Reviewed-on: https://go-review.googlesource.com/127075
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Emmanuel Odeke <emm.odeke@gmail.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 04c09588
...@@ -4,6 +4,8 @@ ...@@ -4,6 +4,8 @@
package html package html
import "sync"
// All entities that do not end with ';' are 6 or fewer bytes long. // All entities that do not end with ';' are 6 or fewer bytes long.
const longestEntityWithoutSemicolon = 6 const longestEntityWithoutSemicolon = 6
...@@ -13,7 +15,17 @@ const longestEntityWithoutSemicolon = 6 ...@@ -13,7 +15,17 @@ const longestEntityWithoutSemicolon = 6
// //
// Note that the HTML5 list is larger than the HTML4 list at // Note that the HTML5 list is larger than the HTML4 list at
// http://www.w3.org/TR/html4/sgml/entities.html // http://www.w3.org/TR/html4/sgml/entities.html
var entity = map[string]rune{ var entity map[string]rune
// HTML entities that are two unicode codepoints.
var entity2 map[string][2]rune
// populateMapsOnce guards calling populateMaps.
var populateMapsOnce sync.Once
// populateMaps populates entity and entity2.
func populateMaps() {
entity = map[string]rune{
"AElig;": '\U000000C6', "AElig;": '\U000000C6',
"AMP;": '\U00000026', "AMP;": '\U00000026',
"Aacute;": '\U000000C1', "Aacute;": '\U000000C1',
...@@ -2152,10 +2164,9 @@ var entity = map[string]rune{ ...@@ -2152,10 +2164,9 @@ var entity = map[string]rune{
"yacute": '\U000000FD', "yacute": '\U000000FD',
"yen": '\U000000A5', "yen": '\U000000A5',
"yuml": '\U000000FF', "yuml": '\U000000FF',
} }
// HTML entities that are two unicode codepoints. entity2 = map[string][2]rune{
var entity2 = map[string][2]rune{
// TODO(nigeltao): Handle replacements that are wider than their names. // TODO(nigeltao): Handle replacements that are wider than their names.
// "nLt;": {'\u226A', '\u20D2'}, // "nLt;": {'\u226A', '\u20D2'},
// "nGt;": {'\u226B', '\u20D2'}, // "nGt;": {'\u226B', '\u20D2'},
...@@ -2250,4 +2261,5 @@ var entity2 = map[string][2]rune{ ...@@ -2250,4 +2261,5 @@ var entity2 = map[string][2]rune{
"vsubne;": {'\u228A', '\uFE00'}, "vsubne;": {'\u228A', '\uFE00'},
"vsupnE;": {'\u2ACC', '\uFE00'}, "vsupnE;": {'\u2ACC', '\uFE00'},
"vsupne;": {'\u228B', '\uFE00'}, "vsupne;": {'\u228B', '\uFE00'},
}
} }
...@@ -9,7 +9,15 @@ import ( ...@@ -9,7 +9,15 @@ import (
"unicode/utf8" "unicode/utf8"
) )
// init runs before any test in this package. It calls UnescapeString once so
// that the lazily populated entity and entity2 maps are filled in before tests
// such as TestEntityLength inspect them directly.
func init() {
UnescapeString("") // force load of entity maps
}
func TestEntityLength(t *testing.T) { func TestEntityLength(t *testing.T) {
if len(entity) == 0 || len(entity2) == 0 {
t.Fatal("maps not loaded")
}
// We verify that the length of UTF-8 encoding of each value is <= 1 + len(key). // We verify that the length of UTF-8 encoding of each value is <= 1 + len(key).
// The +1 comes from the leading "&". This property implies that the length of // The +1 comes from the leading "&". This property implies that the length of
// unescaped text is <= the length of escaped text. // unescaped text is <= the length of escaped text.
......
...@@ -185,6 +185,7 @@ func EscapeString(s string) string { ...@@ -185,6 +185,7 @@ func EscapeString(s string) string {
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't // UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true. // always true.
func UnescapeString(s string) string { func UnescapeString(s string) string {
populateMapsOnce.Do(populateMaps)
i := strings.IndexByte(s, '&') i := strings.IndexByte(s, '&')
if i < 0 { if i < 0 {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment