Commit 740e589b authored by Brad Fitzpatrick

html: lazily populate Unescape tables

Saves ~105KB of heap for callers who don't use html.UnescapeString.
(EscapeString is much more common).

Also saves 70KB of binary size, because the linker can now do dead
code elimination. (Previously it could not: #2559 is still open, so
global maps always generate init code.)

Fixes #26727
Updates #6853

Change-Id: I18fe9a273097e2c7e0cb7f88205cae1bb60fa89b
Reviewed-on: https://go-review.googlesource.com/127075
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Emmanuel Odeke <emm.odeke@gmail.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 04c09588
......@@ -4,6 +4,8 @@
package html
import "sync"
// All entities that do not end with ';' are 6 or fewer bytes long.
const longestEntityWithoutSemicolon = 6
......@@ -13,7 +15,17 @@ const longestEntityWithoutSemicolon = 6
//
// Note that the HTML5 list is larger than the HTML4 list at
// http://www.w3.org/TR/html4/sgml/entities.html
var entity = map[string]rune{
var entity map[string]rune
// HTML entities that are two unicode codepoints.
var entity2 map[string][2]rune
// populateMapsOnce guards calling populateMaps.
var populateMapsOnce sync.Once
// populateMaps populates entity and entity2.
func populateMaps() {
entity = map[string]rune{
"AElig;": '\U000000C6',
"AMP;": '\U00000026',
"Aacute;": '\U000000C1',
......@@ -2152,10 +2164,9 @@ var entity = map[string]rune{
"yacute": '\U000000FD',
"yen": '\U000000A5',
"yuml": '\U000000FF',
}
}
// HTML entities that are two unicode codepoints.
var entity2 = map[string][2]rune{
entity2 = map[string][2]rune{
// TODO(nigeltao): Handle replacements that are wider than their names.
// "nLt;": {'\u226A', '\u20D2'},
// "nGt;": {'\u226B', '\u20D2'},
......@@ -2250,4 +2261,5 @@ var entity2 = map[string][2]rune{
"vsubne;": {'\u228A', '\uFE00'},
"vsupnE;": {'\u2ACC', '\uFE00'},
"vsupne;": {'\u228B', '\uFE00'},
}
}
......@@ -9,7 +9,15 @@ import (
"unicode/utf8"
)
// init calls UnescapeString once so that the lazily populated entity and
// entity2 maps are filled in before the tests inspect them directly.
func init() {
UnescapeString("") // force load of entity maps
}
func TestEntityLength(t *testing.T) {
if len(entity) == 0 || len(entity2) == 0 {
t.Fatal("maps not loaded")
}
// We verify that the length of UTF-8 encoding of each value is <= 1 + len(key).
// The +1 comes from the leading "&". This property implies that the length of
// unescaped text is <= the length of escaped text.
......
......@@ -185,6 +185,7 @@ func EscapeString(s string) string {
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true.
func UnescapeString(s string) string {
populateMapsOnce.Do(populateMaps)
i := strings.IndexByte(s, '&')
if i < 0 {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment