Commit 2d3599e5 authored by Daniel Martí

encoding/json: encode struct field names ahead of time

Struct field names are static, so we can run HTMLEscape on them when
building each struct type encoder. Then, when running the struct
encoder, we can select either the original or the escaped field name to
write directly.

When the encoder is not escaping HTML, using the original string works
because neither Go struct field names nor JSON tags allow any characters
that would need to be escaped, like '"', '\\', or '\n'.

When the encoder is escaping HTML, the only difference is that '<', '>',
and '&' are allowed via JSON struct field tags, which is why we use
HTMLEscape to properly escape them.

All of the above lets us encode field names with a simple if/else and
WriteString calls, which are considerably simpler and faster than
encoding an arbitrary string.

While at it, also include the quotes and colon in these strings, to
avoid three WriteByte calls in the loop hot path.

Also added a few tests, to ensure that the behavior in these edge cases
is not broken. The output of the tests is the same if this optimization
is reverted.

name           old time/op    new time/op    delta
CodeEncoder-4    7.12ms ± 0%    6.14ms ± 0%  -13.85%  (p=0.004 n=6+5)

name           old speed      new speed      delta
CodeEncoder-4   272MB/s ± 0%   316MB/s ± 0%  +16.08%  (p=0.004 n=6+5)

name           old alloc/op   new alloc/op   delta
CodeEncoder-4    91.9kB ± 0%    93.2kB ± 0%   +1.43%  (p=0.002 n=6+6)

name           old allocs/op  new allocs/op  delta
CodeEncoder-4      0.00           0.00          ~     (all equal)

Updates #5683.

Change-Id: I6f6a340d0de4670799ce38cf95b2092822d2e3ef
Reviewed-on: https://go-review.googlesource.com/122460
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
parent e7f59f02
...@@ -142,7 +142,7 @@ var ( ...@@ -142,7 +142,7 @@ var (
umstructXY = ustructText{unmarshalerText{"x", "y"}} umstructXY = ustructText{unmarshalerText{"x", "y"}}
ummapType = map[unmarshalerText]bool{} ummapType = map[unmarshalerText]bool{}
ummapXY = map[unmarshalerText]bool{unmarshalerText{"x", "y"}: true} ummapXY = map[unmarshalerText]bool{{"x", "y"}: true}
) )
// Test data structures for anonymous fields. // Test data structures for anonymous fields.
......
...@@ -641,8 +641,11 @@ func (se *structEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { ...@@ -641,8 +641,11 @@ func (se *structEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) {
} else { } else {
e.WriteByte(',') e.WriteByte(',')
} }
e.string(f.name, opts.escapeHTML) if opts.escapeHTML {
e.WriteByte(':') e.WriteString(f.nameEscHTML)
} else {
e.WriteString(f.nameNonEsc)
}
opts.quoted = f.quoted opts.quoted = f.quoted
se.fieldEncs[i](e, fv, opts) se.fieldEncs[i](e, fv, opts)
} }
...@@ -1036,6 +1039,9 @@ type field struct { ...@@ -1036,6 +1039,9 @@ type field struct {
nameBytes []byte // []byte(name) nameBytes []byte // []byte(name)
equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent
nameNonEsc string // `"` + name + `":`
nameEscHTML string // `"` + HTMLEscape(name) + `":`
tag bool tag bool
index []int index []int
typ reflect.Type typ reflect.Type
...@@ -1086,6 +1092,9 @@ func typeFields(t reflect.Type) []field { ...@@ -1086,6 +1092,9 @@ func typeFields(t reflect.Type) []field {
// Fields found. // Fields found.
var fields []field var fields []field
// Buffer to run HTMLEscape on field names.
var nameEscBuf bytes.Buffer
for len(next) > 0 { for len(next) > 0 {
current, next = next, current[:0] current, next = next, current[:0]
count, nextCount = nextCount, map[reflect.Type]int{} count, nextCount = nextCount, map[reflect.Type]int{}
...@@ -1152,14 +1161,24 @@ func typeFields(t reflect.Type) []field { ...@@ -1152,14 +1161,24 @@ func typeFields(t reflect.Type) []field {
if name == "" { if name == "" {
name = sf.Name name = sf.Name
} }
fields = append(fields, fillField(field{ field := fillField(field{
name: name, name: name,
tag: tagged, tag: tagged,
index: index, index: index,
typ: ft, typ: ft,
omitEmpty: opts.Contains("omitempty"), omitEmpty: opts.Contains("omitempty"),
quoted: quoted, quoted: quoted,
})) })
// Build nameEscHTML and nameNonEsc ahead of time.
nameEscBuf.Reset()
nameEscBuf.WriteString(`"`)
HTMLEscape(&nameEscBuf, field.nameBytes)
nameEscBuf.WriteString(`":`)
field.nameEscHTML = nameEscBuf.String()
field.nameNonEsc = `"` + field.name + `":`
fields = append(fields, field)
if count[f.typ] > 1 { if count[f.typ] > 1 {
// If there were multiple instances, add a second, // If there were multiple instances, add a second,
// so that the annihilation code will see a duplicate. // so that the annihilation code will see a duplicate.
......
...@@ -995,3 +995,18 @@ func TestMarshalPanic(t *testing.T) { ...@@ -995,3 +995,18 @@ func TestMarshalPanic(t *testing.T) {
Marshal(&marshalPanic{}) Marshal(&marshalPanic{})
t.Error("Marshal should have panicked") t.Error("Marshal should have panicked")
} }
func TestMarshalUncommonFieldNames(t *testing.T) {
v := struct {
A0, À, int
}{}
b, err := Marshal(v)
if err != nil {
t.Fatal("Marshal:", err)
}
want := `{"A0":0,"À":0,"Aβ":0}`
got := string(b)
if got != want {
t.Fatalf("Marshal: got %s want %s", got, want)
}
}
...@@ -93,6 +93,10 @@ func TestEncoderIndent(t *testing.T) { ...@@ -93,6 +93,10 @@ func TestEncoderIndent(t *testing.T) {
func TestEncoderSetEscapeHTML(t *testing.T) { func TestEncoderSetEscapeHTML(t *testing.T) {
var c C var c C
var ct CText var ct CText
var tagStruct struct {
Valid int `json:"<>&#! "`
Invalid int `json:"\\"`
}
for _, tt := range []struct { for _, tt := range []struct {
name string name string
v interface{} v interface{}
...@@ -102,6 +106,11 @@ func TestEncoderSetEscapeHTML(t *testing.T) { ...@@ -102,6 +106,11 @@ func TestEncoderSetEscapeHTML(t *testing.T) {
{"c", c, `"\u003c\u0026\u003e"`, `"<&>"`}, {"c", c, `"\u003c\u0026\u003e"`, `"<&>"`},
{"ct", ct, `"\"\u003c\u0026\u003e\""`, `"\"<&>\""`}, {"ct", ct, `"\"\u003c\u0026\u003e\""`, `"\"<&>\""`},
{`"<&>"`, "<&>", `"\u003c\u0026\u003e"`, `"<&>"`}, {`"<&>"`, "<&>", `"\u003c\u0026\u003e"`, `"<&>"`},
{
"tagStruct", tagStruct,
`{"\u003c\u003e\u0026#! ":0,"Invalid":0}`,
`{"<>&#! ":0,"Invalid":0}`,
},
} { } {
var buf bytes.Buffer var buf bytes.Buffer
enc := NewEncoder(&buf) enc := NewEncoder(&buf)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment