Commit 49c79ab9 authored by Kirill Smelkov's avatar Kirill Smelkov Committed by Kamil Kisiel

decoder: Teach loadUnicode to return ErrUnexpectedEOF, not paper it over in tests (#44)

While doing a5094338 (encoder: More unexpected EOF handling) I was a bit
disappointed that https://golang.org/cl/37052 was not going to be
accepted, and so I added a hack to the tests that was doing

	strconv.ErrSyntax -> io.ErrUnexpectedEOF

error conversion if the test name contained "unicode".

Today I was refactoring tests and noticed that it is better to teach loadUnicode
to return io.ErrUnexpectedEOF in the first place, and the next easiest way to do this
(after fixing strconv.UnquoteChar itself) is to append many "0" to the string and
see if strconv.UnquoteChar still returns ErrSyntax.

So do it in our custom unquoteChar wrapper.
parent 9eb8c4c6
......@@ -672,7 +672,7 @@ func (d *Decoder) loadUnicode() error {
if len(sline) == 0 {
break
}
r, _, sline, err = strconv.UnquoteChar(sline, '\'')
r, _, sline, err = unquoteChar(sline, '\'')
if err != nil {
return err
}
......@@ -1058,6 +1058,37 @@ func (d *Decoder) loadMemoize() error {
return nil
}
// unquoteChar wraps strconv.UnquoteChar and reports io.ErrUnexpectedEOF
// instead of strconv.ErrSyntax when the input is prematurely terminated.
// For all other inputs the results of strconv.UnquoteChar are returned as is.
//
// XXX remove if ever something like https://golang.org/cl/37052 is accepted.
func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
	// Handle empty input ourselves: strconv.UnquoteChar("") panics before Go1.11.
	if len(s) == 0 {
		return 0, false, "", io.ErrUnexpectedEOF
	}

	value, multibyte, tail, err = strconv.UnquoteChar(s, quote)
	switch {
	case err == nil:
		return value, multibyte, tail, nil
	case len(s) > 10:
		// s is longer than the longest escape form (\U12345678), so the
		// failure is not treated as a cut input.
		return value, multibyte, tail, err
	}

	// Probe whether the failure was caused by a cut input: padding with nine
	// "0" makes a cut escape valid, e.g. "\U012" becomes "\U012000000000",
	// whereas a genuinely invalid escape, e.g. "\Uz", remains invalid even
	// with the suffix.
	const zeroPad = "000000000"
	if _, _, _, padErr := strconv.UnquoteChar(s+zeroPad, quote); padErr == nil {
		err = io.ErrUnexpectedEOF
	}
	return value, multibyte, tail, err
}
// decodeLong takes a byte array of 2's complement little-endian binary words and converts them
// to a big integer
func decodeLong(data string) (*big.Int, error) {
......
......@@ -8,7 +8,6 @@ import (
"math/big"
"reflect"
"strconv"
"strings"
"testing"
)
......@@ -119,12 +118,6 @@ func TestDecode(t *testing.T) {
dec := NewDecoder(buf)
//println(test.name, l)
v, err := dec.Decode()
// strconv.UnquoteChar used in loadUnicode always returns
// SyntaxError, at least unless the following CL is accepted:
// https://go-review.googlesource.com/37052
if err == strconv.ErrSyntax && strings.HasPrefix(test.name, "unicode") {
err = io.ErrUnexpectedEOF
}
if !(v == nil && err == io.ErrUnexpectedEOF) {
t.Errorf("%s: no ErrUnexpectedEOF on [:%d] truncated stream: v = %#v err = %#v", test.name, l, v, err)
}
......@@ -426,3 +419,48 @@ func BenchmarkEncode(b *testing.B) {
}
}
}
// misquotedChars lists inputs for unquoteChar together with the error each
// one is expected to produce.
//
// A nil err means unquoteChar should succeed on the full input; such entries
// are additionally used to test for io.ErrUnexpectedEOF on truncated input.
var misquotedChars = []struct {
	in  string
	err error
}{
	// valid escapes
	{in: `\000`, err: nil},
	{in: `\x00`, err: nil},
	{in: `\u0000`, err: nil},
	{in: `\U00000000`, err: nil},

	// inputs expected to fail with strconv.ErrSyntax
	{in: `"`, err: strconv.ErrSyntax},
	{in: `\'`, err: strconv.ErrSyntax},
	{in: `\q`, err: strconv.ErrSyntax},
	{in: `\z`, err: strconv.ErrSyntax},
	{in: `\008`, err: strconv.ErrSyntax},
	{in: `\400`, err: strconv.ErrSyntax},
	{in: `\x0z`, err: strconv.ErrSyntax},
	{in: `\u000z`, err: strconv.ErrSyntax},
	{in: `\U0000000z`, err: strconv.ErrSyntax},
	{in: `\U12345678`, err: strconv.ErrSyntax},
}
// TestUnquoteCharEOF verifies that unquoteChar returns the error listed in
// misquotedChars for each input, and that every truncation of an input that
// unquotes successfully yields io.ErrUnexpectedEOF instead of ErrSyntax.
func TestUnquoteCharEOF(t *testing.T) {
	for _, tc := range misquotedChars {
		if _, _, _, err := unquoteChar(tc.in, '"'); err != tc.err {
			t.Errorf("unquoteChar(%#q) -> err = %v want %v", tc.in, err, tc.err)
		}

		// only inputs expected to be valid take part in the truncation check
		if tc.err == nil {
			// walk all proper prefixes, longest first, down to the empty string
			for n := len(tc.in) - 1; n >= 0; n-- {
				prefix := tc.in[:n]
				if _, _, _, err := unquoteChar(prefix, '"'); err != io.ErrUnexpectedEOF {
					t.Errorf("unquoteChar(%#q) -> err = %v want %v", prefix, err, io.ErrUnexpectedEOF)
				}
			}
		}
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment