// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Represents JSON data structure using native Go types: booleans, floats,
// strings, arrays, and maps.

package json

import (
	"encoding/base64"
	"os"
	"reflect"
	"runtime"
	"strconv"
	"strings"
	"unicode"
	"utf16"
	"utf8"
)

22
// Unmarshal parses the JSON-encoded data and stores the result
23
// in the value pointed to by v.
Russ Cox's avatar
Russ Cox committed
24
//
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
// Unmarshal traverses the value v recursively.
// If an encountered value implements the Unmarshaler interface,
// Unmarshal calls its UnmarshalJSON method with a well-formed
// JSON encoding.
//
// Otherwise, Unmarshal uses the inverse of the encodings that
// Marshal uses, allocating maps, slices, and pointers as necessary,
// with the following additional rules:
//
// To unmarshal a JSON value into a nil interface value, the
// type stored in the interface value is one of:
//
//	bool, for JSON booleans
//	float64, for JSON numbers
//	string, for JSON strings
//	[]interface{}, for JSON arrays
//	map[string]interface{}, for JSON objects
//	nil for JSON null
//
// If a JSON value is not appropriate for a given target type,
// or if a JSON number overflows the target type, Unmarshal
// skips that field and completes the unmarshalling as best it can.
// If no more serious errors are encountered, Unmarshal returns
// an UnmarshalTypeError describing the earliest such error.
//
func Unmarshal(data []byte, v interface{}) os.Error {
	d := new(decodeState).init(data)

	// Quick check for well-formedness.
	// Avoids filling out half a data structure
	// before discovering a JSON syntax error.
	err := checkValid(data, &d.scan)
	if err != nil {
		return err
59
	}
60 61

	return d.unmarshal(v)
62 63
}

64 65 66 67 68 69 70
// Unmarshaler is the interface implemented by objects
// that can unmarshal a JSON description of themselves.
// The input can be assumed to be a valid JSON object
// encoding.  UnmarshalJSON must copy the JSON data
// if it wishes to retain the data after returning.
type Unmarshaler interface {
	UnmarshalJSON([]byte) os.Error
71 72
}

// An UnmarshalTypeError describes a JSON value that was
// not appropriate for a value of a specific Go type.
//
// The decoder builds it with positional composite literals,
// so the field order (Value, then Type) must not change.
type UnmarshalTypeError struct {
	Value string       // description of JSON value - "bool", "array", "number -5"
	Type  reflect.Type // type of Go value it could not be assigned to
}

80 81 82
func (e *UnmarshalTypeError) String() string {
	return "json: cannot unmarshal " + e.Value + " into Go value of type " + e.Type.String()
}
83

// An UnmarshalFieldError describes a JSON object key that
// led to an unexported (and therefore unwritable) struct field.
//
// The decoder builds it with a positional composite literal,
// so the field order (Key, Type, Field) must not change.
type UnmarshalFieldError struct {
	Key   string              // JSON object key as it appeared in the input
	Type  reflect.Type        // struct type being decoded into
	Field reflect.StructField // unexported field that Key resolved to
}

// String reports the quoted object key, the name of the unexported
// field it resolved to, and the type recorded in e.Type.
func (e *UnmarshalFieldError) String() string {
	quotedKey := strconv.Quote(e.Key)
	typeName := e.Type.String()
	return "json: cannot unmarshal object key " + quotedKey + " into unexported field " + e.Field.Name + " of type " + typeName
}

// An InvalidUnmarshalError describes an invalid argument passed to Unmarshal.
// (The argument to Unmarshal must be a non-nil pointer.)
type InvalidUnmarshalError struct {
	Type reflect.Type // type of the rejected argument; nil if the argument was an untyped nil
}

102 103 104 105
func (e *InvalidUnmarshalError) String() string {
	if e.Type == nil {
		return "json: Unmarshal(nil)"
	}
106

107
	if e.Type.Kind() != reflect.Ptr {
108 109 110 111
		return "json: Unmarshal(non-pointer " + e.Type.String() + ")"
	}
	return "json: Unmarshal(nil " + e.Type.String() + ")"
}
112

113 114 115 116 117 118 119 120 121
func (d *decodeState) unmarshal(v interface{}) (err os.Error) {
	defer func() {
		if r := recover(); r != nil {
			if _, ok := r.(runtime.Error); ok {
				panic(r)
			}
			err = r.(os.Error)
		}
	}()
122

Russ Cox's avatar
Russ Cox committed
123
	rv := reflect.ValueOf(v)
124
	pv := rv
125
	if pv.Kind() != reflect.Ptr || pv.IsNil() {
Russ Cox's avatar
Russ Cox committed
126
		return &InvalidUnmarshalError{reflect.TypeOf(v)}
127
	}
128

129
	d.scan.reset()
130 131 132
	// We decode rv not pv.Elem because the Unmarshaler interface
	// test must be applied at the top level of the value.
	d.value(rv)
133 134
	return d.savedError
}
135

136 137 138 139 140 141 142 143
// decodeState represents the state while decoding a JSON value.
type decodeState struct {
	data       []byte
	off        int // read offset in data
	scan       scanner
	nextscan   scanner // for calls to nextValue
	savedError os.Error
}
144

145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
// errPhase is used for errors that should not happen unless
// there is a bug in the JSON decoder or something is editing
// the data slice while the decoder executes.
var errPhase = os.NewError("JSON decoder out of sync - data changing underfoot?")

// init points d at a fresh input buffer, rewinds the read offset and
// discards any previously saved error.  It returns d so that the call
// can be chained onto an allocation.
func (d *decodeState) init(data []byte) *decodeState {
	d.savedError = nil
	d.off = 0
	d.data = data
	return d
}

// error aborts the decoding by panicking with err.
// The panic is recovered by the deferred function in unmarshal,
// which hands err back to the caller as the decoding error.
func (d *decodeState) error(err os.Error) {
	panic(err)
}

// saveError records err for reporting once the unmarshal finishes.
// Only the first error ever passed in is kept; later ones are dropped.
func (d *decodeState) saveError(err os.Error) {
	if d.savedError != nil {
		return
	}
	d.savedError = err
}

// next cuts off and returns the next full JSON value in d.data[d.off:].
// The next value is known to be an object or array, not a literal.
func (d *decodeState) next() []byte {
	c := d.data[d.off]
	item, rest, err := nextValue(d.data[d.off:], &d.nextscan)
	if err != nil {
		d.error(err)
	}
	d.off = len(d.data) - len(rest)

	// Our scanner has seen the opening brace/bracket
	// and thinks we're still in the middle of the object.
	// invent a closing brace/bracket to get it out.
	if c == '{' {
		d.scan.step(&d.scan, '}')
	} else {
		d.scan.step(&d.scan, ']')
187
	}
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215

	return item
}

// scanWhile feeds bytes from d.data[d.off:] to the scanner for as long
// as the scanner keeps answering with op, and returns the first scan
// code that differs.  d.off is advanced past every byte consumed.
// At least one step is always taken.
func (d *decodeState) scanWhile(op int) int {
	code := op
	for code == op {
		if d.off < len(d.data) {
			b := int(d.data[d.off])
			d.off++
			code = d.scan.step(&d.scan, b)
		} else {
			code = d.scan.eof()
			d.off = len(d.data) + 1 // mark processed EOF with len+1
		}
	}
	return code
}

// value decodes a JSON value from d.data[d.off:] into the value.
// it updates d.off to point past the decoded value.
func (d *decodeState) value(v reflect.Value) {
216
	if !v.IsValid() {
217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
		_, rest, err := nextValue(d.data[d.off:], &d.nextscan)
		if err != nil {
			d.error(err)
		}
		d.off = len(d.data) - len(rest)

		// d.scan thinks we're still at the beginning of the item.
		// Feed in an empty string - the shortest, simplest value -
		// so that it knows we got to the end of the value.
		if d.scan.step == stateRedo {
			panic("redo")
		}
		d.scan.step(&d.scan, '"')
		d.scan.step(&d.scan, '"')
		return
	}

	switch op := d.scanWhile(scanSkipSpace); op {
	default:
		d.error(errPhase)

	case scanBeginArray:
		d.array(v)

	case scanBeginObject:
		d.object(v)

	case scanBeginLiteral:
		d.literal(v)
	}
}

// indirect walks down v allocating pointers as needed,
// until it gets to a non-pointer.
// if it encounters an Unmarshaler, indirect stops and returns that.
// if wantptr is true, indirect stops at the last pointer.
func (d *decodeState) indirect(v reflect.Value, wantptr bool) (Unmarshaler, reflect.Value) {
Russ Cox's avatar
Russ Cox committed
254 255 256 257 258 259
	// If v is a named type and is addressable,
	// start with its address, so that if the type has pointer methods,
	// we find them.
	if v.Kind() != reflect.Ptr && v.Type().Name() != "" && v.CanAddr() {
		v = v.Addr()
	}
260 261 262 263 264 265 266 267 268
	for {
		var isUnmarshaler bool
		if v.Type().NumMethod() > 0 {
			// Remember that this is an unmarshaler,
			// but wait to return it until after allocating
			// the pointer (if necessary).
			_, isUnmarshaler = v.Interface().(Unmarshaler)
		}

269
		if iv := v; iv.Kind() == reflect.Interface && !iv.IsNil() {
270 271 272
			v = iv.Elem()
			continue
		}
273

274 275
		pv := v
		if pv.Kind() != reflect.Ptr {
276 277
			break
		}
278

279
		if pv.Elem().Kind() != reflect.Ptr && wantptr && pv.CanSet() && !isUnmarshaler {
280 281
			return nil, pv
		}
282
		if pv.IsNil() {
283
			pv.Set(reflect.New(pv.Type().Elem()))
284
		}
285 286 287 288 289 290 291 292 293
		if isUnmarshaler {
			// Using v.Interface().(Unmarshaler)
			// here means that we have to use a pointer
			// as the struct field.  We cannot use a value inside
			// a pointer to a struct, because in that case
			// v.Interface() is the value (x.f) not the pointer (&x.f).
			// This is an unfortunate consequence of reflect.
			// An alternative would be to look up the
			// UnmarshalJSON method and return a FuncValue.
294
			return v.Interface().(Unmarshaler), reflect.Value{}
295 296
		}
		v = pv.Elem()
297
	}
298
	return nil, v
299 300
}

301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
// array consumes an array from d.data[d.off-1:], decoding into the value v.
// the first byte of the array ('[') has been read already.
func (d *decodeState) array(v reflect.Value) {
	// Check for unmarshaler.
	unmarshaler, pv := d.indirect(v, false)
	if unmarshaler != nil {
		d.off--
		err := unmarshaler.UnmarshalJSON(d.next())
		if err != nil {
			d.error(err)
		}
		return
	}
	v = pv

	// Decoding into nil interface?  Switch to non-reflect code.
317 318
	iv := v
	ok := iv.Kind() == reflect.Interface
319
	if ok {
Russ Cox's avatar
Russ Cox committed
320
		iv.Set(reflect.ValueOf(d.arrayInterface()))
321 322 323 324
		return
	}

	// Check type of target.
325 326
	av := v
	if av.Kind() != reflect.Array && av.Kind() != reflect.Slice {
327
		d.saveError(&UnmarshalTypeError{"array", v.Type()})
328 329 330
		d.off--
		d.next()
		return
331 332
	}

333
	sv := v
334 335 336 337 338 339 340 341 342 343 344 345 346 347

	i := 0
	for {
		// Look ahead for ] - can only happen on first iteration.
		op := d.scanWhile(scanSkipSpace)
		if op == scanEndArray {
			break
		}

		// Back up so d.value can have the byte we just read.
		d.off--
		d.scan.undo(op)

		// Get element of array, growing if necessary.
348
		if i >= av.Cap() && sv.IsValid() {
349 350 351 352
			newcap := sv.Cap() + sv.Cap()/2
			if newcap < 4 {
				newcap = 4
			}
353
			newv := reflect.MakeSlice(sv.Type(), sv.Len(), newcap)
354
			reflect.Copy(newv, sv)
355 356
			sv.Set(newv)
		}
357
		if i >= av.Len() && sv.IsValid() {
358 359 360 361 362 363
			// Must be slice; gave up on array during i >= av.Cap().
			sv.SetLen(i + 1)
		}

		// Decode into element.
		if i < av.Len() {
364
			d.value(av.Index(i))
365 366
		} else {
			// Ran out of fixed array: skip.
367
			d.value(reflect.Value{})
368 369 370 371 372 373 374 375 376 377 378 379 380
		}
		i++

		// Next token must be , or ].
		op = d.scanWhile(scanSkipSpace)
		if op == scanEndArray {
			break
		}
		if op != scanArrayValue {
			d.error(errPhase)
		}
	}
	if i < av.Len() {
381
		if !sv.IsValid() {
382
			// Array.  Zero the rest.
383
			z := reflect.Zero(av.Type().Elem())
384
			for ; i < av.Len(); i++ {
385
				av.Index(i).Set(z)
386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413
			}
		} else {
			sv.SetLen(i)
		}
	}
}

// matchName reports whether a JSON object key selects the struct field
// called name.  The two are compared after lower-casing both.
func matchName(key, name string) bool {
	loweredKey := strings.ToLower(key)
	loweredName := strings.ToLower(name)
	return loweredKey == loweredName
}

// object consumes an object from d.data[d.off-1:], decoding into the value v.
// the first byte of the object ('{') has been read already.
func (d *decodeState) object(v reflect.Value) {
	// Check for unmarshaler.
	unmarshaler, pv := d.indirect(v, false)
	if unmarshaler != nil {
		d.off--
		err := unmarshaler.UnmarshalJSON(d.next())
		if err != nil {
			d.error(err)
		}
		return
	}
	v = pv

	// Decoding into nil interface?  Switch to non-reflect code.
414 415
	iv := v
	if iv.Kind() == reflect.Interface {
Russ Cox's avatar
Russ Cox committed
416
		iv.Set(reflect.ValueOf(d.objectInterface()))
417 418 419 420 421
		return
	}

	// Check type of target: struct or map[string]T
	var (
422 423
		mv reflect.Value
		sv reflect.Value
424
	)
425 426
	switch v.Kind() {
	case reflect.Map:
427
		// map must have string type
428
		t := v.Type()
Russ Cox's avatar
Russ Cox committed
429
		if t.Key() != reflect.TypeOf("") {
430 431 432 433 434
			d.saveError(&UnmarshalTypeError{"object", v.Type()})
			break
		}
		mv = v
		if mv.IsNil() {
435
			mv.Set(reflect.MakeMap(t))
436
		}
437
	case reflect.Struct:
438 439 440 441 442
		sv = v
	default:
		d.saveError(&UnmarshalTypeError{"object", v.Type()})
	}

443
	if !mv.IsValid() && !sv.IsValid() {
444 445 446 447 448
		d.off--
		d.next() // skip over { } in input
		return
	}

449 450
	var mapElem reflect.Value

451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470
	for {
		// Read opening " of string key or closing }.
		op := d.scanWhile(scanSkipSpace)
		if op == scanEndObject {
			// closing } - can only happen on first iteration.
			break
		}
		if op != scanBeginLiteral {
			d.error(errPhase)
		}

		// Read string key.
		start := d.off - 1
		op = d.scanWhile(scanContinue)
		item := d.data[start : d.off-1]
		key, ok := unquote(item)
		if !ok {
			d.error(errPhase)
		}

471
		// Figure out field corresponding to key.
472
		var subv reflect.Value
473
		if mv.IsValid() {
474 475 476 477 478 479 480
			elemType := mv.Type().Elem()
			if !mapElem.IsValid() {
				mapElem = reflect.New(elemType).Elem()
			} else {
				mapElem.Set(reflect.Zero(elemType))
			}
			subv = mapElem
481
		} else {
482 483
			var f reflect.StructField
			var ok bool
484
			st := sv.Type()
Russ Cox's avatar
Russ Cox committed
485 486 487 488
			// First try for field with that tag.
			if isValidTag(key) {
				for i := 0; i < sv.NumField(); i++ {
					f = st.Field(i)
489
					if tagName(f.Tag.Get("json")) == key {
Russ Cox's avatar
Russ Cox committed
490 491 492
						ok = true
						break
					}
493 494
				}
			}
495
			if !ok {
496
				// Second, exact match.
497 498 499 500 501 502 503 504 505 506 507 508 509
				f, ok = st.FieldByName(key)
			}
			if !ok {
				// Third, case-insensitive match.
				f, ok = st.FieldByNameFunc(func(s string) bool { return matchName(key, s) })
			}

			// Extract value; name must be exported.
			if ok {
				if f.PkgPath != "" {
					d.saveError(&UnmarshalFieldError{key, st, f})
				} else {
					subv = sv.FieldByIndex(f.Index)
510
				}
511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
			}
		}

		// Read : before value.
		if op == scanSkipSpace {
			op = d.scanWhile(scanSkipSpace)
		}
		if op != scanObjectKey {
			d.error(errPhase)
		}

		// Read value.
		d.value(subv)

		// Write value back to map;
		// if using struct, subv points into struct already.
527
		if mv.IsValid() {
Russ Cox's avatar
Russ Cox committed
528
			mv.SetMapIndex(reflect.ValueOf(key), subv)
529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568
		}

		// Next token must be , or }.
		op = d.scanWhile(scanSkipSpace)
		if op == scanEndObject {
			break
		}
		if op != scanObjectValue {
			d.error(errPhase)
		}
	}
}

// literal consumes a literal from d.data[d.off-1:], decoding into the value v.
// The first byte of the literal has been read already
// (that's how the caller knows it's a literal).
func (d *decodeState) literal(v reflect.Value) {
	// All bytes inside literal return scanContinue op code.
	start := d.off - 1
	op := d.scanWhile(scanContinue)

	// Scan read one byte too far; back up.
	d.off--
	d.scan.undo(op)
	item := d.data[start:d.off]

	// Check for unmarshaler.
	wantptr := item[0] == 'n' // null
	unmarshaler, pv := d.indirect(v, wantptr)
	if unmarshaler != nil {
		err := unmarshaler.UnmarshalJSON(item)
		if err != nil {
			d.error(err)
		}
		return
	}
	v = pv

	switch c := item[0]; c {
	case 'n': // null
569
		switch v.Kind() {
570 571
		default:
			d.saveError(&UnmarshalTypeError{"null", v.Type()})
572 573
		case reflect.Interface, reflect.Ptr, reflect.Map:
			v.Set(reflect.Zero(v.Type()))
574 575 576 577
		}

	case 't', 'f': // true, false
		value := c == 't'
578
		switch v.Kind() {
579 580
		default:
			d.saveError(&UnmarshalTypeError{"bool", v.Type()})
581 582 583
		case reflect.Bool:
			v.SetBool(value)
		case reflect.Interface:
Russ Cox's avatar
Russ Cox committed
584
			v.Set(reflect.ValueOf(value))
585 586 587
		}

	case '"': // string
588
		s, ok := unquoteBytes(item)
589 590 591
		if !ok {
			d.error(errPhase)
		}
592
		switch v.Kind() {
593 594
		default:
			d.saveError(&UnmarshalTypeError{"string", v.Type()})
595
		case reflect.Slice:
596 597 598 599 600 601 602 603 604 605
			if v.Type() != byteSliceType {
				d.saveError(&UnmarshalTypeError{"string", v.Type()})
				break
			}
			b := make([]byte, base64.StdEncoding.DecodedLen(len(s)))
			n, err := base64.StdEncoding.Decode(b, s)
			if err != nil {
				d.saveError(err)
				break
			}
Russ Cox's avatar
Russ Cox committed
606
			v.Set(reflect.ValueOf(b[0:n]))
607 608 609
		case reflect.String:
			v.SetString(string(s))
		case reflect.Interface:
Russ Cox's avatar
Russ Cox committed
610
			v.Set(reflect.ValueOf(string(s)))
611 612 613 614 615 616 617
		}

	default: // number
		if c != '-' && (c < '0' || c > '9') {
			d.error(errPhase)
		}
		s := string(item)
618
		switch v.Kind() {
619 620
		default:
			d.error(&UnmarshalTypeError{"number", v.Type()})
621
		case reflect.Interface:
622 623 624 625 626
			n, err := strconv.Atof64(s)
			if err != nil {
				d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
				break
			}
Russ Cox's avatar
Russ Cox committed
627
			v.Set(reflect.ValueOf(n))
628

629
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
630
			n, err := strconv.Atoi64(s)
631
			if err != nil || v.OverflowInt(n) {
632 633 634
				d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
				break
			}
635
			v.SetInt(n)
636

637
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
638
			n, err := strconv.Atoui64(s)
639
			if err != nil || v.OverflowUint(n) {
640 641 642
				d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
				break
			}
643
			v.SetUint(n)
644

645
		case reflect.Float32, reflect.Float64:
646
			n, err := strconv.AtofN(s, v.Type().Bits())
647
			if err != nil || v.OverflowFloat(n) {
648 649 650
				d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
				break
			}
651
			v.SetFloat(n)
652
		}
653 654 655
	}
}

// The xxxInterface routines build up a value to be stored
// in an empty interface.  They are not strictly necessary,
// but they avoid the weight of reflection in this common case.

// valueInterface is the interface{}-returning counterpart of value:
// it skips leading space, then dispatches on the kind of token that
// begins the next JSON value.  Any other scan code is a decoder
// phase error and aborts decoding.
func (d *decodeState) valueInterface() interface{} {
	op := d.scanWhile(scanSkipSpace)
	if op == scanBeginArray {
		return d.arrayInterface()
	}
	if op == scanBeginObject {
		return d.objectInterface()
	}
	if op == scanBeginLiteral {
		return d.literalInterface()
	}
	d.error(errPhase)
	panic("unreachable")
}

// arrayInterface is like array but returns []interface{}.
func (d *decodeState) arrayInterface() []interface{} {
677
	var v []interface{}
678 679 680 681 682 683 684 685 686 687 688
	for {
		// Look ahead for ] - can only happen on first iteration.
		op := d.scanWhile(scanSkipSpace)
		if op == scanEndArray {
			break
		}

		// Back up so d.value can have the byte we just read.
		d.off--
		d.scan.undo(op)

689
		v = append(v, d.valueInterface())
690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744

		// Next token must be , or ].
		op = d.scanWhile(scanSkipSpace)
		if op == scanEndArray {
			break
		}
		if op != scanArrayValue {
			d.error(errPhase)
		}
	}
	return v
}

// objectInterface is like object but returns map[string]interface{}.
func (d *decodeState) objectInterface() map[string]interface{} {
	m := make(map[string]interface{})
	for {
		// Read opening " of string key or closing }.
		op := d.scanWhile(scanSkipSpace)
		if op == scanEndObject {
			// closing } - can only happen on first iteration.
			break
		}
		if op != scanBeginLiteral {
			d.error(errPhase)
		}

		// Read string key.
		start := d.off - 1
		op = d.scanWhile(scanContinue)
		item := d.data[start : d.off-1]
		key, ok := unquote(item)
		if !ok {
			d.error(errPhase)
		}

		// Read : before value.
		if op == scanSkipSpace {
			op = d.scanWhile(scanSkipSpace)
		}
		if op != scanObjectKey {
			d.error(errPhase)
		}

		// Read value.
		m[key] = d.valueInterface()

		// Next token must be , or }.
		op = d.scanWhile(scanSkipSpace)
		if op == scanEndObject {
			break
		}
		if op != scanObjectValue {
			d.error(errPhase)
		}
745
	}
746
	return m
747 748
}

749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779
// literalInterface is like literal but returns an interface value.
func (d *decodeState) literalInterface() interface{} {
	// All bytes inside literal return scanContinue op code.
	start := d.off - 1
	op := d.scanWhile(scanContinue)

	// Scan read one byte too far; back up.
	d.off--
	d.scan.undo(op)
	item := d.data[start:d.off]

	switch c := item[0]; c {
	case 'n': // null
		return nil

	case 't', 'f': // true, false
		return c == 't'

	case '"': // string
		s, ok := unquote(item)
		if !ok {
			d.error(errPhase)
		}
		return s

	default: // number
		if c != '-' && (c < '0' || c > '9') {
			d.error(errPhase)
		}
		n, err := strconv.Atof64(string(item))
		if err != nil {
Russ Cox's avatar
Russ Cox committed
780
			d.saveError(&UnmarshalTypeError{"number " + string(item), reflect.TypeOf(0.0)})
781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802
		}
		return n
	}
	panic("unreachable")
}

// getu4 decodes \uXXXX from the beginning of s, returning the hex value,
// or it returns -1.
//
// s must start with a backslash, a 'u' and four hexadecimal digits
// (either case); anything after those six bytes is ignored.
// The digits are decoded in place, without building an intermediate
// string, because this runs once for every \u escape in the input.
func getu4(s []byte) int {
	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
		return -1
	}
	n := 0
	for _, c := range s[2:6] {
		switch {
		case '0' <= c && c <= '9':
			n = n<<4 | int(c-'0')
		case 'a' <= c && c <= 'f':
			n = n<<4 | int(c-'a'+10)
		case 'A' <= c && c <= 'F':
			n = n<<4 | int(c-'A'+10)
		default:
			return -1
		}
	}
	return n
}

// unquote converts a quoted JSON string literal s into an actual string t.
// The rules are different than for Go, so cannot use strconv.Unquote.
func unquote(s []byte) (t string, ok bool) {
803 804 805 806 807 808
	s, ok = unquoteBytes(s)
	t = string(s)
	return
}

func unquoteBytes(s []byte) (t []byte, ok bool) {
809 810 811
	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
		return
	}
812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836
	s = s[1 : len(s)-1]

	// Check for unusual characters. If there are none,
	// then no unquoting is needed, so return a slice of the
	// original bytes.
	r := 0
	for r < len(s) {
		c := s[r]
		if c == '\\' || c == '"' || c < ' ' {
			break
		}
		if c < utf8.RuneSelf {
			r++
			continue
		}
		rune, size := utf8.DecodeRune(s[r:])
		if rune == utf8.RuneError && size == 1 {
			break
		}
		r += size
	}
	if r == len(s) {
		return s, true
	}

837
	b := make([]byte, len(s)+2*utf8.UTFMax)
838 839
	w := copy(b, s[0:r])
	for r < len(s) {
840 841 842 843 844 845 846 847 848 849 850
		// Out of room?  Can only happen if s is full of
		// malformed UTF-8 and we're replacing each
		// byte with RuneError.
		if w >= len(b)-2*utf8.UTFMax {
			nb := make([]byte, (len(b)+utf8.UTFMax)*2)
			copy(nb, b[0:w])
			b = nb
		}
		switch c := s[r]; {
		case c == '\\':
			r++
851
			if r >= len(s) {
852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892
				return
			}
			switch s[r] {
			default:
				return
			case '"', '\\', '/', '\'':
				b[w] = s[r]
				r++
				w++
			case 'b':
				b[w] = '\b'
				r++
				w++
			case 'f':
				b[w] = '\f'
				r++
				w++
			case 'n':
				b[w] = '\n'
				r++
				w++
			case 'r':
				b[w] = '\r'
				r++
				w++
			case 't':
				b[w] = '\t'
				r++
				w++
			case 'u':
				r--
				rune := getu4(s[r:])
				if rune < 0 {
					return
				}
				r += 6
				if utf16.IsSurrogate(rune) {
					rune1 := getu4(s[r:])
					if dec := utf16.DecodeRune(rune, rune1); dec != unicode.ReplacementChar {
						// A valid pair; consume.
						r += 6
893
						w += utf8.EncodeRune(b[w:], dec)
894 895 896 897 898
						break
					}
					// Invalid surrogate; fall back to replacement rune.
					rune = unicode.ReplacementChar
				}
899
				w += utf8.EncodeRune(b[w:], rune)
900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915
			}

		// Quote, control characters are invalid.
		case c == '"', c < ' ':
			return

		// ASCII
		case c < utf8.RuneSelf:
			b[w] = c
			r++
			w++

		// Coerce to well-formed UTF-8.
		default:
			rune, size := utf8.DecodeRune(s[r:])
			r += size
916
			w += utf8.EncodeRune(b[w:], rune)
917
		}
918
	}
919
	return b[0:w], true
920
}
921 922 923 924 925 926 927 928 929 930

// tagName returns the field-name portion of a "json" struct tag value.
// The tag is an optional name followed by zero or more ",option"
// values, so the name is everything before the first comma, or the
// whole value when there is no comma.
func tagName(v string) string {
	comma := strings.Index(v, ",")
	if comma < 0 {
		return v
	}
	return v[:comma]
}