Commit d51cc146 authored by Kirill Smelkov, committed by Kamil Kisiel

Allow to hook-in application-level logic to handle persistent references (#38)

Continuing the theme of 84598fe1 (Add support for persistent references),
let's develop support for persistent references further: the usual
situation (at least in the ZODB context, which is actually the original
reason persistent references exist) is that decode converts a reference of
(type, oid) tuple form into the corresponding "ghost" object - an instance
of type, but not yet loaded with the database object's data. This way
the resulting object tree is created right away with the types the
application expects (and ZODB/py further takes care of automatically
loading object data when those ghost objects are accessed).

To mimic this on the Go side, in the current state, after decoding one
would need to make a full reflect pass over the resulting decoded objects,
find Refs, and convert them to instances of other types, also taking care
to consistently patch pointers in other objects that were pointing to such
Refs. In other words, it is a fair amount of work, and it coincides almost
100% with what Decoder already does by itself.

Thus, to avoid duplicating that work, and drawing a parallel with the
Python world, let's add the ability to configure Decoder and Encoder so
that persistent references can be handled with user-specified application
logic right in the process, and e.g. Decoder would return the resulting
object with references converted to the corresponding Go types.

To do so we introduce DecoderConfig and EncoderConfig - configurations
to tune the decode/encode process. To maintain backward compatibility,
the NewDecoder and NewEncoder signatures are not adjusted and instead
NewDecoderWithConfig and NewEncoderWithConfig are introduced. We should
be sure we won't need e.g. NewDecoderWithConfigAndXXX in the future,
since from now on we can add fields to the configuration structs
without breaking backward compatibility.

For decoding there is DecoderConfig.PersistentLoad which mimics
Unpickler.persistent_load in Python:

https://docs.python.org/3/library/pickle.html#pickle.Unpickler.persistent_load

For encoding there is EncoderConfig.PersistentRef which mimics
Pickler.persistent_id in Python:

https://docs.python.org/3/library/pickle.html#pickle.Pickler.persistent_id

( for Persistent{Load,Ref}, following suggestion from @kisielk, the
  choice was made to explicitly pass in/out Ref, instead of raw
  interface{} pid, because that makes the API cleaner. )

Then both Decoder and Encoder are adjusted correspondingly with tests
added.

About tests: I was contemplating patching support for decoder and
encoder configurations, and for error handling, into our main test
TestDecode, but for now decided not to go that way and to put the test
into a separate TestPersistentRefs.

By the way, with configurations in place, we could start adding
other things there - e.g. the protocol version to use for Encoder.
Currently we have several places where we either don't use opcodes from
higher protocol versions (fearing to break compatibility), or instead
always use higher-version opcodes without checking that the allowed
protocol version permits doing so.
parent 56e0ff77
...@@ -20,12 +20,31 @@ func (te *TypeError) Error() string { ...@@ -20,12 +20,31 @@ func (te *TypeError) Error() string {
// An Encoder encodes Go data structures into pickle byte stream // An Encoder encodes Go data structures into pickle byte stream
type Encoder struct { type Encoder struct {
w io.Writer w io.Writer
config *EncoderConfig
}
// EncoderConfig allows to tune Encoder.
type EncoderConfig struct {
// PersistentRef, if !nil, will be used by encoder to encode objects as persistent references.
//
// Whenever the encoder sees a pointer to a Go struct object, it will call
// PersistentRef to find out how to encode that object. If PersistentRef
// returns nil, the object is encoded regularly. If !nil - the object
// will be encoded as an object reference.
//
// See Ref documentation for more details.
PersistentRef func(obj interface{}) *Ref
} }
// NewEncoder returns a new Encoder struct with default values // NewEncoder returns a new Encoder struct with default values
func NewEncoder(w io.Writer) *Encoder { func NewEncoder(w io.Writer) *Encoder {
return &Encoder{w: w} return NewEncoderWithConfig(w, &EncoderConfig{})
}
// NewEncoderWithConfig is similar to NewEncoder, but allows specifying the encoder configuration.
func NewEncoderWithConfig(w io.Writer, config *EncoderConfig) *Encoder {
return &Encoder{w: w, config: config}
} }
// Encode writes the pickle encoding of v to w, the encoder's writer // Encode writes the pickle encoding of v to w, the encoder's writer
...@@ -77,6 +96,14 @@ func (e *Encoder) encode(rv reflect.Value) error { ...@@ -77,6 +96,14 @@ func (e *Encoder) encode(rv reflect.Value) error {
case reflect.Ptr: case reflect.Ptr:
if rv.Elem().Kind() == reflect.Struct { if rv.Elem().Kind() == reflect.Struct {
// check if we have to encode this object as persistent reference.
if getref := e.config.PersistentRef; getref != nil {
ref := getref(rv.Interface())
if ref != nil {
return e.encodeRef(ref)
}
}
switch rv.Elem().Interface().(type) { switch rv.Elem().Interface().(type) {
case None: case None:
return e.encodeStruct(rv.Elem()) return e.encodeStruct(rv.Elem())
......
...@@ -111,9 +111,10 @@ type Tuple []interface{} ...@@ -111,9 +111,10 @@ type Tuple []interface{}
// Decoder is a decoder for pickle streams. // Decoder is a decoder for pickle streams.
type Decoder struct { type Decoder struct {
r *bufio.Reader r *bufio.Reader
stack []interface{} config *DecoderConfig
memo map[string]interface{} stack []interface{}
memo map[string]interface{}
// a reusable buffer that can be used by the various decoding functions // a reusable buffer that can be used by the various decoding functions
// functions using this should call buf.Reset to clear the old contents // functions using this should call buf.Reset to clear the old contents
...@@ -123,10 +124,37 @@ type Decoder struct { ...@@ -123,10 +124,37 @@ type Decoder struct {
line []byte line []byte
} }
// DecoderConfig allows to tune Decoder.
//
// A configuration is handed to the decoder via NewDecoderWithConfig;
// NewDecoder uses an empty DecoderConfig.
type DecoderConfig struct {
// PersistentLoad, if !nil, will be used by decoder to handle persistent references.
//
// Whenever the decoder finds an object reference in the pickle stream
// it will call PersistentLoad with the corresponding Ref. The result is
// interpreted as follows:
//
//   - !nil object and nil error: the decoder will use that object
//     instead of Ref in the resulted built Go object.
//   - nil object and nil error: the reference is left as is, i.e. the
//     Ref itself is used in the resulted built Go object.
//   - !nil error: handling of the reference fails with an error of the
//     form "pickle: handleRef: <error text>".
//
// If PersistentLoad is nil, references are always represented as Ref.
//
// An example use-case for PersistentLoad is to transform persistent
// references in a ZODB database of form (type, oid) tuple, into
// equivalent-to-type Go ghost object, e.g. equivalent to zodb.BTree.
//
// See Ref documentation for more details.
PersistentLoad func(ref Ref) (interface{}, error)
}
// NewDecoder constructs a new Decoder which will decode the pickle stream in r. // NewDecoder constructs a new Decoder which will decode the pickle stream in r.
func NewDecoder(r io.Reader) *Decoder { func NewDecoder(r io.Reader) *Decoder {
return NewDecoderWithConfig(r, &DecoderConfig{})
}
// NewDecoderWithConfig is similar to NewDecoder, but allows specifying decoder configuration.
func NewDecoderWithConfig(r io.Reader, config *DecoderConfig) *Decoder {
reader := bufio.NewReader(r) reader := bufio.NewReader(r)
return &Decoder{r: reader, stack: make([]interface{}, 0), memo: make(map[string]interface{})} return &Decoder{
r: reader,
config: config,
stack: make([]interface{}, 0),
memo: make(map[string]interface{}),
}
} }
// Decode decodes the pickle stream and returns the result or an error. // Decode decodes the pickle stream and returns the result or an error.
...@@ -470,12 +498,16 @@ func (d *Decoder) loadNone() error { ...@@ -470,12 +498,16 @@ func (d *Decoder) loadNone() error {
} }
// Ref represents Python's persistent reference. // Ref is the default representation for a Python persistent reference.
// //
// Such references are used when one pickle somehow references another pickle // Such references are used when one pickle somehow references another pickle
// in e.g. a database. // in e.g. a database.
// //
// See https://docs.python.org/3/library/pickle.html#pickle-persistent for details. // See https://docs.python.org/3/library/pickle.html#pickle-persistent for details.
//
// See DecoderConfig.PersistentLoad and EncoderConfig.PersistentRef for ways to
// tune Decoder and Encoder to handle persistent references with user-specified
// application logic.
type Ref struct { type Ref struct {
// persistent ID of referenced object. // persistent ID of referenced object.
// //
...@@ -491,8 +523,7 @@ func (d *Decoder) loadPersid() error { ...@@ -491,8 +523,7 @@ func (d *Decoder) loadPersid() error {
return err return err
} }
d.push(Ref{Pid: string(pid)}) return d.handleRef(Ref{Pid: string(pid)})
return nil
} }
// Push a persistent object id from items on the stack // Push a persistent object id from items on the stack
...@@ -501,7 +532,24 @@ func (d *Decoder) loadBinPersid() error { ...@@ -501,7 +532,24 @@ func (d *Decoder) loadBinPersid() error {
if err != nil { if err != nil {
return err return err
} }
d.push(Ref{Pid: pid}) return d.handleRef(Ref{Pid: pid})
}
// handleRef is common place to handle Refs.
func (d *Decoder) handleRef(ref Ref) error {
if load := d.config.PersistentLoad; load != nil {
obj, err := load(ref)
if err != nil {
return fmt.Errorf("pickle: handleRef: %s", err)
}
if obj == nil {
// PersistentLoad asked to leave the reference as is.
obj = ref
}
d.push(obj)
} else {
d.push(ref)
}
return nil return nil
} }
......
...@@ -3,6 +3,7 @@ package ogórek ...@@ -3,6 +3,7 @@ package ogórek
import ( import (
"bytes" "bytes"
"encoding/hex" "encoding/hex"
"errors"
"io" "io"
"math/big" "math/big"
"reflect" "reflect"
...@@ -306,6 +307,111 @@ func TestDecodeError(t *testing.T) { ...@@ -306,6 +307,111 @@ func TestDecodeError(t *testing.T) {
} }
} }
// TestPersistentRefs verifies how Decoder and Encoder handle application-level
// settings with respect to persistent references (Ref).
//
// Each table entry is a pickle stream together with either the object the
// decoder is expected to produce, or the error PersistentLoad is expected to
// fail with. For entries that decode successfully, the test additionally
// checks that encoding the expected object and decoding it back yields the
// same object again.
func TestPersistentRefs(t *testing.T) {
	// ZBTree mimics BTree from ZODB.
	type ZBTree struct {
		oid string
	}

	errInvalidRef := errors.New("invalid reference")

	// Ref -> ? object
	loadref := func(ref Ref) (interface{}, error) {
		// pretend we handle "zodb.BTree" -> ZBTree.
		// (the local is not named t, so that it does not shadow *testing.T)
		tup, ok := ref.Pid.(Tuple)
		if !ok || len(tup) != 2 {
			return nil, errInvalidRef
		}

		class, ok1 := tup[0].(Class)
		oid, ok2 := tup[1].(string)
		if !(ok1 && ok2) {
			return nil, errInvalidRef
		}

		switch class {
		case Class{Module: "zodb", Name: "BTree"}:
			return &ZBTree{oid}, nil
		default:
			// leave it as is
			return nil, nil
		}
	}

	// object -> ? Ref
	getref := func(obj interface{}) *Ref {
		// pretend we handle ZBTree.
		switch obj := obj.(type) {
		case *ZBTree:
			return &Ref{Pid: Tuple{Class{Module: "zodb", Name: "BTree"}, obj.oid}}
		default:
			return nil
		}
	}

	dconf := &DecoderConfig{PersistentLoad: loadref}
	econf := &EncoderConfig{PersistentRef: getref}

	testv := []struct {
		input    string
		expected interface{} // decoded object, or error returned by PersistentLoad
	}{
		{"Pabc\n.", errInvalidRef},
		{"\x80\x01S'abc'\nQ.", errInvalidRef},
		{"\x80\x01S'abc'\nS'123'\n\x86Q.", errInvalidRef},
		{"\x80\x01cfoo\nbar\nS'123'\n\x86Q.", Ref{Pid: Tuple{Class{Module: "foo", Name: "bar"}, "123"}}},
		{"\x80\x01czodb\nBTree\nS'123'\n\x86Q.", &ZBTree{oid: "123"}},
	}

	for _, tt := range testv {
		// decode(input) -> expected
		buf := bytes.NewBufferString(tt.input)
		dec := NewDecoderWithConfig(buf, dconf)
		v, err := dec.Decode()

		expected := tt.expected
		errExpect := ""
		if e, iserr := expected.(error); iserr {
			expected = nil
			errExpect = "pickle: handleRef: " + e.Error()
		}

		// Take the error text up front: calling err.Error() on a nil error
		// would panic when an error was expected but decoding succeeded.
		errStr := ""
		if err != nil {
			errStr = err.Error()
		}

		if !reflect.DeepEqual(v, expected) || errStr != errExpect {
			t.Errorf("%q: decode -> %#v, %q; want %#v, %q",
				tt.input, v, errStr, expected, errExpect)
		}

		// The round-trip check only makes sense for entries that describe
		// an object, not an error.
		if err != nil || errExpect != "" {
			continue
		}

		// expected -> encode -> decode = identity
		buf.Reset()
		enc := NewEncoderWithConfig(buf, econf)
		err = enc.Encode(tt.expected)
		if err != nil {
			t.Errorf("%q: encode(expected) -> %q", tt.input, err)
			continue
		}

		dec = NewDecoderWithConfig(buf, dconf)
		v, err = dec.Decode()
		if err != nil {
			t.Errorf("%q: expected -> encode -> decode: %q", tt.input, err)
			continue
		}

		if !reflect.DeepEqual(v, tt.expected) {
			t.Errorf("%q: expected -> encode -> decode != identity\nhave: %#v\nwant: %#v",
				tt.input, v, tt.expected)
		}
	}
}
func TestFuzzCrashers(t *testing.T) { func TestFuzzCrashers(t *testing.T) {
crashers := []string{ crashers := []string{
"(dS''\n(lc\n\na2a2a22aasS''\na", "(dS''\n(lc\n\na2a2a22aasS''\na",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment