Commit 1e87fe5b authored by Kirill Smelkov's avatar Kirill Smelkov

go/zodbpickle: Switch to unpickle with PyDict=y mode

Previously we were unpickling dicts into map[any]any and it was making a
difference from which Python version the dict was saved: if it was saved
from py2 with str keys, then the dict needed to be accessed with ByteString
keys, while if it was saved from py3 with str keys, the dict needed to
be accessed with string keys. This peculiarity is one of the reasons why
Map test currently fails on py3_pickle3 - because there - both in the
test and inside Map.PySetState, dictionaries are accessed via ByteString
keys which works only wrt py2-saved data.

-> Fix it by switching the unpickler to work in PyDict=y mode(*) which
results in dicts unpickled as ogórek.Dict instead of map[any]any
with the ogórek.Dict considering ByteString to be equal to both string and Bytes
with the same underlying content. This allows programs to access Dict via
string/bytes keys following Python3 model, while still being able to handle
dictionaries generated from under Python2.

For ZODB/go this is change in behaviour exposed to outside. However
there is currently only one known ZODB/go user - WCFS in Wendelin.core -
and that user will be updated correspondingly as well.

The change fixes the following failure in go/zodb tests

    --- FAIL: TestPersistentMapListLoad/py3_pickle3 (0.70s)
        persistent_x_test.go:57: persistent.mapping.PersistentMapping(0000000000000000): activate: pysetstate: noone of ["data" "_container"] is present in state dict

but one more remains there which will be addressed in the next patch.

(*) see https://github.com/kisielk/og-rek/pull/75 for details.
parent 90799e25
......@@ -35,6 +35,11 @@
// At application level utilities like ogórek.AsBytes and ogórek.AsString are
// handy to work with unpickled data for pickles generated by either py2 or py3.
//
// The decoder also uses PyDict=y mode to generally adhere to Python semantic
// and to allow programs to access unpickled dictionaries via string/bytes keys
// following py3 model, while still being able to handle dictionaries
// generated from under py2.
//
// The encoder emits pickles with protocol=3 to natively support all py bytes
// and strings types, and to stay interoperable with both py2 and py3: both ZODB4
// and ZODB5 use zodbpickle which supports protocol=3 on py2 and py3, and ZODB5
......@@ -78,6 +83,7 @@ func NewPickler(w io.Writer, getref func(obj any) *pickle.Ref) *pickle.Encoder {
func NewUnpickler(r io.Reader, loadref func(ref pickle.Ref) (any, error)) *pickle.Decoder {
return pickle.NewDecoderWithConfig(r, &pickle.DecoderConfig{
StrictUnicode: true, // see top-level doc
PyDict: true, // see top-level doc
PersistentLoad: loadref,
})
}
......
......@@ -580,22 +580,19 @@ var brokenZClass = &zclass{
type Map struct {
Persistent
// XXX it is not possible to embed map. And even if we embed a map via
// another type = map, then it is not possible to use indexing and
// range over Map. -> just provide access to the map as .Data .
Data map[interface{}]interface{}
pickle.Dict
}
type mapState Map // hide state methods from public API
func (m *mapState) DropState() {
m.Data = nil
m.Dict = pickle.Dict{}
}
func (m *mapState) PyGetState() interface{} {
return map[interface{}]interface{}{
"data": m.Data,
}
return pickle.NewDictWithData(
"data", m.Dict,
)
}
func (m *mapState) PySetState(pystate interface{}) error {
......@@ -606,12 +603,12 @@ func (m *mapState) PySetState(pystate interface{}) error {
return err
}
data, ok := xdata.(map[interface{}]interface{})
data, ok := xdata.(pickle.Dict)
if !ok {
return fmt.Errorf("state data must be dict, not %T", xdata)
}
m.Data = data
m.Dict = data
return nil
}
......@@ -619,7 +616,9 @@ func (m *mapState) PySetState(pystate interface{}) error {
type List struct {
Persistent
// XXX it is not possible to embed slice - see Map for similar issue and more details.
// XXX it is not possible to embed slice. And even if we embed a slice via
// another type = slice, then it is not possible to use indexing and
// range over List. -> just provide access to the slice as .Data .
Data []interface{}
}
......@@ -630,9 +629,9 @@ func (l *listState) DropState() {
}
func (l *listState) PyGetState() interface{} {
return map[interface{}]interface{}{
"data": l.Data,
}
return pickle.NewDictWithData(
"data", l.Data,
)
}
func (l *listState) PySetState(pystate interface{}) error {
......@@ -653,17 +652,17 @@ func (l *listState) PySetState(pystate interface{}) error {
// pystateDict1 decodes pystate that is expected to be {} with single key and
// returns data for that key.
func pystateDict1(pystate interface{}, acceptKeys ...string) (data interface{}, _ error) {
d, ok := pystate.(map[interface{}]interface{})
d, ok := pystate.(pickle.Dict)
if !ok {
return nil, fmt.Errorf("state must be dict, not %T", pystate)
}
if l := len(d); l != 1 {
if l := d.Len(); l != 1 {
return nil, fmt.Errorf("state dict has %d keys, must be only 1", l)
}
for _, key := range acceptKeys {
data, ok := d[pickle.ByteString(key)]
data, ok := d.Get_(key)
if ok {
return data, nil
}
......
......@@ -51,6 +51,7 @@ func _TestPersistentMapListLoad(t0 *testing.T, z *ZTestData) {
defer tdb.Close()
t := tdb.Open(&zodb.ConnOptions{})
X := xtesting.FatalIf(t0)
xroot := t.GetAny(0)
root, ok := xroot.(*zodb.Map)
......@@ -62,18 +63,14 @@ func _TestPersistentMapListLoad(t0 *testing.T, z *ZTestData) {
defer root.PDeactivate()
// see py/pydata-gen-testdata
assert.Equal(len(root.Data), 3)
assert.Equal(root.Data[pickle.ByteString("int1")], int64(1))
assert.Equal(root.Len(), 3)
assert.Equal(root.Get("int1"), int64(1))
xabc := root.Data[pickle.ByteString("strABC")]
__, ok := xabc.(pickle.ByteString)
if !ok {
t.Fatalf("root[strABC]: got %T ; expect str", xabc)
}
abc := string(__)
xabc := root.Get("strABC")
abc, err := pickle.AsString(xabc); X(err)
assert.Equal(abc, "abc")
xplist := root.Data[pickle.ByteString("plist")]
xplist := root.Get("plist")
plist, ok := xplist.(*zodb.List)
if !ok {
t.Fatalf("plist: got %T ; expect List", xplist)
......@@ -84,7 +81,7 @@ func _TestPersistentMapListLoad(t0 *testing.T, z *ZTestData) {
assert.Equal(len(plist.Data), 3)
xa := plist.Data[0]
__, ok = xa.(pickle.ByteString)
__, ok := xa.(pickle.ByteString)
if !ok {
t.Fatalf("plist[0]: got %T ; expect str", xa)
}
......
......@@ -340,7 +340,7 @@ func (r rpc) zeo4Error(arg interface{}) error {
// Callable: ogórek.Class{Module:"ZODB.POSException", Name:"_recon"},
// Args: ogórek.Tuple{
// ogórek.Class{Module:"ZODB.POSException", Name:"POSKeyError"},
// map[interface {}]interface {}{
// ogórek.Dict{
// "args":ogórek.Tuple{"\x00\x00\x00\x00\x00\x00\bP"}
// }
// }
......@@ -373,14 +373,14 @@ func (r rpc) zeo4Error(arg interface{}) error {
}
klass, ok1 := argv[0].(pickle.Class)
state, ok2 := argv[1].(map[interface{}]interface{})
state, ok2 := argv[1].(pickle.Dict)
if !(ok1 && ok2) {
return r.ereplyf("except4: %s: got (%T, %T); expect (class, dict)", exc, argv[0], argv[1])
}
args, ok := state["args"].(pickle.Tuple)
args, ok := state.Get("args").(pickle.Tuple)
if !ok {
return r.ereplyf("except4: %s: state.args = %#v; expect tuple", exc, state["args"])
return r.ereplyf("except4: %s: state.args = %#v; expect tuple", exc, state.Get("args"))
}
exc = klass.Module + "." + klass.Name
......
......@@ -150,7 +150,7 @@
//
// --------
//
// (*) for pickle support package github.com/kisielk/og-rek is used in StrictUnicode mode.
// (*) for pickle support package github.com/kisielk/og-rek is used in StrictUnicode + PyDict mode.
//
//
// Storage drivers
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment