Commit db852511 authored by Kirill Smelkov's avatar Kirill Smelkov

go/zodb/fs1: Teach it to support notifications on database change

Following-up on 4d2c8b1d (go/zodb: Require drivers to provide
notifications for database change events) let's teach FileStorage to
support watching for database changes:

- add watcher who observes changes to data file via fsnotify.

- when we see that the file was updated - there is a tricky case to distinguish
  in-progress writes from just some garbage added at tail. See comments in
  _watcher for details.

- if the watcher sees there is indeed new transactions, the watcher
  updates index & txnh{Min,Max} and sends corresponding event to watchq.

- since index / txnh{Min,Max} can now be changed (by watcher) -> they
  are now protected by mu.

- consequently operations like LastTid, LastOid, Load, ... are all taught to
  observe index / txnh{Min,Max} state in read-snapshot way.

- add down & friends to indicate that the storage is no longer
  operational, if watcher sees there is a problem with data file.

- Open is reworked to start from loading/rebuilding index first, and then tail
  to watcher to detect whether what's after found topPos is garbage or another
  in-progress transaction. Consequently it is now possible to correctly open
  filestorage that is being currently written to and has in-progress transaction
  at tail.

The patch is a bit big, but the changes here are all tightly
interrelated.
parent 8eb0988e
This diff is collapsed.
// Copyright (C) 2017 Nexedi SA and Contributors. // Copyright (C) 2017-2019 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
...@@ -20,15 +20,21 @@ ...@@ -20,15 +20,21 @@
package fs1 package fs1
import ( import (
"bytes"
"context" "context"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"os"
"os/exec"
"reflect" "reflect"
"testing" "testing"
"lab.nexedi.com/kirr/neo/go/internal/xtesting"
"lab.nexedi.com/kirr/neo/go/zodb" "lab.nexedi.com/kirr/neo/go/zodb"
"lab.nexedi.com/kirr/go123/exc" "lab.nexedi.com/kirr/go123/exc"
"lab.nexedi.com/kirr/go123/xerr"
) )
// one database transaction record // one database transaction record
...@@ -109,7 +115,13 @@ func checkLoad(t *testing.T, fs *FileStorage, xid zodb.Xid, expect objState) { ...@@ -109,7 +115,13 @@ func checkLoad(t *testing.T, fs *FileStorage, xid zodb.Xid, expect objState) {
} }
func xfsopen(t testing.TB, path string) *FileStorage { func xfsopen(t testing.TB, path string) *FileStorage {
fs, err := Open(context.Background(), path) t.Helper()
return xfsopenopt(t, path, &zodb.DriverOptions{ReadOnly: true})
}
func xfsopenopt(t testing.TB, path string, opt *zodb.DriverOptions) *FileStorage {
t.Helper()
fs, err := Open(context.Background(), path, opt)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
...@@ -117,7 +129,7 @@ func xfsopen(t testing.TB, path string) *FileStorage { ...@@ -117,7 +129,7 @@ func xfsopen(t testing.TB, path string) *FileStorage {
} }
func TestLoad(t *testing.T) { func TestLoad(t *testing.T) {
fs := xfsopen(t, "testdata/1.fs") // TODO open read-only fs := xfsopen(t, "testdata/1.fs")
defer exc.XRun(fs.Close) defer exc.XRun(fs.Close)
// current knowledge of what was "before" for an oid as we scan over // current knowledge of what was "before" for an oid as we scan over
...@@ -266,7 +278,7 @@ func testIterate(t *testing.T, fs *FileStorage, tidMin, tidMax zodb.Tid, expectv ...@@ -266,7 +278,7 @@ func testIterate(t *testing.T, fs *FileStorage, tidMin, tidMax zodb.Tid, expectv
} }
func TestIterate(t *testing.T) { func TestIterate(t *testing.T) {
fs := xfsopen(t, "testdata/1.fs") // TODO open ro fs := xfsopen(t, "testdata/1.fs")
defer exc.XRun(fs.Close) defer exc.XRun(fs.Close)
// all []tids in test database // all []tids in test database
...@@ -302,7 +314,7 @@ func TestIterate(t *testing.T) { ...@@ -302,7 +314,7 @@ func TestIterate(t *testing.T) {
} }
func BenchmarkIterate(b *testing.B) { func BenchmarkIterate(b *testing.B) {
fs := xfsopen(b, "testdata/1.fs") // TODO open ro fs := xfsopen(b, "testdata/1.fs")
defer exc.XRun(fs.Close) defer exc.XRun(fs.Close)
ctx := context.Background() ctx := context.Background()
...@@ -341,3 +353,188 @@ func BenchmarkIterate(b *testing.B) { ...@@ -341,3 +353,188 @@ func BenchmarkIterate(b *testing.B) {
b.StopTimer() b.StopTimer()
} }
// TestWatch verifies that watcher can observe commits done from outside.
func TestWatch(t *testing.T) {
X := exc.Raiseif
xtesting.NeedPy(t, "zodbtools")
workdir := xworkdir(t)
tfs := workdir + "/t.fs"
// Object represents object state to be committed.
type Object struct {
oid zodb.Oid
data string
}
// zcommit commits new transaction into tfs with data specified by objv.
zcommit := func(at zodb.Tid, objv ...Object) (_ zodb.Tid, err error) {
defer xerr.Contextf(&err, "zcommit @%s", at)
// prepare text input for `zodb commit`
zin := &bytes.Buffer{}
fmt.Fprintf(zin, "user %q\n", "author")
fmt.Fprintf(zin, "description %q\n", fmt.Sprintf("test commit; at=%s", at))
fmt.Fprintf(zin, "extension %q\n", "")
for _, obj := range objv {
fmt.Fprintf(zin, "obj %s %d null:00\n", obj.oid, len(obj.data))
zin.WriteString(obj.data)
zin.WriteString("\n")
}
zin.WriteString("\n")
// run py `zodb commit`
cmd:= exec.Command("python2", "-m", "zodbtools.zodb", "commit", tfs, at.String())
cmd.Stdin = zin
cmd.Stderr = os.Stderr
out, err := cmd.Output()
if err != nil {
return zodb.InvalidTid, err
}
out = bytes.TrimSuffix(out, []byte("\n"))
tid, err := zodb.ParseTid(string(out))
if err != nil {
return zodb.InvalidTid, fmt.Errorf("committed, but invalid output: %s", err)
}
return tid, nil
}
xcommit := func(at zodb.Tid, objv ...Object) zodb.Tid {
t.Helper()
tid, err := zcommit(at, objv...); X(err)
return tid
}
// force tfs creation & open tfs at go side
at := xcommit(0, Object{0, "data0"})
watchq := make(chan zodb.CommitEvent)
fs := xfsopenopt(t, tfs, &zodb.DriverOptions{ReadOnly: true, Watchq: watchq})
ctx := context.Background()
checkLastTid := func(lastOk zodb.Tid) {
t.Helper()
head, err := fs.LastTid(ctx); X(err)
if head != lastOk {
t.Fatalf("check last_tid: got %s; want %s", head, lastOk)
}
}
checkLastTid(at)
checkLoad := func(at zodb.Tid, oid zodb.Oid, dataOk string, serialOk zodb.Tid) {
t.Helper()
xid := zodb.Xid{at, oid}
buf, serial, err := fs.Load(ctx, xid); X(err)
data := string(buf.XData())
if !(data == dataOk && serial == serialOk) {
t.Fatalf("check load %s:\nhave: %q %s\nwant: %q %s",
xid, data, serial, dataOk, serialOk)
}
}
// commit -> check watcher observes what we committed.
//
// XXX python `import pkg_resources` takes ~ 300ms.
// https://github.com/pypa/setuptools/issues/510
//
// Since pkg_resources are used everywhere (e.g. in zodburi to find all
// uri resolvers) this import slowness becomes the major part of time to
// run py `zodb commit`.
//
// if one day it is either fixed, or worked around, we could ↑ 10 to 100.
for i := zodb.Oid(1); i <= 10; i++ {
data0 := fmt.Sprintf("data0.%d", i)
datai := fmt.Sprintf("data%d", i)
at = xcommit(at,
Object{0, data0},
Object{i, datai})
// TODO also test for watcher errors
e := <-watchq
if objvWant := []zodb.Oid{0, i}; !(e.Tid == at && reflect.DeepEqual(e.Changev, objvWant)) {
t.Fatalf("watch:\nhave: %s %s\nwant: %s %s", e.Tid, e.Changev, at, objvWant)
}
checkLastTid(at)
// make sure we can load what was committed.
checkLoad(at, 0, data0, at)
checkLoad(at, i, datai, at)
}
err := fs.Close(); X(err)
e, ok := <-watchq
if ok {
t.Fatalf("watch after close -> %v; want closed", e)
}
}
// TestOpenRecovery verifies how Open handles data file with not-finished voted
// transaction in the end.
func TestOpenRecovery(t *testing.T) {
X := exc.Raiseif
main, err := ioutil.ReadFile("testdata/1.fs"); X(err)
index, err := ioutil.ReadFile("testdata/1.fs.index"); X(err)
lastTidOk := _1fs_dbEntryv[len(_1fs_dbEntryv)-1].Header.Tid
topPos := int64(_1fs_indexTopPos)
voteTail, err := ioutil.ReadFile("testdata/1voted.tail"); X(err)
workdir := xworkdir(t)
ctx := context.Background()
// checkL runs f on main + voteTail[:l] . Two cases are verified:
// 1) when index is already present, and
// 2) when initially there is no index.
checkL := func(t *testing.T, l int, f func(t *testing.T, tfs string)) {
tfs := fmt.Sprintf("%s/1+vote%d.fs", workdir, l)
t.Run(fmt.Sprintf("oldindex=n/tail=+vote%d", l), func(t *testing.T) {
err := ioutil.WriteFile(tfs, append(main, voteTail[:l]...), 0600); X(err)
f(t, tfs)
})
t.Run(fmt.Sprintf("oldindex=y/tail=+vote%d", l), func(t *testing.T) {
err := ioutil.WriteFile(tfs+".index", index, 0600); X(err)
f(t, tfs)
})
}
// if txn header can be fully read - it should be all ok
lok := []int{0}
for l := len(voteTail); l >= TxnHeaderFixSize; l-- {
lok = append(lok, l)
}
for _, l := range lok {
checkL(t, l, func(t *testing.T, tfs string) {
fs := xfsopen(t, tfs)
defer func() {
err = fs.Close(); X(err)
}()
head, err := fs.LastTid(ctx); X(err)
if head != lastTidOk {
t.Fatalf("last_tid: %s ; expected: %s", head, lastTidOk)
}
})
}
// if txn header is stably incomplete - open should fail
// XXX better check 0..sizeof(txnh)-1 but in this range each check is slow.
for _, l := range []int{TxnHeaderFixSize-1,1} {
checkL(t, l, func(t *testing.T, tfs string) {
_, err := Open(ctx, tfs, &zodb.DriverOptions{ReadOnly: true})
estr := ""
if err != nil {
estr = err.Error()
}
ewant := fmt.Sprintf("open %s: checking whether it is garbage @%d: %s",
tfs, topPos, &RecordError{tfs, "transaction record", topPos, "read", io.ErrUnexpectedEOF})
if estr != ewant {
t.Fatalf("unexpected error:\nhave: %q\nwant: %q", estr, ewant)
}
})
}
}
// Copyright (C) 2017-2018 Nexedi SA and Contributors. // Copyright (C) 2017-2019 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
...@@ -152,7 +152,7 @@ func (fh *FileHeader) Load(r io.ReaderAt) error { ...@@ -152,7 +152,7 @@ func (fh *FileHeader) Load(r io.ReaderAt) error {
_, err := r.ReadAt(fh.Magic[:], 0) _, err := r.ReadAt(fh.Magic[:], 0)
err = okEOF(err) err = okEOF(err)
if err != nil { if err != nil {
return err return fmt.Errorf("%sread magic: %s", ioprefix(r), err)
} }
if string(fh.Magic[:]) != Magic { if string(fh.Magic[:]) != Magic {
return fmt.Errorf("%sinvalid fs1 magic %q", ioprefix(r), fh.Magic) return fmt.Errorf("%sinvalid fs1 magic %q", ioprefix(r), fh.Magic)
......
// Copyright (C) 2017-2018 Nexedi SA and Contributors. // Copyright (C) 2017-2019 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
...@@ -453,7 +453,10 @@ func (index *Index) Update(ctx context.Context, r io.ReaderAt, topPos int64, pro ...@@ -453,7 +453,10 @@ func (index *Index) Update(ctx context.Context, r io.ReaderAt, topPos int64, pro
return err return err
} }
// XXX check txnh.Status != TxnInprogress // this transaction was only voted, not fully committed yet.
if it.Txnh.Status == zodb.TxnInprogress {
return nil
}
// check for topPos overlapping txn & whether we are done. // check for topPos overlapping txn & whether we are done.
// topPos=-1 will never match here // topPos=-1 will never match here
...@@ -469,7 +472,9 @@ func (index *Index) Update(ctx context.Context, r io.ReaderAt, topPos int64, pro ...@@ -469,7 +472,9 @@ func (index *Index) Update(ctx context.Context, r io.ReaderAt, topPos int64, pro
// do not update the index immediately so that in case of error // do not update the index immediately so that in case of error
// in the middle of txn's data, index stays consistent and // in the middle of txn's data, index stays consistent and
// correct for topPos pointing to previous transaction. // correct for topPos pointing to previous transaction.
update := map[zodb.Oid]int64{} // XXX malloc every time -> better reuse //
// (keep in sync with FileStorage.watcher)
update := map[zodb.Oid]int64{}
for { for {
err = it.NextData() err = it.NextData()
if err != nil { if err != nil {
...@@ -693,7 +698,9 @@ func (index *Index) VerifyForFile(ctx context.Context, path string, ntxn int, pr ...@@ -693,7 +698,9 @@ func (index *Index) VerifyForFile(ctx context.Context, path string, ntxn int, pr
return nil, err return nil, err
} }
topPos := fi.Size() // XXX there might be last TxnInprogress transaction // FIXME there might be last TxnInprogress transaction.
// TODO -> try to read txn header, and if it is ø or in-progress - that's ok.
topPos := fi.Size()
if index.TopPos != topPos { if index.TopPos != topPos {
return nil, indexCorrupt(f, "topPos mismatch: data=%v index=%v", topPos, index.TopPos) return nil, indexCorrupt(f, "topPos mismatch: data=%v index=%v", topPos, index.TopPos)
} }
......
#!/usr/bin/env python2 #!/usr/bin/env python2
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2017-2018 Nexedi SA and Contributors. # Copyright (C) 2017-2019 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -21,7 +21,10 @@ ...@@ -21,7 +21,10 @@
"""generate reference fs1 database and index for tests""" """generate reference fs1 database and index for tests"""
from ZODB.FileStorage import FileStorage from ZODB.FileStorage import FileStorage
from zodbtools.test.gen_testdata import gen_testdb from ZODB import DB
from zodbtools.test.gen_testdata import gen_testdb, precommit
from os import stat, remove
from shutil import copyfile
from golang.gcompat import qq from golang.gcompat import qq
import struct import struct
...@@ -133,6 +136,38 @@ def main(): ...@@ -133,6 +136,38 @@ def main():
emit("\t\t},") emit("\t\t},")
emit("\t},") emit("\t},")
emit("}") emit("}")
stor.close()
# prepare file with voted (not fully committed) tail
voted = "testdata/1voted.fs"
copyfile(outfs, voted)
vstor = FileStorage(voted)
vdb = DB(vstor)
vconn = vdb.open()
vroot = vconn.root()
vroot._p_activate() # to know its current serial
txn = precommit(u"author", u"description", {'aaa': 'bbb'})
vstor.tpc_begin(txn)
vstor.store(vroot._p_oid, vroot._p_serial, '000 data 000', '', txn)
vstor.tpc_vote(txn)
# NO tpc_finish here so that status remain 'c' (voted) instead of ' ' (committed)
st = stat(outfs)
l = st.st_size
vf = open(voted, 'rb')
vf.seek(l)
voted_tail = vf.read()
assert voted_tail[-1+8+8+1] == 'c' # voted, not finished (' ')
with open("testdata/1voted.tail", "wb") as vt:
vt.write(voted_tail)
remove(voted)
remove(voted+".index")
remove(voted+".tmp")
remove(voted+".lock")
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -22,8 +22,6 @@ package fs1 ...@@ -22,8 +22,6 @@ package fs1
import ( import (
"context" "context"
"fmt"
"log"
"net/url" "net/url"
"lab.nexedi.com/kirr/neo/go/zodb" "lab.nexedi.com/kirr/neo/go/zodb"
...@@ -34,21 +32,7 @@ func openByURL(ctx context.Context, u *url.URL, opt *zodb.DriverOptions) (zodb.I ...@@ -34,21 +32,7 @@ func openByURL(ctx context.Context, u *url.URL, opt *zodb.DriverOptions) (zodb.I
// XXX u.Path is not always raw path - recheck and fix // XXX u.Path is not always raw path - recheck and fix
path := u.Host + u.Path path := u.Host + u.Path
// XXX readonly stub fs, err := Open(ctx, path, opt)
// XXX place = ?
if !opt.ReadOnly {
return nil, fmt.Errorf("fs1: %s: TODO write mode not implemented", path)
}
// FIXME handle opt.Watchq
// for now we pretend as if the database is not changing.
if opt.Watchq != nil {
log.Print("fs1: FIXME: watchq support not implemented - there" +
"won't be notifications about database changes")
}
fs, err := Open(ctx, path)
fs.watchq = opt.Watchq
return fs, err return fs, err
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment