Commit d8e9d7a9 authored by Kirill Smelkov

go/zodb: ΔTail

Add ΔTail - utility class to keep track of history tail of revisional
changes. This will be needed for both DB (to handle invalidations) and
for raw Cache(*). It also might be useful in places outside of zodb/go -
for example WCFS from Wendelin.core uses ΔTail to keep track of which
ZODB revision changed which block of file(+).

Please see ΔTail comments for details.

(*) to check a bit ahead on load to see whether loaded serial is
actually the latest one for ≤ Cache.head, and, if yes, mark
corresponding cache entry as having .head="current" so that the entry
coverage could be extended when Cache.head is extended to cover more
database state instead of loading the same data from the database again.

(+) that's why we have δtail.go.cat-generic, so that third-party
packages could adapt ΔTail to needed types.
parent 030b0c42
// Copyright (C) 2018-2019 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
package zodb
// XXX do we really need ΔTail to be exported from zodb?
// (other users are low level caches + maybe ZEO/NEO -> zplumbing? but then import cycle)
import (
	"fmt"
	"sort"
)
// ΔTail represents tail of revisional changes.
//
// It semantically consists of
//
//	[](rev↑, []id)		; rev ∈ (tail, head]
//
// and index
//
//	{} id -> max(rev: rev changed id)
//
// where
//
//	rev          - is ZODB revision,
//	id           - is an identifier of what has been changed(*), and
//	(tail, head] - is covered revision range
//
// It provides operations to
//
//   - append information to the tail about next revision,
//   - forget information in the tail past specified revision,
//   - query the tail for slice with rev ∈ (lo, hi],
//   - query the tail about what is last revision that changed an id,
//   - query the tail for len and (tail, head].
//
// ΔTail does no locking of its own: it can be read by multiple readers
// simultaneously, but a writer (Append, ForgetPast) must not run concurrently
// with any other access.
//
// (*) examples of id:
//
//	oid  - ZODB object identifier, when ΔTail represents changes to ZODB objects,
//	#blk - file block number, when ΔTail represents changes to a file.
type ΔTail struct {
	// (tail, head] is the covered revision range.
	head Tid // upper bound, inclusive; set by Append
	tail Tid // lower bound, exclusive; set by ForgetPast

	// tailv keeps one entry per appended revision, ordered by .Rev ↑.
	tailv []ΔRevEntry

	// lastRevOf is the index for LastRevOf queries:
	// id -> newest revision among tailv entries that changed id.
	lastRevOf map[Oid]Tid
	// XXX -> lastRevOf = {} oid -> []rev↑ if linear scan in LastRevOf starts to eat cpu
}
// ΔRevEntry represents information about what has been changed in one revision.
type ΔRevEntry struct {
	Rev     Tid   // the revision
	Changev []Oid // ids of what was changed in Rev
}
// NewΔTail creates new ΔTail object.
//
// The created ΔTail holds no revisions and its initial coverage is (at₀, at₀],
// i.e. both Head and Tail are at₀.
func NewΔTail(at0 Tid) *ΔTail {
	δtail := new(ΔTail)
	δtail.head, δtail.tail = at0, at0
	δtail.lastRevOf = map[Oid]Tid{}
	return δtail
}
// Len returns the number of revisions currently kept in δtail.
func (δtail *ΔTail) Len() int {
	revv := δtail.tailv
	return len(revv)
}
// Head returns newest database state for which δtail has history coverage.
//
// Head is moved ↑ by Append only; in particular ForgetPast never moves it ↓,
// even if δtail becomes empty.
func (δtail *ΔTail) Head() Tid {
	head := δtail.head
	return head
}
// Tail returns lower-bound of database state for which δtail has history coverage.
//
// Tail is moved ↑= by ForgetPast, and stays where it was put even if δtail
// becomes empty.
func (δtail *ΔTail) Tail() Tid {
	tail := δtail.tail
	return tail
}
// SliceByRev returns δtail slice of elements with .rev ∈ (low, high].
//
// It must be called with the following condition:
//
//	tail ≤ low ≤ high ≤ head
//
// and panics if the condition does not hold.
//
// The returned slice shares storage with δtail: the caller must not modify it.
//
// Note: contrary to regular go slicing, low is exclusive while high is inclusive.
func (δtail *ΔTail) SliceByRev(low, high Tid) /*readonly*/ []ΔRevEntry {
	tail := δtail.tail
	head := δtail.head
	if !(tail <= low && low <= high && high <= head) {
		panic(fmt.Sprintf("δtail.SliceByRev: invalid query: (%s, %s]; (tail, head] = (%s, %s]", low, high, tail, head))
	}

	tailv := δtail.tailv

	// ex (0,0] tail..head = 0..0
	if len(tailv) == 0 {
		return tailv
	}

	// tailv is ordered by rev↑ (Append rejects anything else), so both
	// edges can be located with binary search.

	// jend = min j : [j].rev > high, i.e. one past the last entry with rev ≤ high
	jend := sort.Search(len(tailv), func(k int) bool {
		return tailv[k].Rev > high
	})
	if jend == 0 {
		return nil // ø
	}

	// i = min i : [i].rev > low; searching only [0, jend) keeps i ≤ jend
	i := sort.Search(jend, func(k int) bool {
		return tailv[k].Rev > low
	})

	return tailv[i:jend]
}
// XXX add way to extend coverage without appending changed data? (i.e. if a
// txn did not change file at all) -> but then it is simply .Append(rev, nil)?
// Append appends to δtail information about what has been changed in next revision.
//
// rev must be ↑, i.e. strictly greater than current Head; Append panics otherwise.
// On success Head becomes rev.
//
// changev is stored as is, without copying.
func (δtail *ΔTail) Append(rev Tid, changev []Oid) {
	// check rev↑
	if !(rev > δtail.head) {
		panic(fmt.Sprintf("δtail.Append: rev not ↑: %s -> %s", δtail.head, rev))
	}

	δ := ΔRevEntry{Rev: rev, Changev: changev}
	δtail.tailv = append(δtail.tailv, δ)
	δtail.head = rev

	// rev is the newest revision, so it becomes the last one for every id it changed
	for i := range changev {
		δtail.lastRevOf[changev[i]] = rev
	}
}
// ForgetPast discards all δtail entries with rev ≤ revCut.
//
// Tail becomes revCut, unless revCut ≤ Tail in which case nothing is changed.
// Head is not touched.
//
// NOTE(review): revCut > Head is not special-cased: Tail is set to revCut
// while Head stays, which leaves tail > head, and in that state no
// SliceByRev query can satisfy its precondition. Presumably callers never
// pass revCut > Head - confirm.
func (δtail *ΔTail) ForgetPast(revCut Tid) {
	// revCut ≤ tail: nothing to do; don't let .tail go ↓
	if !(δtail.tail < revCut) {
		return
	}

	// ncut = number of leading entries with rev ≤ revCut
	ncut := 0
	for ncut < len(δtail.tailv) && δtail.tailv[ncut].Rev <= revCut {
		δ := δtail.tailv[ncut]

		// lastRevOf must not keep pointing at a revision that is going away
		for _, id := range δ.Changev {
			if last := δtail.lastRevOf[id]; last == δ.Rev {
				delete(δtail.lastRevOf, id)
			}
		}

		ncut++
	}

	// copy what remains into freshly allocated storage instead of
	// reslicing tailv[ncut:], so that the underlying array
	// 1) does not grow indefinitely, and
	// 2) is not kept after forget.
	kept := make([]ΔRevEntry, len(δtail.tailv)-ncut)
	copy(kept, δtail.tailv[ncut:])

	δtail.tailv = kept
	δtail.tail = revCut
}
// LastRevOf tries to return what was the last revision that changed id as of at database state.
//
// Depending on current information in δtail it returns either exact result, or
// an upper-bound estimate for the last id revision, assuming id was changed ≤ at.
// Coverage is judged by revisions of entries stored in δtail:
//
// 1) if δtail is empty, or at is outside of stored revisions, at is returned:
//
//	# at ∉ [min(rev ∈ δtail), max(rev ∈ δtail)]
//	LastRevOf(id, at) = at
//
// 2) if δtail has an entry for id change with rev ≤ at, the result is exactly
// the last such revision:
//
//	# at ∈ [min(rev ∈ δtail), max(rev ∈ δtail)]
//	# ∃ rev ∈ δtail: rev changed id && rev ≤ at
//	LastRevOf(id, at) = max(rev: rev changed id && rev ≤ at)
//
// 3) if δtail has no such entry, the oldest stored revision is returned as
// the estimate for the upper bound of the last id revision:
//
//	# at ∈ [min(rev ∈ δtail), max(rev ∈ δtail)]
//	# ∄ rev ∈ δtail: rev changed id && rev ≤ at
//	LastRevOf(id, at) = min(rev ∈ δtail)
//
// On return exact is true only in case 2; in cases 1 and 3 the returned
// revision is only an upper-bound estimate.
func (δtail *ΔTail) LastRevOf(id Oid, at Tid) (_ Tid, exact bool) {
	tailv := δtail.tailv

	// no entries -> no coverage at all
	if len(tailv) == 0 {
		return at, false
	}

	// at outside of stored revisions -> no coverage for at
	revMin, revMax := tailv[0].Rev, tailv[len(tailv)-1].Rev
	if at < revMin || revMax < at {
		return at, false
	}

	// we have the coverage; try the index first
	last, have := δtail.lastRevOf[id]
	switch {
	case !have:
		// id was not changed by any stored revision
		return revMin, false
	case last <= at:
		return last, true
	}

	// what's in index is after at - scan tailv back to find appropriate entry
	// XXX linear scan - see .lastRevOf comment.
	for i := len(tailv) - 1; i >= 0; i-- {
		δ := &tailv[i]
		if δ.Rev > at {
			continue
		}
		for _, δid := range δ.Changev {
			if δid == id {
				return δ.Rev, true
			}
		}
	}

	// id was changed only after at
	return revMin, false
}
#!/bin/bash
# δtail.go.cat-generic - cat to stdout δtail.go variant suitable for templating.
# Copyright (C) 2018-2019 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
# This program emits δtail.go that is suitable for templating with the
# following parameters:
#
# PACKAGE
# ID
#
# The types that are emitted are
#
# ΔTail
# ΔRevEntry
# δtail.go is expected to live next to this script. Everything is quoted so
# that a checkout path containing spaces or glob characters still works.
input="$(dirname -- "$0")/δtail.go"

# sed reads the input file directly instead of via `cat |`: if the file is
# missing or unreadable, sed itself fails and the script exits non-zero
# instead of silently emitting empty output.
#
# NOTE(review): δtail.go contains no "CommitEvent", so the last substitution
# looks like a leftover no-op - confirm before dropping it.
sed \
	-e 's/package zodb/package PACKAGE/g' \
	-e '/package PACKAGE/a \\nimport "lab.nexedi.com/kirr/neo/go/zodb"' \
	-e 's/Tid/zodb.Tid/g' \
	-e 's/Oid/ID/g' \
	-e 's/CommitEvent/ΔRevEntry/g' \
	-- "$input"
// Copyright (C) 2018-2019 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
package zodb
import (
"fmt"
"reflect"
"testing"
)
// TestΔTail drives one ΔTail through a scripted sequence of Append and
// ForgetPast calls. After each step δCheck verifies Head/Tail/Len, the raw
// tailv content, SliceByRev on every sub-range, and LastRevOf together with
// the lastRevOf index. The test then verifies that Append panics on rev that
// is not ↑, and that ForgetPast releases tailv underlying storage.
//
// The steps depend on each other: every check assumes δtail state left by
// the steps before it.
func TestΔTail(t *testing.T) {
// δtail is the object under test; all helpers below operate on it via closure.
var δtail *ΔTail
// R is syntactic sugar to create 1 ΔRevEntry
R := func(rev Tid, changev ...Oid) ΔRevEntry {
return ΔRevEntry{rev, changev}
}
// δAppend is syntactic sugar for δtail.Append
δAppend := func(δ ΔRevEntry) {
δtail.Append(δ.Rev, δ.Changev)
}
// δCheck verifies that δtail state corresponds to provided tailv
δCheck := func(tail, head Tid, tailv ...ΔRevEntry) {
t.Helper()
// sanity-check the expectation itself: a mistake here is a bug in
// the test, not in ΔTail, hence panic instead of t.Fatal.
for i := 1; i < len(tailv); i++ {
if !(tailv[i-1].Rev < tailv[i].Rev) {
panic("test tailv: rev not ↑")
}
}
// Head/Tail/Data
if h := δtail.Head(); h != head {
t.Fatalf("Head() -> %s ; want %s", h, head)
}
if tt := δtail.Tail(); tt != tail {
t.Fatalf("Tail() -> %s ; want %s", tt, tail)
}
if !tailvEqual(δtail.tailv, tailv) {
t.Fatalf("tailv:\nhave: %v\nwant: %v", δtail.tailv, tailv)
}
if l := δtail.Len(); l != len(tailv) {
t.Fatalf("Len() -> %d ; want %d", l, len(tailv))
}
// SliceByRev
// check that δtail.SliceByRev(rlo, rhi) == tailv[ilo:ihi).
//fmt.Printf("\nwhole: (%s, %s] %v\n", δtail.Tail(), δtail.Head(), tailv)
sliceByRev := func(rlo, rhi Tid, ilo, ihi int) {
t.Helper()
//fmt.Printf("(%s, %s] -> [%d:%d)\n", rlo, rhi, ilo, ihi)
have := δtail.SliceByRev(rlo, rhi)
want := tailv[ilo:ihi]
if !tailvEqual(have, want) {
t.Fatalf("SliceByRev(%s, %s) -> %v ; want %v", rlo, rhi, have, want)
}
if len(have) == 0 {
return
}
// make sure returned region is indeed correct
// i.e. cross-check the (rlo, rhi] -> [ilo:ihi) mapping given by the caller:
//
//	tbefore - rev of the entry just before the slice, minus 1 (0 if there is none)
//	tail    - rev of the first entry in the slice, minus 1
//	head    - rev of the last entry in the slice
//	hafter  - rev of the entry just after the slice (TidMax if there is none)
//
// rlo has to exclude the entry before and include the first one;
// rhi has to include the last one and exclude the entry after.
tbefore := Tid(0)
if ilo-1 >= 0 {
tbefore = tailv[ilo-1].Rev-1
}
tail := tailv[ilo].Rev-1
head := tailv[ihi-1].Rev
hafter := TidMax
if ihi < len(tailv) {
hafter = tailv[ihi].Rev
}
if !(tbefore < rlo && rlo <= tail && head <= rhi && rhi < hafter) {
t.Fatalf("SliceByRev(%s, %s) -> %v ; edges do not match query:\n" +
"%s (%s, %s] %s", rlo, rhi, have, tbefore, tail, head, hafter)
}
}
// query every pair of entries with all 4 combinations of
// inclusive/exclusive edges; Rev-1 is used to put a query edge just
// before an entry.
for ilo := 0; ilo < len(tailv); ilo++ {
for ihi := ilo; ihi < len(tailv); ihi++ {
// [ilo, ihi)
sliceByRev(
tailv[ilo].Rev - 1,
tailv[ihi].Rev - 1,
ilo, ihi,
)
// [ilo, ihi]
sliceByRev(
tailv[ilo].Rev - 1,
tailv[ihi].Rev,
ilo, ihi+1,
)
// (ilo, ihi]
if ilo+1 < len(tailv) {
sliceByRev(
tailv[ilo].Rev,
tailv[ihi].Rev,
ilo+1, ihi+1,
)
}
// (ilo, ihi)
if ilo+1 < len(tailv) && ilo+1 <= ihi {
sliceByRev(
tailv[ilo].Rev,
tailv[ihi].Rev - 1,
ilo+1, ihi,
)
}
}
}
// verify lastRevOf query / index
// the expected index is rebuilt from tailv: entries go in rev↑ order, so
// the assignment made last for an id leaves its newest revision.
lastRevOf := make(map[Oid]Tid)
for _, δ := range tailv {
for _, id := range δ.Changev {
idRev, exact := δtail.LastRevOf(id, δ.Rev)
if !(idRev == δ.Rev && exact) {
t.Fatalf("LastRevOf(%v, at=%s) -> %s, %v ; want %s, %v", id, δ.Rev, idRev, exact, δ.Rev, true)
}
lastRevOf[id] = δ.Rev
}
}
if !reflect.DeepEqual(δtail.lastRevOf, lastRevOf) {
t.Fatalf("lastRevOf:\nhave: %v\nwant: %v", δtail.lastRevOf, lastRevOf)
}
}
// δCheckLastUP verifies that δtail.LastRevOf(id, at) gives lastOk and exact=false.
// (we don't need to check for exact=true as those cases are covered in δCheck)
δCheckLastUP := func(id Oid, at, lastOk Tid) {
t.Helper()
last, exact := δtail.LastRevOf(id, at)
if !(last == lastOk && exact == false) {
t.Fatalf("LastRevOf(%v, at=%s) -> %s, %v ; want %s, %v", id, at, last, exact, lastOk, false)
}
}
// scenario: start from empty (3, 3], grow up to 4 revisions, then forget
// them step by step.
δtail = NewΔTail(3)
δCheck(3,3)
δCheckLastUP(4, 12, 12) // δtail = ø
δAppend(R(10, 3,5))
δCheck(3,10, R(10, 3,5))
δCheckLastUP(3, 2, 2) // at < δtail
δCheckLastUP(3, 12, 12) // at > δtail
δCheckLastUP(4, 10, 10) // id ∉ δtail
δAppend(R(11, 7))
δCheck(3,11, R(10, 3,5), R(11, 7))
δAppend(R(12, 7))
δCheck(3,12, R(10, 3,5), R(11, 7), R(12, 7))
δAppend(R(14, 3,8))
δCheck(3,14, R(10, 3,5), R(11, 7), R(12, 7), R(14, 3,8))
δCheckLastUP(8, 12, 10) // id ∈ δtail, but has no entry with rev ≤ at
// revCut=9 is below the first entry: only .tail moves, no entry is dropped
δtail.ForgetPast(9)
δCheck(9,14, R(10, 3,5), R(11, 7), R(12, 7), R(14, 3,8))
δtail.ForgetPast(10)
δCheck(10,14, R(11, 7), R(12, 7), R(14, 3,8))
δtail.ForgetPast(12)
δCheck(12,14, R(14, 3,8))
δtail.ForgetPast(14)
δCheck(14,14)
δtail.ForgetPast(12)
δCheck(14,14) // .tail should not go ↓
// Append panics on non-↑ rev
// δAppendPanic verifies that Append(δ.rev = rev) panics.
δAppendPanic := func(rev Tid) {
defer func() {
r := recover()
if r == nil {
t.Fatalf("append(rev=%s) non-↑: not panicked", rev)
}
// the panic value is expected to be exactly the message Append formats
want := fmt.Sprintf("δtail.Append: rev not ↑: %s -> %s", δtail.head, rev)
if r != want {
t.Fatalf("append non-↑:\nhave: %q\nwant: %q", r, want)
}
}()
δAppend(R(rev))
}
// on empty δtail
// (head is still 14 here: ForgetPast emptied tailv but left head in place)
δAppendPanic(14)
δAppendPanic(13)
δAppendPanic(12)
// on !empty δtail
δAppend(R(15, 1))
δCheck(14,15, R(15, 1))
δAppendPanic(15)
δAppendPanic(14)
// .tailv underlying storage is not kept after forget
δtail.ForgetPast(15)
δCheck(15,15)
// append N revisions 16..1015, then forget everything ≤ N=1000: only 15
// entries remain, so capacity must drop by far more than 10x if ForgetPast
// really reallocates.
const N = 1E3
for rev, i := Tid(16), 0; i < N; i, rev = i+1, rev+1 {
δAppend(R(rev, 1))
}
capN := cap(δtail.tailv)
δtail.ForgetPast(N)
if c := cap(δtail.tailv); !(c < capN/10) {
t.Fatalf("forget: tailv storage did not shrink: cap%v: %d -> cap: %d", N, capN, c)
}
// .tailv underlying storage does not grow indefinitely
// XXX cannot test as the growth here goes to left and we cannot get
// access to whole underlying array from a slice.
}
// tailvEqual reports whether a and b hold the same entries.
//
// Two empty slices are always considered equal: one can be nil and another
// !nil [], and reflect.DeepEqual does not think those are equal.
func tailvEqual(a, b []ΔRevEntry) bool {
	if len(a) == 0 && len(b) == 0 {
		return true
	}
	return reflect.DeepEqual(a, b)
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment