Merge pull request #8 from navytux/splitx

splitX bugfix test + simplification In the process of going through b code I've noticed that splitX bug fix (commit 5c732b36) handling logic was not covered by tests for edge cases and that this edge-handling logic itself can be simplified. Please find patches which add corresponding test and do the simplification attached. The simplified logic will be handy for upcoming work. Please see details in individual commit messages. Thanks beforehand, Kirill

Merge pull request #8 from navytux/splitx
splitX bugfix test + simplification In the process of going through b code I've noticed that splitX bug fix (commit 5c732b36) handling logic was not covered by tests for edge cases and that this edge-handling logic itself can be simplified. Please find patches which add corresponding test and do the simplification attached. The simplified logic will be handy for upcoming work. Please see details in individual commit messages. Thanks beforehand, Kirill
6955404b · Kirill Smelkov · GitHub · aaaa43c9 · 9fff7486 · 6955404b
Commit 6955404b authored Apr 13, 2017 by Kirill Smelkov Committed by GitHub Apr 13, 2017
Showing with 159 additions and 80 deletions

AUTHORS AUTHORS +1 -0

CONTRIBUTORS CONTRIBUTORS +1 -0

all_test.go all_test.go +65 -0

btree.go btree.go +13 -27

example/all_test.go example/all_test.go +49 -4

example/int.go example/int.go +30 -49

No files found.
--- a/AUTHORS
+++ b/AUTHORS
@@ -9,3 +9,4 @@
 # Please keep the list sorted.
 Jan Mercl <0xjnml@gmail.com>
+Nexedi <jp@nexedi.com>
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -10,4 +10,5 @@ Andrey Elenskiy <andrey.elenskiy@gmail.com>
 Brian Fallik <bfallik@gmail.com>
 Dan Kortschak <dan.kortschak@adelaide.edu.au>
 Jan Mercl <0xjnml@gmail.com>
+Kirill Smelkov <kirr@nexedi.com>
 Nikifor Seryakov <nikandfor@gmail.com>
--- a/all_test.go
+++ b/all_test.go
@@ -292,6 +292,71 @@ func TestPrealloc(*testing.T) {
 	r.Close()
 }
+func TestSplitXOnEdge(t *testing.T) {
+	// verify how splitX works when splitting X for k pointing directly at split edge
+	tr := TreeNew(cmp)
+	// one index page with 2*kx+2 elements (last has .k=∞  so x.c=2*kx+1)
+	// which will splitX on next Set
+	for i := 0; i <= (2*kx + 1) * 2*kd; i++ {
+		// odd keys are left to be filled in second test
+		tr.Set(2*i, 2*i)
+	}
+	x0 := tr.r.(*x)
+	if x0.c != 2*kx+1 {
+		t.Fatalf("x0.c: %v  ; expected %v", x0.c, 2*kx+1)
+	}
+	// set element with k directly at x0[kx].k
+	kedge := 2 * (kx + 1) * (2*kd)
+	if x0.x[kx].k != kedge {
+		t.Fatalf("edge key before splitX: %v  ; expected %v", x0.x[kx].k, kedge)
+	}
+	tr.Set(kedge, 777)
+	// if splitX was wrong kedge:777 would land into wrong place with Get failing
+	v, ok := tr.Get(kedge)
+	if !(v==777 && ok) {
+		t.Fatalf("after splitX: Get(%v) -> %v, %v  ; expected 777, true", v, ok)
+	}
+	// now check the same when splitted X has parent
+	xr := tr.r.(*x)
+	if xr.c != 1 { // second x comes with k=∞ with .c index
+		t.Fatalf("after splitX: xr.c: %v  ; expected 1", xr.c)
+	}
+	if xr.x[0].ch != x0 {
+		t.Fatal("xr[0].ch is not x0")
+	}
+	for i := 0; i <= (2*kx) * kd; i++ {
+		tr.Set(2*i+1, 2*i+1)
+	}
+	// check x0 is in pre-splitX condition and still at the right place
+	if x0.c != 2*kx+1 {
+		t.Fatalf("x0.c: %v  ; expected %v", x0.c, 2*kx+1)
+	}
+	if xr.x[0].ch != x0 {
+		t.Fatal("xr[0].ch is not x0")
+	}
+	// set element with k directly at x0[kx].k
+	kedge = (kx + 1) * (2*kd)
+	if x0.x[kx].k != kedge {
+		t.Fatalf("edge key before splitX: %v  ; expected %v", x0.x[kx].k, kedge)
+	}
+	tr.Set(kedge, 888)
+	// if splitX was wrong kedge:888 would land into wrong place
+	v, ok = tr.Get(kedge)
+	if !(v==888 && ok) {
+		t.Fatalf("after splitX: Get(%v) -> %v, %v  ; expected 888, true", v, ok)
+	}
+}
 func BenchmarkSetSeq1e3(b *testing.B) {
 	benchmarkSetSeq(b, 1e3)
 }

--- a/btree.go
+++ b/btree.go
@@ -547,12 +547,13 @@ func (t *Tree) Set(k interface{} /*K*/, v interface{} /*V*/) {
 		if ok {
 			switch x := q.(type) {
 			case *x:
+				i++
 				if x.c > 2*kx {
 					x, i = t.splitX(p, x, pi, i)
 				}
-				pi = i + 1
+				pi = i
 				p = x
-				q = x.x[i+1].ch
+				q = x.x[i].ch
 				continue
 			case *d:
 				x.d[i].v = v
@@ -614,12 +615,13 @@ func (t *Tree) Put(k interface{} /*K*/, upd func(oldV interface{} /*V*/, exists
 		if ok {
 			switch x := q.(type) {
 			case *x:
+				i++
 				if x.c > 2*kx {
 					x, i = t.splitX(p, x, pi, i)
 				}
-				pi = i + 1
+				pi = i
 				p = x
-				q = x.x[i+1].ch
+				q = x.x[i].ch
 				continue
 			case *d:
 				oldV = x.d[i].v
@@ -701,36 +703,20 @@ func (t *Tree) splitX(p *x, q *x, pi int, i int) (*x, int) {
 	r.c = kx
 	if pi >= 0 {
 		p.insert(pi, q.x[kx].k, r)
-		q.x[kx].k = zk
+	} else {
-		for i := range q.x[kx+1:] {
+		t.r = newX(q).insert(0, q.x[kx].k, r)
-			q.x[kx+i+1] = zxe
-		}
-		switch {
-		case i < kx:
-			return q, i
-		case i == kx:
-			return p, pi
-		default: // i > kx
-			return r, i - kx - 1
-		}
 	}
-	nr := newX(q).insert(0, q.x[kx].k, r)
-	t.r = nr
 	q.x[kx].k = zk
 	for i := range q.x[kx+1:] {
 		q.x[kx+i+1] = zxe
 	}
+	if i > kx {
+		q = r
+		i -= kx + 1
+	}
-	switch {
-	case i < kx:
 	return q, i
-	case i == kx:
-		return nr, 0
-	default: // i > kx
-		return r, i - kx - 1
-	}
 }
 func (t *Tree) underflow(p *x, q *d, pi int) {

--- a/example/all_test.go
+++ b/example/all_test.go
@@ -620,13 +620,13 @@ func TestEnumeratorPrev(t *testing.T) {
 		hit  bool
 		keys []int
 	}{
-		{5, false, []int{10}},
+		{5, false, []int{}},
 		{10, true, []int{10}},
-		{15, false, []int{20, 10}},
+		{15, false, []int{10}},
 		{20, true, []int{20, 10}},
-		{25, false, []int{30, 20, 10}},
+		{25, false, []int{20, 10}},
 		{30, true, []int{30, 20, 10}},
-		{35, false, []int{}},
+		{35, false, []int{30, 20, 10}},
 	}
 	for i, test := range table {
@@ -683,6 +683,51 @@ func TestEnumeratorPrev(t *testing.T) {
 	}
 }
+func TestEnumeratorPrevSanity(t *testing.T) {
+	// seeking within 3 keys: 10, 20, 30
+	table := []struct {
+		k      int
+		hit    bool
+		kOut   int
+		vOut   int
+		errOut error
+	}{
+		{10, true, 10, 100, nil},
+		{20, true, 20, 200, nil},
+		{30, true, 30, 300, nil},
+		{35, false, 30, 300, nil},
+		{25, false, 20, 200, nil},
+		{15, false, 10, 100, nil},
+		{5, false, 0, 0, io.EOF},
+	}
+	for i, test := range table {
+		r := TreeNew(cmp)
+		r.Set(10, 100)
+		r.Set(20, 200)
+		r.Set(30, 300)
+		en, hit := r.Seek(test.k)
+		if g, e := hit, test.hit; g != e {
+			t.Fatal(i, g, e)
+		}
+		k, v, err := en.Prev()
+		if g, e := err, test.errOut; g != e {
+			t.Fatal(i, g, e)
+		}
+		if g, e := k, test.kOut; g != e {
+			t.Fatal(i, g, e)
+		}
+		if g, e := v, test.vOut; g != e {
+			t.Fatal(i, g, e)
+		}
+	}
+}
 func BenchmarkSeekSeq1e3(b *testing.B) {
 	benchmarkSeekSeq(b, 1e3)
 }

--- a/example/int.go
+++ b/example/int.go
@@ -76,7 +76,7 @@ type (
 	//
 	// However, once an Enumerator returns io.EOF to signal "no more
 	// items", it does no more attempt to "resync" on tree mutation(s).  In
-	// other words, io.EOF from an Enumaretor is "sticky" (idempotent).
+	// other words, io.EOF from an Enumerator is "sticky" (idempotent).
 	Enumerator struct {
 		err error
 		hit bool
@@ -450,7 +450,7 @@ func (t *Tree) overflow(p *x, q *d, pi, i int, k int, v int) {
 	t.ver++
 	l, r := p.siblings(pi)
-	if l != nil && l.c < 2*kd {
+	if l != nil && l.c < 2*kd && i != 0 {
 		l.mvL(q, 1)
 		t.insert(q, i-1, k, v)
 		p.x[pi-1].k = q.d[0].k
@@ -473,9 +473,9 @@ func (t *Tree) overflow(p *x, q *d, pi, i int, k int, v int) {
 	t.split(p, q, pi, i, k, v)
 }
-// Seek returns an Enumerator positioned on a an item such that k >= item's
+// Seek returns an Enumerator positioned on an item such that k >= item's key.
-// key. ok reports if k == item.key The Enumerator's position is possibly
+// ok reports if k == item.key The Enumerator's position is possibly after the
-// after the last item in the tree.
+// last item in the tree.
 func (t *Tree) Seek(k int) (e *Enumerator, ok bool) {
 	q := t.r
 	if q == nil {
@@ -547,12 +547,13 @@ func (t *Tree) Set(k int, v int) {
 		if ok {
 			switch x := q.(type) {
 			case *x:
+				i++
 				if x.c > 2*kx {
 					x, i = t.splitX(p, x, pi, i)
 				}
-				pi = i + 1
+				pi = i
 				p = x
-				q = x.x[i+1].ch
+				q = x.x[i].ch
 				continue
 			case *d:
 				x.d[i].v = v
@@ -614,12 +615,13 @@ func (t *Tree) Put(k int, upd func(oldV int, exists bool) (newV int, write bool)
 		if ok {
 			switch x := q.(type) {
 			case *x:
+				i++
 				if x.c > 2*kx {
 					x, i = t.splitX(p, x, pi, i)
 				}
-				pi = i + 1
+				pi = i
 				p = x
-				q = x.x[i+1].ch
+				q = x.x[i].ch
 				continue
 			case *d:
 				oldV = x.d[i].v
@@ -701,36 +703,20 @@ func (t *Tree) splitX(p *x, q *x, pi int, i int) (*x, int) {
 	r.c = kx
 	if pi >= 0 {
 		p.insert(pi, q.x[kx].k, r)
-		q.x[kx].k = zk
+	} else {
-		for i := range q.x[kx+1:] {
+		t.r = newX(q).insert(0, q.x[kx].k, r)
-			q.x[kx+i+1] = zxe
-		}
-		switch {
-		case i < kx:
-			return q, i
-		case i == kx:
-			return p, pi
-		default: // i > kx
-			return r, i - kx - 1
-		}
 	}
-	nr := newX(q).insert(0, q.x[kx].k, r)
-	t.r = nr
 	q.x[kx].k = zk
 	for i := range q.x[kx+1:] {
 		q.x[kx+i+1] = zxe
 	}
+	if i > kx {
+		q = r
+		i -= kx + 1
+	}
-	switch {
-	case i < kx:
 	return q, i
-	case i == kx:
-		return nr, 0
-	default: // i > kx
-		return r, i - kx - 1
-	}
 }
 func (t *Tree) underflow(p *x, q *d, pi int) {
@@ -826,13 +812,7 @@ func (e *Enumerator) Next() (k int, v int, err error) {
 	}
 	if e.ver != e.t.ver {
-		f, hit := e.t.Seek(e.k)
+		f, _ := e.t.Seek(e.k)
-		if !e.hit && hit {
-			if err = f.next(); err != nil {
-				return
-			}
-		}
 		*e = *f
 		f.Close()
 	}
@@ -849,7 +829,7 @@ func (e *Enumerator) Next() (k int, v int, err error) {
 	i := e.q.d[e.i]
 	k, v = i.k, i.v
-	e.k, e.hit = k, false
+	e.k, e.hit = k, true
 	e.next()
 	return
 }
@@ -880,13 +860,7 @@ func (e *Enumerator) Prev() (k int, v int, err error) {
 	}
 	if e.ver != e.t.ver {
-		f, hit := e.t.Seek(e.k)
+		f, _ := e.t.Seek(e.k)
-		if !e.hit && hit {
-			if err = f.prev(); err != nil {
-				return
-			}
-		}
 		*e = *f
 		f.Close()
 	}
@@ -895,15 +869,22 @@ func (e *Enumerator) Prev() (k int, v int, err error) {
 		return
 	}
+	if !e.hit {
+		// move to previous because Seek overshoots if there's no hit
+		if err = e.prev(); err != nil {
+			return
+		}
+	}
 	if e.i >= e.q.c {
-		if err = e.next(); err != nil {
+		if err = e.prev(); err != nil {
 			return
 		}
 	}
 	i := e.q.d[e.i]
 	k, v = i.k, i.v
-	e.k, e.hit = k, false
+	e.k, e.hit = k, true
 	e.prev()
 	return
 }