Commit 6eef7fdb authored by Fred Drake's avatar Fred Drake

Merge pull request #8 from zopefoundation/node-sizes

Node sizes
parents a8dae6c3 3d07a663
......@@ -53,8 +53,8 @@
*/
#define MODULE_NAME "BTrees." MOD_NAME_PREFIX "BTree."
static PyObject *sort_str, *reverse_str, *__setstate___str,
*_bucket_type_str;
static PyObject *sort_str, *reverse_str, *__setstate___str;
static PyObject *_bucket_type_str, *max_internal_size_str, *max_leaf_size_str;
static PyObject *ConflictError = NULL;
/* Store `e` in `*v`, releasing the reference `*v` held before the call.
 *
 * `*v` may be NULL (nothing is released) and `e` may be NULL.  No new
 * reference to `e` is taken here; the function only stores the pointer.
 *
 * The new pointer is stored before the old one is released: dropping the
 * last reference can run arbitrary code (a destructor), and that code must
 * never observe `*v` pointing at an object that is being deallocated.
 */
static void
PyVar_Assign(PyObject **v, PyObject *e)
{
    PyObject *old = *v;

    *v = e;
    Py_XDECREF(old);
}
......@@ -63,8 +63,6 @@ static void PyVar_Assign(PyObject **v, PyObject *e) { Py_XDECREF(*v); *v=e;}
#define OBJECT(O) ((PyObject*)(O))
#define MIN_BUCKET_ALLOC 16
#define MAX_BTREE_SIZE(B) DEFAULT_MAX_BTREE_SIZE
#define MAX_BUCKET_SIZE(B) DEFAULT_MAX_BUCKET_SIZE
#define SameType_Check(O1, O2) (Py_TYPE((O1))==Py_TYPE((O2)))
......@@ -223,6 +221,8 @@ typedef struct BTree_s {
* data[len].key is positive infinity.
*/
BTreeItem *data;
long max_internal_size;
long max_leaf_size;
} BTree;
static PyTypeObject BTreeType;
......@@ -542,6 +542,13 @@ module_init(void)
if (!_bucket_type_str)
return NULL;
max_internal_size_str = INTERN("max_internal_size");
if (! max_internal_size_str)
return NULL;
max_leaf_size_str = INTERN("max_leaf_size");
if (! max_leaf_size_str)
return NULL;
/* Grab the ConflictError class */
interfaces = PyImport_ImportModule("BTrees.Interfaces");
if (interfaces != NULL)
......
......@@ -15,6 +15,55 @@
#define BTREETEMPLATE_C "$Id$\n"
/* Look up the node-size limit `name` on the type of `self`.
 *
 * Returns:
 *   default_max  the type has no attribute `name` (the AttributeError is
 *                cleared).
 *   > 0          the attribute's value converted to a C long.
 *   -1           error, with a Python exception set: the lookup failed for
 *                a reason other than AttributeError, the value could not
 *                be converted to a C long, or the value was not positive
 *                (ValueError).
 */
static long
_get_max_size(BTree *self, PyObject *name, long default_max)
{
    PyObject *size;
    long isize;

    size = PyObject_GetAttr(OBJECT(OBJECT(self)->ob_type), name);
    if (size == NULL) {
        /* Only a missing attribute means "use the default".  Any other
         * failure must reach the caller instead of being discarded and
         * silently replaced by the default size. */
        if (!PyErr_ExceptionMatches(PyExc_AttributeError))
            return -1;
        PyErr_Clear();
        return default_max;
    }

#ifdef PY3K
    isize = PyLong_AsLong(size);
#else
    isize = PyInt_AsLong(size);
#endif

    Py_DECREF(size);
    if (isize <= 0) {
        /* A failed conversion reports -1 with its own exception set; keep
         * that exception rather than replacing it with ValueError. */
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_ValueError,
                            "non-positive max size in BTree subclass");
        return -1;
    }

    return isize;
}
/* Return the size limit for internal nodes of `self`.
 *
 * The limit is read from the `max_internal_size` attribute of the object's
 * type via _get_max_size(), falling back to DEFAULT_MAX_BTREE_SIZE, and is
 * cached in self->max_internal_size; a cached positive value is returned
 * without a new lookup.
 *
 * Returns a value < 0 when _get_max_size() reports an error.
 *
 * The return type is `long`, matching both the cached field and
 * _get_max_size(): returning `int` would silently truncate large limits
 * and could turn a valid limit into a negative "error" value.
 */
static long
_max_internal_size(BTree *self)
{
    long isize;

    if (self->max_internal_size > 0)
        return self->max_internal_size;

    isize = _get_max_size(self, max_internal_size_str, DEFAULT_MAX_BTREE_SIZE);
    /* A failed lookup stores a non-positive value, so the next call simply
     * retries the lookup. */
    self->max_internal_size = isize;
    return isize;
}
/* Return the size limit for leaf nodes of `self`.
 *
 * The limit is read from the `max_leaf_size` attribute of the object's
 * type via _get_max_size(), falling back to DEFAULT_MAX_BUCKET_SIZE, and
 * is cached in self->max_leaf_size; a cached positive value is returned
 * without a new lookup.
 *
 * Returns a value < 0 when _get_max_size() reports an error.
 *
 * The return type is `long`, matching both the cached field and
 * _get_max_size(): returning `int` would silently truncate large limits
 * and could turn a valid limit into a negative "error" value.
 */
static long
_max_leaf_size(BTree *self)
{
    long isize;

    if (self->max_leaf_size > 0)
        return self->max_leaf_size;

    isize = _get_max_size(self, max_leaf_size_str, DEFAULT_MAX_BUCKET_SIZE);
    /* A failed lookup stores a non-positive value, so the next call simply
     * retries the lookup. */
    self->max_leaf_size = isize;
    return isize;
}
/* Sanity-check a BTree. This is a private helper for BTree_check. Return:
* -1 Error. If it's an internal inconsistency in the BTree,
* AssertionError is set.
......@@ -410,6 +459,9 @@ BTree_grow(BTree *self, int index, int noval)
if (self->len)
{
long max_size = _max_internal_size(self);
if (max_size < 0) return -1;
d = self->data + index;
v = d->child;
/* Create a new object of the same type as the target value */
......@@ -459,7 +511,7 @@ BTree_grow(BTree *self, int index, int noval)
d->child = e;
self->len++;
if (self->len >= MAX_BTREE_SIZE(self) * 2) /* the root is huge */
if (self->len >= max_size * 2) /* the root is huge */
return BTree_split_root(self, noval);
}
else
......@@ -727,11 +779,16 @@ _BTree_set(BTree *self, PyObject *keyarg, PyObject *value,
int toobig;
assert(status == 1); /* can be 2 only on deletes */
if (SameType_Check(self, d->child))
toobig = childlength > MAX_BTREE_SIZE(d->child);
else
toobig = childlength > MAX_BUCKET_SIZE(d->child);
if (SameType_Check(self, d->child)) {
long max_size = _max_internal_size(self);
if (max_size < 0) return -1;
toobig = childlength > max_size;
}
else {
long max_size = _max_leaf_size(self);
if (max_size < 0) return -1;
toobig = childlength > max_size;
}
if (toobig) {
if (BTree_grow(self, min, noval) < 0)
goto Error;
......@@ -2178,6 +2235,9 @@ BTree_init(PyObject *self, PyObject *args, PyObject *kwds)
{
PyObject *v = NULL;
BTREE(self)->max_leaf_size = 0;
BTREE(self)->max_internal_size = 0;
if (!PyArg_ParseTuple(args, "|O:" MOD_NAME_PREFIX "BTree", &v))
return -1;
......
......@@ -44,7 +44,6 @@ _TREE_SIZE = 500
using64bits = False
class IFBucketPy(Bucket):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE = MERGE
......@@ -53,7 +52,6 @@ class IFBucketPy(Bucket):
class IFSetPy(Set):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
MERGE = MERGE
MERGE_WEIGHT = MERGE_WEIGHT_numeric
......@@ -61,7 +59,8 @@ class IFSetPy(Set):
class IFBTreePy(BTree):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE = MERGE
......@@ -70,7 +69,8 @@ class IFBTreePy(BTree):
class IFTreeSetPy(TreeSet):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
MERGE = MERGE
MERGE_WEIGHT = MERGE_WEIGHT_numeric
......
......@@ -45,7 +45,6 @@ using64bits = False
class IIBucketPy(Bucket):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE = MERGE
......@@ -54,7 +53,6 @@ class IIBucketPy(Bucket):
class IISetPy(Set):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
MERGE = MERGE
MERGE_WEIGHT = MERGE_WEIGHT_numeric
......@@ -62,7 +60,8 @@ class IISetPy(Set):
class IIBTreePy(BTree):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE = MERGE
......@@ -71,7 +70,8 @@ class IIBTreePy(BTree):
class IITreeSetPy(TreeSet):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
MERGE = MERGE
MERGE_WEIGHT = MERGE_WEIGHT_numeric
......
......@@ -40,26 +40,26 @@ using64bits = False
class IOBucketPy(Bucket):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE_WEIGHT = MERGE_WEIGHT_default
class IOSetPy(Set):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
class IOBTreePy(BTree):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE_WEIGHT = MERGE_WEIGHT_default
class IOTreeSetPy(TreeSet):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
class IOTreeIteratorPy(_TreeIterator):
......
......@@ -45,7 +45,6 @@ using64bits = True
class LFBucketPy(Bucket):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE = MERGE
......@@ -54,7 +53,6 @@ class LFBucketPy(Bucket):
class LFSetPy(Set):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
MERGE = MERGE
MERGE_WEIGHT = MERGE_WEIGHT_numeric
......@@ -62,7 +60,8 @@ class LFSetPy(Set):
class LFBTreePy(BTree):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE = MERGE
......@@ -71,7 +70,8 @@ class LFBTreePy(BTree):
class LFTreeSetPy(TreeSet):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
MERGE = MERGE
MERGE_WEIGHT = MERGE_WEIGHT_numeric
......
......@@ -45,7 +45,6 @@ using64bits = True
class LLBucketPy(Bucket):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE = MERGE
......@@ -54,7 +53,6 @@ class LLBucketPy(Bucket):
class LLSetPy(Set):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
MERGE = MERGE
MERGE_WEIGHT = MERGE_WEIGHT_numeric
......@@ -62,7 +60,8 @@ class LLSetPy(Set):
class LLBTreePy(BTree):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE = MERGE
......@@ -71,7 +70,8 @@ class LLBTreePy(BTree):
class LLTreeSetPy(TreeSet):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
MERGE = MERGE
MERGE_WEIGHT = MERGE_WEIGHT_numeric
......
......@@ -40,26 +40,26 @@ using64bits = True
class LOBucketPy(Bucket):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE_WEIGHT = MERGE_WEIGHT_default
class LOSetPy(Set):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
class LOBTreePy(BTree):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE_WEIGHT = MERGE_WEIGHT_default
class LOTreeSetPy(TreeSet):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
......
......@@ -43,7 +43,6 @@ _TREE_SIZE = 250
using64bits = True
class OIBucketPy(Bucket):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE = MERGE
......@@ -52,7 +51,6 @@ class OIBucketPy(Bucket):
class OISetPy(Set):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
MERGE = MERGE
MERGE_WEIGHT = MERGE_WEIGHT_numeric
......@@ -60,7 +58,8 @@ class OISetPy(Set):
class OIBTreePy(BTree):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE = MERGE
......@@ -69,7 +68,8 @@ class OIBTreePy(BTree):
class OITreeSetPy(TreeSet):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
MERGE = MERGE
MERGE_WEIGHT = MERGE_WEIGHT_numeric
......
......@@ -44,7 +44,6 @@ using64bits = True
class OLBucketPy(Bucket):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE = MERGE
......@@ -53,7 +52,6 @@ class OLBucketPy(Bucket):
class OLSetPy(Set):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
MERGE = MERGE
MERGE_WEIGHT = MERGE_WEIGHT_numeric
......@@ -61,7 +59,8 @@ class OLSetPy(Set):
class OLBTreePy(BTree):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
_to_value = _to_value
MERGE = MERGE
......@@ -70,7 +69,8 @@ class OLBTreePy(BTree):
class OLTreeSetPy(TreeSet):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
MERGE = MERGE
MERGE_WEIGHT = MERGE_WEIGHT_numeric
......
......@@ -38,24 +38,24 @@ using64bits = False
class OOBucketPy(Bucket):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
_to_value = _to_value
class OOSetPy(Set):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
class OOBTreePy(BTree):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
_to_value = _to_value
class OOTreeSetPy(TreeSet):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
......
......@@ -866,7 +866,12 @@ class _Tree(_Base):
result = child._set(key, value, ifunset)
grew = result[0]
if grew and child.size > child.MAX_SIZE:
if grew:
if child.__class__ is self.__class__:
max_size = self.max_internal_size
else:
max_size = self.max_leaf_size
if child.size > max_size:
self._grow(child, index)
elif (grew is not None and
child.__class__ is self._bucket_type and
......@@ -880,7 +885,7 @@ class _Tree(_Base):
self._p_changed = True
new_child = child._split()
self._data.insert(index+1, _TreeItem(new_child.minKey(), new_child))
if len(self._data) > self.MAX_SIZE * 2:
if len(self._data) >= self.max_internal_size * 2:
self._split_root()
def _split_root(self):
......
......@@ -43,7 +43,6 @@ _to_value = _to_bytes(6)
class fsBucketPy(Bucket):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
_to_value = _to_value
......@@ -66,18 +65,19 @@ class fsBucketPy(Bucket):
class fsSetPy(Set):
MAX_SIZE = _BUCKET_SIZE
_to_key = _to_key
class fsBTreePy(BTree):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
_to_value = _to_value
class fsTreeSetPy(TreeSet):
MAX_SIZE = _TREE_SIZE
max_leaf_size = _BUCKET_SIZE
max_internal_size = _TREE_SIZE
_to_key = _to_key
......
......@@ -30,6 +30,8 @@ class Test_Base(unittest.TestCase):
def _makeOne(self, items=None):
class _Test(self._getTargetClass()):
max_leaf_size = 10
max_internal_size = 15
def clear(self):
self._data = {}
def update(self, d):
......@@ -1396,13 +1398,13 @@ class Test_Tree(unittest.TestCase):
def _makeOne(self, items=None):
from .._base import Bucket
class _Bucket(Bucket):
MAX_SIZE = 10
def _to_key(self, k):
return k
class _Test(self._getTargetClass()):
_to_key = _to_value = lambda self, x: x
_bucket_type = _Bucket
MAX_SIZE = 15
max_leaf_size = 10
max_internal_size = 15
return _Test(items)
def test_setdefault_miss(self):
......@@ -2135,7 +2137,6 @@ class Test_TreeItems(unittest.TestCase):
def _makeBucket(self, items=None):
from .._base import Bucket
class _Bucket(Bucket):
MAX_SIZE = 10
def _to_key(self, k):
return k
return _Bucket(items)
......@@ -2214,13 +2215,13 @@ class TreeTests(unittest.TestCase):
def _makeOne(self, items=None):
from .._base import Bucket
class _Bucket(Bucket):
MAX_SIZE = 10
def _to_key(self, k):
return k
class _Test(self._getTargetClass()):
_to_key = _to_value = lambda self, x: x
_bucket_type = _Bucket
MAX_SIZE = 15
max_leaf_size = 10
max_internal_size = 15
return _Test(items)
def test_get_empty_miss(self):
......@@ -2359,13 +2360,13 @@ class TreeSetTests(unittest.TestCase):
def _makeOne(self, items=None):
from .._base import Bucket
class _Bucket(Bucket):
MAX_SIZE = 10
def _to_key(self, k):
return k
class _Test(self._getTargetClass()):
_to_key = _to_value = lambda self, x: x
_bucket_type = _Bucket
MAX_SIZE = 15
max_leaf_size = 10
max_internal_size = 15
return _Test(items)
def test_add_new_key(self):
......
......@@ -18,6 +18,11 @@ class B(OOBucket):
class T(OOBTree):
_bucket_type = B
max_leaf_size = 2
max_internal_size = 3
class S(T):
pass
import unittest
......@@ -27,18 +32,23 @@ class SubclassTest(unittest.TestCase):
# test that a subclass that defines _bucket_type gets buckets
# of that type
t = T()
t[0] = 0
self.assertTrue(t._firstbucket.__class__ is B)
# There's no good way to get a bucket at the moment.
# __getstate__() is as good as it gets, but the default
# getstate explicitly includes the pickle of the bucket
# for small trees, so we have to be clever :-(
# make sure there is more than one bucket in the tree
for i in range(1000):
def testCustomNodeSizes(self):
# We override btree and bucket split sizes in BTree subclasses.
t = S()
for i in range(8):
t[i] = i
state = t.__getstate__()
self.assertTrue(state[0][0].__class__ is B)
state = t.__getstate__()[0]
self.assertEqual(len(state), 5)
sub = state[0]
self.assertEqual(sub.__class__, S)
sub = sub.__getstate__()[0]
self.assertEqual(len(sub), 5)
sub = sub[0]
self.assertEqual(sub.__class__, B)
self.assertEqual(len(sub), 1)
def test_suite():
return unittest.makeSuite(SubclassTest)
``BTrees`` Changelog
====================
- BTree subclasses can define max_leaf_size or max_internal_size to
control maximum sizes for Bucket/Set and BTree/TreeSet nodes.
- Fixed: integer overflow on 32-bit machines wasn't detected correctly
under Python 3.
4.0.9 (unreleased)
------------------
......
......@@ -375,6 +375,41 @@ of the keys. Example
[]
>>>
BTree node sizes
----------------
BTrees (and TreeSets) are made up of a tree of Buckets (and Sets) and
internal nodes. Maximum sizes for these nodes are configured for
the various key and value types:
======== ========== ========================== =============================
Key Type Value Type Maximum Bucket or Set Size Maximum BTree or TreeSet Size
======== ========== ========================== =============================
Integer  Float      120                        500
Integer  Integer    120                        500
Integer  Object     60                         500
Long     Float      120                        500
Long     Long       120                        500
Long     Object     60                         500
Object   Integer    60                         250
Object   Long       60                         250
Object   Object     30                         250
======== ========== ========================== =============================
For your application, especially when using object keys or values, you
may want to override the default sizes. You can do this by
subclassing any of the BTree (or TreeSet) classes and specifying new
values for ``max_leaf_size`` or ``max_internal_size`` in your subclass::

    import BTrees.OOBTree

    class MyBTree(BTrees.OOBTree.BTree):
        max_leaf_size = 500
        max_internal_size = 1000

``max_leaf_size`` is used for leaf nodes in a BTree, either Buckets or
Sets. ``max_internal_size`` is used for internal nodes, either BTrees
or TreeSets.
BTree Diagnostic Tools
----------------------
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment