Commit 16671146 authored by Hanno Schlichting

Merge in the essential parts of unimr's queryplan

parent 37eb25f1
...@@ -482,7 +482,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -482,7 +482,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
continue continue
order.append((ILimitedResultIndex.providedBy(index), name)) order.append((ILimitedResultIndex.providedBy(index), name))
order.sort() order.sort()
return order return [i[1] for i in order]
def search(self, query, sort_index=None, reverse=0, limit=None, merge=1): def search(self, query, sort_index=None, reverse=0, limit=None, merge=1):
"""Iterate through the indexes, applying the query to each one. If """Iterate through the indexes, applying the query to each one. If
...@@ -506,17 +506,22 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -506,17 +506,22 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# Canonicalize the request into a sensible query before passing it on # Canonicalize the request into a sensible query before passing it on
query = self.make_query(query) query = self.make_query(query)
cr = self.getCatalogReport(query) cr = self.getCatalogReport(query)
cr.start() cr.start()
for limit_result, i in self._sorted_search_indexes(query): plan = cr.plan()
if not plan:
plan = self._sorted_search_indexes(query)
for i in plan:
index = self.getIndex(i) index = self.getIndex(i)
_apply_index = getattr(index, "_apply_index", None) _apply_index = getattr(index, "_apply_index", None)
if _apply_index is None: if _apply_index is None:
continue continue
cr.split(i) cr.start_split(i)
if limit_result: if ILimitedResultIndex.providedBy(index):
r = _apply_index(query, rs) r = _apply_index(query, rs)
else: else:
r = _apply_index(query) r = _apply_index(query)
...@@ -528,14 +533,15 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -528,14 +533,15 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# once we don't need to support the "return everything" case # once we don't need to support the "return everything" case
# anymore # anymore
if r is not None and not r: if r is not None and not r:
cr.split(i, None) cr.stop_split(i, None)
return LazyCat([]) return LazyCat([])
cr.split(i, r)
cr.stop_split(i, r)
w, rs = weightedIntersection(rs, r) w, rs = weightedIntersection(rs, r)
if not rs: if not rs:
break break
else: else:
cr.split(i, None) cr.stop_split(i, None)
cr.stop() cr.stop()
......
...@@ -25,6 +25,7 @@ value_indexes_lock = allocate_lock() ...@@ -25,6 +25,7 @@ value_indexes_lock = allocate_lock()
value_indexes = frozenset() value_indexes = frozenset()
MAX_DISTINCT_VALUES = 10 MAX_DISTINCT_VALUES = 10
REFRESH_RATE = 100
def determine_value_indexes(indexes): def determine_value_indexes(indexes):
...@@ -101,67 +102,121 @@ def make_key(catalog, query): ...@@ -101,67 +102,121 @@ def make_key(catalog, query):
return key return key
class StopWatch(object): class CatalogReport(object):
""" Simple stopwatch class """ """Catalog report class to meassure and identify catalog queries.
"""
def __init__(self): def __init__(self, catalog, query=None, threshold=0.1):
self.init() self.init()
self.catalog = catalog
self.query = query
self._key = None
self.threshold = threshold
parent = aq_parent(catalog)
path = getattr(aq_base(parent), 'getPhysicalPath', None)
if path is None:
path = ('', 'NonPersistentCatalog')
else:
path = tuple(parent.getPhysicalPath())
self.cid = path
def init(self): def init(self):
self.res = [] self.res = []
self.start_time = None self.start_time = None
self.interim = {} self.interim = {}
self.stop_time = None self.stop_time = None
self.duration = None
def prioritymap(self):
# holds the benchmark of each index
prioritymap = getattr(self.catalog, '_v_prioritymap', None)
if prioritymap is None:
prioritymap = self.catalog._v_prioritymap = {}
return prioritymap
def benchmark(self):
# holds the benchmark of each index
return self.prioritymap().get(self.key(), None)
def plan(self):
benchmark = self.benchmark()
if not benchmark:
return None
# sort indexes on (mean hits, mean search time)
ranking = [((v[0], v[1]), k) for k, v in benchmark.items()]
ranking.sort()
return [i[1] for i in ranking]
def start(self): def start(self):
self.init() self.init()
self.start_time = time.time() self.start_time = time.time()
benchmark = self.benchmark()
if benchmark is None:
self.prioritymap()[self.key()] = {}
def split(self, label, result=None): def start_split(self, label, result=None):
current = time.time() self.interim[label] = (time.time(), None)
start_time, stop_time = self.interim.get(label, (None, None))
if start_time is None:
self.interim[label] = (current, None)
return
def stop_split(self, name, result=None):
current = time.time()
start_time, stop_time = self.interim.get(name, (None, None))
length = 0 length = 0
if result is not None: if result is not None:
# TODO: calculating the length can be expensive # TODO: calculating the length can be expensive
length = len(result) length = len(result)
self.interim[label] = (start_time, current) self.interim[name] = (start_time, current)
self.res.append((label, current - start_time, length)) dt = current - start_time
self.res.append((name, current - start_time, length))
# remember index's hits, search time and calls
benchmark = self.benchmark()
if name not in benchmark:
benchmark[name] = (length, dt, 1)
else:
n, t, c = benchmark[name]
n = int(((n*c) + length) / float(c + 1))
t = ((t*c) + dt) / float(c + 1)
# reset adaption
if c % REFRESH_RATE == 0:
c = 0
c += 1
benchmark[name] = (n, t, c)
def stop(self): def stop(self):
self.end_time = time.time() self.end_time = time.time()
self.duration = self.end_time - self.start_time
def result(self): key = self.key()
return (self.end_time - self.start_time, tuple(self.res)) benchmark = self.benchmark()
prioritymap = self.prioritymap()
prioritymap[key] = benchmark
class CatalogReport(StopWatch):
"""Catalog report class to meassure and identify catalog queries.
"""
def __init__(self, catalog, query=None, threshold=0.1): # calculate mean time of search
super(CatalogReport, self).__init__() stats = getattr(self.catalog, '_v_stats', None)
if stats is None:
self.catalog = catalog stats = self.catalog._v_stats = {}
self.query = query
self.threshold = threshold
parent = aq_parent(catalog) if key not in stats:
path = getattr(aq_base(parent), 'getPhysicalPath', None) mt = self.duration
if path is None: c = 1
path = ('', 'NonPersistentCatalog')
else: else:
path = tuple(parent.getPhysicalPath()) mt, c = stats[key]
self.cid = path mt = ((mt * c) + self.duration) / float(c + 1)
c += 1
def stop(self): stats[key] = (mt, c)
super(CatalogReport, self).stop()
self.log() self.log()
def result(self):
return (self.duration, tuple(self.res))
def key(self):
if not self._key:
self._key = make_key(self.catalog, self.query)
return self._key
def log(self): def log(self):
# result of stopwatch # result of stopwatch
res = self.result() res = self.result()
...@@ -171,7 +226,7 @@ class CatalogReport(StopWatch): ...@@ -171,7 +226,7 @@ class CatalogReport(StopWatch):
# The key calculation takes a bit itself, we want to avoid that for # The key calculation takes a bit itself, we want to avoid that for
# any fast queries. This does mean that slow queries get the key # any fast queries. This does mean that slow queries get the key
# calculation overhead added to their runtime. # calculation overhead added to their runtime.
key = make_key(self.catalog, self.query) key = self.key()
reports_lock.acquire() reports_lock.acquire()
try: try:
......
...@@ -290,19 +290,18 @@ class TestCatalog(CatalogBase, unittest.TestCase): ...@@ -290,19 +290,18 @@ class TestCatalog(CatalogBase, unittest.TestCase):
def test_sorted_search_indexes_one(self): def test_sorted_search_indexes_one(self):
result = self._catalog._sorted_search_indexes({'att1': 'a'}) result = self._catalog._sorted_search_indexes({'att1': 'a'})
self.assertEquals(result, [(True, 'att1')]) self.assertEquals(result, ['att1'])
def test_sorted_search_indexes_many(self): def test_sorted_search_indexes_many(self):
query = {'att1': 'a', 'att2': 'b', 'num': 1} query = {'att1': 'a', 'att2': 'b', 'num': 1}
result = self._catalog._sorted_search_indexes(query) result = self._catalog._sorted_search_indexes(query)
indexes = [r[1] for r in result] self.assertEquals(set(result), set(['att1', 'att2', 'num']))
self.assertEquals(set(indexes), set(['att1', 'att2', 'num']))
def test_sorted_search_indexes_priority(self): def test_sorted_search_indexes_priority(self):
# att2 and col2 don't support ILimitedResultIndex, att1 does # att2 and col2 don't support ILimitedResultIndex, att1 does
query = {'att1': 'a', 'att2': 'b', 'col2': 'c'} query = {'att1': 'a', 'att2': 'b', 'col2': 'c'}
result = self._catalog._sorted_search_indexes(query) result = self._catalog._sorted_search_indexes(query)
self.assertEquals(result.index((True, 'att1')), 2) self.assertEquals(result.index('att1'), 2)
# search # search
# sortResults # sortResults
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment