Commit 3ebcfa46 authored by Vincent Pelletier

Use regexes for URL bases instead of startswith().

Hurts parsing performance by a few percent, sadly, but I don't see any
other way.
parent 75507403
......@@ -249,9 +249,9 @@ class APDEXStats(object):
return float(self.duration_max) / US_PER_S
class GenericSiteStats(object):
def __init__(self, threshold, getDuration, prefix=1, error_detail=False):
def __init__(self, threshold, getDuration, suffix, error_detail=False):
self.threshold = threshold
self.prefix = prefix
self.suffix = suffix
self.error_detail = error_detail
self.getDuration = getDuration
self.status = defaultdict(partial(defaultdict, int))
......@@ -371,8 +371,8 @@ class ERP5SiteStats(GenericSiteStats):
- If a line belongs to a module and has at least 2 slashes after module,
count line as belonging to a document of that module
"""
def __init__(self, threshold, getDuration, prefix=1, error_detail=False):
super(ERP5SiteStats, self).__init__(threshold, getDuration, prefix=prefix,
def __init__(self, threshold, getDuration, suffix, error_detail=False):
super(ERP5SiteStats, self).__init__(threshold, getDuration, suffix,
error_detail=error_detail)
# Key levels:
# - module id (string)
......@@ -383,12 +383,10 @@ class ERP5SiteStats(GenericSiteStats):
self.no_module = defaultdict(partial(APDEXStats, threshold, getDuration))
def accumulate(self, match, url_match, date):
prefix = self.prefix
split = url_match.group('url').split('?', 1)[0].split('/')[1 + prefix:]
if split:
module = split[0]
if module.endswith('_module'):
split = self.suffix(url_match.group('url')).split('?', 1)[0].split('/')
if split and split[0].endswith('_module'):
super(ERP5SiteStats, self).accumulate(match, url_match, date)
module = split[0]
self.module[module][
len(split) > 1 and (split[1] != 'view' and '_view' not in split[1])
][date].accumulate(match)
......@@ -496,13 +494,12 @@ class AggregateSiteUrl(argparse.Action):
action = self.__argument_to_aggregator[option_string]
dest = getattr(namespace, self.dest)
for value in values:
match = re.compile(value).match
if action is not None:
if value[-1:] == '/':
offset = -1
else:
offset = 0
action = partial(action, prefix=value.count('/') + offset)
dest.append((value, action))
match_suffix = re.compile(value + '(?P<suffix>.*)').match
action = partial(action,
suffix=lambda x: match_suffix(x).group('suffix'))
dest.append((value, match, action))
def _asMonthString(timestamp):
dt, tz = timestamp.split(' ')
......@@ -592,8 +589,9 @@ def main():
help='Embed js files instead of linking to them.')
group = parser.add_argument_group('site matching', 'Earlier arguments take '
'precedence. For example: --skip-base /foo/bar --base /foo generates '
'stats for /foo, excluding /foo/bar.')
'precedence. For example: --skip-base "/foo/bar(/|$|\\?)" '
'--base "/foo(/|$|\\?)" generates stats for /foo, excluding /foo/bar. '
'Arguments (except for -d/--default) are interpreted as Python regexes.')
group.add_argument('-d', '--default',
help='Caption for lines matching no prefix, or skip them if not provided.')
group.add_argument('--base', dest='path', default=[], nargs='+',
......@@ -655,7 +653,7 @@ def main():
'specified, nothing to do.'
sys.exit(1)
else:
default_action = partial(GenericSiteStats, prefix=0)
default_action = partial(GenericSiteStats, suffix=lambda x: x)
infile_list = args.logfile
quiet = args.quiet
threshold = args.apdex
......@@ -702,9 +700,8 @@ def main():
url = url_match.group('url')
if url.startswith('http'):
url = splithost(splittype(url)[1])[1]
startswith = url.startswith
for site, action in site_list:
if startswith(site):
for site, prefix_match, action in site_list:
if prefix_match(url) is not None:
break
else:
site = default_site
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment