Commit 3ebcfa46 authored by Vincent Pelletier

Use regexes for URL bases instead of startswith().

Hurts parsing performance by a few percent, sadly, but I don't see any
other way.
parent 75507403
...@@ -249,9 +249,9 @@ class APDEXStats(object): ...@@ -249,9 +249,9 @@ class APDEXStats(object):
return float(self.duration_max) / US_PER_S return float(self.duration_max) / US_PER_S
class GenericSiteStats(object): class GenericSiteStats(object):
def __init__(self, threshold, getDuration, prefix=1, error_detail=False): def __init__(self, threshold, getDuration, suffix, error_detail=False):
self.threshold = threshold self.threshold = threshold
self.prefix = prefix self.suffix = suffix
self.error_detail = error_detail self.error_detail = error_detail
self.getDuration = getDuration self.getDuration = getDuration
self.status = defaultdict(partial(defaultdict, int)) self.status = defaultdict(partial(defaultdict, int))
...@@ -371,8 +371,8 @@ class ERP5SiteStats(GenericSiteStats): ...@@ -371,8 +371,8 @@ class ERP5SiteStats(GenericSiteStats):
- If a line belongs to a module and has at least 2 slashes after module, - If a line belongs to a module and has at least 2 slashes after module,
count line as belonging to a document of that module count line as belonging to a document of that module
""" """
def __init__(self, threshold, getDuration, prefix=1, error_detail=False): def __init__(self, threshold, getDuration, suffix, error_detail=False):
super(ERP5SiteStats, self).__init__(threshold, getDuration, prefix=prefix, super(ERP5SiteStats, self).__init__(threshold, getDuration, suffix,
error_detail=error_detail) error_detail=error_detail)
# Key levels: # Key levels:
# - module id (string) # - module id (string)
...@@ -383,12 +383,10 @@ class ERP5SiteStats(GenericSiteStats): ...@@ -383,12 +383,10 @@ class ERP5SiteStats(GenericSiteStats):
self.no_module = defaultdict(partial(APDEXStats, threshold, getDuration)) self.no_module = defaultdict(partial(APDEXStats, threshold, getDuration))
def accumulate(self, match, url_match, date): def accumulate(self, match, url_match, date):
prefix = self.prefix split = self.suffix(url_match.group('url')).split('?', 1)[0].split('/')
split = url_match.group('url').split('?', 1)[0].split('/')[1 + prefix:] if split and split[0].endswith('_module'):
if split:
module = split[0]
if module.endswith('_module'):
super(ERP5SiteStats, self).accumulate(match, url_match, date) super(ERP5SiteStats, self).accumulate(match, url_match, date)
module = split[0]
self.module[module][ self.module[module][
len(split) > 1 and (split[1] != 'view' and '_view' not in split[1]) len(split) > 1 and (split[1] != 'view' and '_view' not in split[1])
][date].accumulate(match) ][date].accumulate(match)
...@@ -496,13 +494,12 @@ class AggregateSiteUrl(argparse.Action): ...@@ -496,13 +494,12 @@ class AggregateSiteUrl(argparse.Action):
action = self.__argument_to_aggregator[option_string] action = self.__argument_to_aggregator[option_string]
dest = getattr(namespace, self.dest) dest = getattr(namespace, self.dest)
for value in values: for value in values:
match = re.compile(value).match
if action is not None: if action is not None:
if value[-1:] == '/': match_suffix = re.compile(value + '(?P<suffix>.*)').match
offset = -1 action = partial(action,
else: suffix=lambda x: match_suffix(x).group('suffix'))
offset = 0 dest.append((value, match, action))
action = partial(action, prefix=value.count('/') + offset)
dest.append((value, action))
def _asMonthString(timestamp): def _asMonthString(timestamp):
dt, tz = timestamp.split(' ') dt, tz = timestamp.split(' ')
...@@ -592,8 +589,9 @@ def main(): ...@@ -592,8 +589,9 @@ def main():
help='Embed js files instead of linking to them.') help='Embed js files instead of linking to them.')
group = parser.add_argument_group('site matching', 'Earlier arguments take ' group = parser.add_argument_group('site matching', 'Earlier arguments take '
'precedence. For example: --skip-base /foo/bar --base /foo generates ' 'precedence. For example: --skip-base "/foo/bar(/|$|\\?)" '
'stats for /foo, excluding /foo/bar.') '--base "/foo(/|$|\\?)" generates stats for /foo, excluding /foo/bar. '
'Arguments (except for -d/--default) are interpreted as Python regexes.')
group.add_argument('-d', '--default', group.add_argument('-d', '--default',
help='Caption for lines matching no prefix, or skip them if not provided.') help='Caption for lines matching no prefix, or skip them if not provided.')
group.add_argument('--base', dest='path', default=[], nargs='+', group.add_argument('--base', dest='path', default=[], nargs='+',
...@@ -655,7 +653,7 @@ def main(): ...@@ -655,7 +653,7 @@ def main():
'specified, nothing to do.' 'specified, nothing to do.'
sys.exit(1) sys.exit(1)
else: else:
default_action = partial(GenericSiteStats, prefix=0) default_action = partial(GenericSiteStats, suffix=lambda x: x)
infile_list = args.logfile infile_list = args.logfile
quiet = args.quiet quiet = args.quiet
threshold = args.apdex threshold = args.apdex
...@@ -702,9 +700,8 @@ def main(): ...@@ -702,9 +700,8 @@ def main():
url = url_match.group('url') url = url_match.group('url')
if url.startswith('http'): if url.startswith('http'):
url = splithost(splittype(url)[1])[1] url = splithost(splittype(url)[1])[1]
startswith = url.startswith for site, prefix_match, action in site_list:
for site, action in site_list: if prefix_match(url) is not None:
if startswith(site):
break break
else: else:
site = default_site site = default_site
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment