Commit 7b7271f9 authored by Vincent Pelletier

Implement per-hour stat generation.

Also, only execute expensive date parsing when actually rendering dates in
a graph, removing the need for PARSE_DATE.
parent 86c128f2
......@@ -33,6 +33,7 @@
# - provide some form of raw data output, not just html
from cgi import escape
from collections import defaultdict
from datetime import datetime, tzinfo, timedelta
from functools import partial
from operator import itemgetter
from urllib import splittype, splithost
......@@ -42,8 +43,6 @@ import os
import re
import sys
PARSE_DATE = False
try:
import matplotlib
except ImportError:
......@@ -54,43 +53,6 @@ else:
from matplotlib.dates import DateFormatter, DayLocator
day_formatter = DateFormatter('%Y-%m-%d')
all_days = DayLocator()
PARSE_DATE = True
# When enabled: nicer dates, massively slower
if PARSE_DATE:
from datetime import datetime, tzinfo, timedelta
import locale
locale.setlocale(locale.LC_TIME, 'C')
class TZ(tzinfo):
    """Fixed-offset timezone built from a signed numeric offset string.

    The string is interpreted as ``[+-]HHMM`` (for example ``'+0200'`` or
    ``'-0530'``): the last two digits are minutes, the leading digits are
    hours. There is no daylight-saving handling; ``dst()`` is always zero.
    """

    def __init__(self, tz):
        """Store *tz* as the zone name and derive its offset from UTC.

        Raises ValueError when *tz* cannot be converted with ``int()``.
        """
        self.name = tz
        value = int(tz)
        # Work on the magnitude and re-apply the sign afterwards: flooring
        # division and modulo on a negative number would turn '-0530' into
        # -6 hours + 70 minutes instead of -(5 hours + 30 minutes).
        sign = -1 if value < 0 else 1
        hours, minutes = divmod(abs(value), 100)
        self.offset = timedelta(0, sign * (hours * 60 + minutes) * 60)
        super(TZ, self).__init__()

    def dst(self, dt):
        """Return a zero daylight-saving adjustment."""
        return timedelta(0)

    def fromutc(self, dt):
        """Convert *dt*, expressed in UTC, to this zone's local time."""
        # Taken from python's standard library doc, as of python 2.7
        dtoff = dt.utcoffset()
        dtdst = dt.dst()
        delta = dtoff - dtdst
        if delta:
            dt += delta
            dtdst = dt.dst()
        if dtdst:
            return dt + dtdst
        return dt

    def tzname(self, dt=None):
        """Return the offset string this zone was created from.

        *dt* is accepted because ``datetime.tzname()`` passes it; it
        defaults to None so argument-less calls keep working.
        """
        return self.name

    def utcoffset(self, dt):
        """Return the fixed offset from UTC as a timedelta."""
        return self.offset
# TZ instances already built, keyed by the raw timezone offset string.
TZ_CACHE = {}
# Three-letter English month abbreviation -> month number, starting at 1.
MONTH_VALUE_DICT = {
    month_name: month_number
    for month_number, month_name in enumerate(
        (
            'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
            'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
        ),
        1,
    )
}
......@@ -343,6 +305,22 @@ class AggregateSiteUrl(argparse.Action):
action = partial(action, prefix=values.count('/') + offset)
getattr(namespace, self.dest).append((values, action))
def _asDayString(timestamp):
    """Return the per-day bucket key ``'<year>/<MM>/<day>'`` of a timestamp.

    *timestamp* must look like ``'<day>/<Mon>/<year>:<time> <tz>'`` with
    exactly one space. The timezone part is ignored: the key is built from
    the date as written, without any conversion.

    Raises ValueError when the timestamp does not split into the expected
    parts, and KeyError when the month abbreviation is unknown.
    """
    local_part, _ = timestamp.split(' ')
    date_part = local_part.partition(':')[0]
    day, month_name, year = date_part.split('/')
    month_number = MONTH_VALUE_DICT[month_name]
    return '%s/%02i/%s' % (year, month_number, day)
def _asHourString(timestamp):
    """Return the per-hour bucket key ``'<year>/<MM>/<day> <hour>'``.

    *timestamp* must look like ``'<day>/<Mon>/<year>:<hour>:<rest> <tz>'``
    with exactly one space and at least two colons before it. The timezone
    part is ignored: the key is built from the date and hour as written.

    Raises ValueError when the timestamp does not split into the expected
    parts, and KeyError when the month abbreviation is unknown.
    """
    local_part, _ = timestamp.split(' ')
    # Only the first two fields matter; minutes and seconds are discarded.
    date_part, hour_part, _ = local_part.split(':', 2)
    day, month_name, year = date_part.split('/')
    month_number = MONTH_VALUE_DICT[month_name]
    return '%s/%02i/%s %s' % (year, month_number, day, hour_part)
# Accepted --period values, each mapped to the function which turns a raw
# log timestamp into the key of the sampling bucket it belongs to.
period_parser = dict(
    day=_asDayString,
    hour=_asHourString,
)
def main():
parser = argparse.ArgumentParser(description='Compute Apdex out of log files')
parser.add_argument('logfile', nargs='+',
......@@ -370,6 +348,8 @@ def main():
parser.add_argument('-o', '--out', default='.',
help='Directory in which statistic files will be generated. '
'Default: %(default)r')
parser.add_argument('-p', '--period', default='day', choices=period_parser,
help='Periodicity of sampling buckets. Default: %(default)r')
parser.add_argument('-q', '--quiet', action='store_true',
help='Suppress warnings about malformed lines.')
args = parser.parse_args()
......@@ -395,6 +375,7 @@ def main():
assert not key, key
matchline = re.compile(line_regex).match
matchrequest = REQUEST_PATTERN.match
asDate = period_parser[args.period]
site_list = args.site_list
default_site = args.default
if default_site is None:
......@@ -442,21 +423,7 @@ def main():
action = default_action
if action is None:
continue
timestamp = match.group('timestamp')
dt, tz = timestamp.split(' ')
if PARSE_DATE:
try:
tz = TZ_CACHE[tz]
except KeyError:
tz = TZ_CACHE[tz] = TZ(tz)
date, hour, minute, second = dt.split(':')
day, month, year = date.split('/')
dt = datetime(int(year), MONTH_VALUE_DICT[month], int(day),
int(hour), int(minute), int(second), tzinfo=tz)
utcdate = (dt - dt.utcoffset()).date()
else:
day, month, year = dt.split(':', 1)[0].split('/')
utcdate = '%s/%02i/%s' % (year, MONTH_VALUE_DICT[month], day)
utcdate = asDate(match.group('timestamp'))
hit_per_day[utcdate] += 1
try:
site_data = per_site[site]
......@@ -473,7 +440,8 @@ def main():
out.write('<h1>Site: %s</h1>' % site_id)
if matplotlib is not None:
daily_data = data.getApdexData()
date_list = [x[0] for x in daily_data]
date_list = [datetime.strptime(x[0], '%Y/%m/%d' + {'hour': ' %H'}.get(
args.period, '')) for x in daily_data]
apdex_plot = pyplot.subplot(2, 1, 1)
apdex_plot.xaxis.set_major_locator(all_days)
apdex_plot.xaxis.set_major_formatter(day_formatter)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment