Commit cd70bb49 authored by Vincent Pelletier

Add an option to convert timezone.

Do the conversion while parsing, so that (for example) day splitting happens at
midnight in the chosen timezone instead of midnight in the source timezone.
When no timezone is provided, use the input's times verbatim.
Note that DST changes cause artefacts in graphs (a doubled or zeroed hour).
Also, display the timezone used in the generated page.
parent 36fb206e
......@@ -30,7 +30,7 @@ from __future__ import print_function, division, absolute_import, \
unicode_literals
from cgi import escape
from collections import defaultdict, Counter
from datetime import datetime, timedelta, date
from datetime import datetime, timedelta, date, tzinfo
from functools import partial
from operator import itemgetter
from urllib import splittype, splithost, unquote
......@@ -52,6 +52,11 @@ import shlex
import sys
import time
import traceback
try:
import pytz
except ImportError:
pytz = None
def getResource(name, encoding='utf-8'):
    """
    Return the content of a resource shipped with this module, as text.

    name: resource path, handed to pkgutil.get_data relative to this module.
    encoding: codec used to decode the raw resource bytes.
    """
    raw_data = pkgutil.get_data(__name__, name)
    return raw_data.decode(encoding)
......@@ -876,19 +881,12 @@ class ShlexArgumentParser(argparse.ArgumentParser):
return super(ShlexArgumentParser, self).parse_known_args(args=args,
namespace=namespace)
def _asMonthString(timestamp):
    """
    Return the "year/month" string for given log timestamp.

    timestamp must contain exactly one space; what follows it is ignored.
    What precedes it is cut at its first ":" and the remainder is read as
    "day/month/year", month being a key of MONTH_VALUE_DICT.
    """
    local_part, _ignored = timestamp.split(' ')
    date_part = local_part.split(':', 1)[0]
    _day, month_name, year = date_part.split('/')
    month_number = MONTH_VALUE_DICT[month_name]
    return '%s/%02i' % (year, month_number)
_month_offset_cache = {}
def _asWeekString(timestamp):
dt, _ = timestamp.split(' ')
day, month, year = dt.split(':', 1)[0].split('/')
year = int(year)
month = MONTH_VALUE_DICT[month]
day = int(day)
def _asWeekString(dt):
year = dt.year
month = dt.month
day = dt.day
key = (year, month)
try:
offset = _month_offset_cache[key]
......@@ -902,7 +900,7 @@ def _asWeekString(timestamp):
if day < 1:
month -= 1
day += calendar.monthrange(year, month)[1]
assert day > 0 and month > 0, (timestamp, year, month, day)
assert day > 0 and month > 0, (dt, year, month, day)
return '%04i/%02i/%02i' % (year, month, day)
def _weekStringAsQuarterString(timestamp):
......@@ -920,18 +918,6 @@ def _getWeekCoefficient(dt):
# and return value is 7.
return max(1, 7. / (32 - dt.day))
def _asDayString(timestamp):
    """
    Return the "year/month/day" string for given log timestamp.

    timestamp must contain exactly one space; what follows it is ignored.
    The date is what precedes the first ":" and is read as "day/month/year",
    month being a key of MONTH_VALUE_DICT. Day and year are copied verbatim.
    """
    local_part, _ignored = timestamp.split(' ')
    date_part = local_part.split(':', 1)[0]
    day, month_name, year = date_part.split('/')
    month_number = MONTH_VALUE_DICT[month_name]
    return '%s/%02i/%s' % (year, month_number, day)
def _as6HourString(timestamp):
    """
    Return "year/month/day hour" for given log timestamp, with the hour
    rounded down to a multiple of 6.

    timestamp must contain exactly one space; what follows it is ignored.
    What precedes it is read as "day/month/year:hour:rest", month being a key
    of MONTH_VALUE_DICT.
    """
    local_part, _ignored = timestamp.split(' ')
    date_part, hour_text, _rest = local_part.split(':', 2)
    day, month_name, year = date_part.split('/')
    # Month lookup happens before hour conversion, as both may raise.
    month_number = MONTH_VALUE_DICT[month_name]
    hour_bucket = int(hour_text) // 6 * 6
    return '%s/%02i/%s %02i' % (year, month_number, day, hour_bucket)
def _round6Hour(dt):
    """
    Return a copy of dt with its hour rounded down to a multiple of 6.

    Only the hour is altered: every other field of dt is left untouched.
    """
    hour = dt.hour
    return dt.replace(hour=hour - hour % 6)
......@@ -939,29 +925,11 @@ def _hourAsWeekString(timestamp):
dt = datetime.strptime(timestamp, '%Y/%m/%d %H')
return (dt - timedelta(dt.weekday())).date().strftime('%Y/%m/%d')
def _asHourString(timestamp):
    """
    Return the "year/month/day hour" string for given log timestamp.

    timestamp must contain exactly one space; what follows it is ignored.
    What precedes it is read as "day/month/year:hour:rest", month being a key
    of MONTH_VALUE_DICT. Day, year and hour are copied verbatim.
    """
    local_part, _ignored = timestamp.split(' ')
    date_part, hour_text, _rest = local_part.split(':', 2)
    day, month_name, year = date_part.split('/')
    month_number = MONTH_VALUE_DICT[month_name]
    return '%s/%02i/%s %s' % (year, month_number, day, hour_text)
def _asHalfDayString(timestamp):
    """
    Reduce a "<date> <hour>:<minute>" string to "<date> <hour>", with the hour
    rounded down to a multiple of 12 (so either 00 or 12 for a valid hour).

    Whatever follows the last ":" is dropped. The remainder must contain
    exactly one space, separating the date from the hour.
    """
    head, _minute = timestamp.rsplit(':', 1)
    date_text, hour_text = head.split(' ')
    half_day_start = int(hour_text) // 12 * 12
    return '%s %02i' % (date_text, half_day_start)
def _as30MinutesString(timestamp):
    """
    Return "year/month/day hour:minute" for given log timestamp, with the
    minute rounded down to a multiple of 30.

    timestamp must contain exactly one space; what follows it is ignored.
    What precedes it is read as "day/month/year:hour:minute:rest", month
    being a key of MONTH_VALUE_DICT.
    """
    local_part, _ignored = timestamp.split(' ')
    date_part, hour_text, minute_text, _rest = local_part.split(':', 3)
    day, month_name, year = date_part.split('/')
    # Month lookup happens before minute conversion, as both may raise.
    month_number = MONTH_VALUE_DICT[month_name]
    minute_bucket = int(minute_text) // 30 * 30
    return '%s/%02i/%s %s:%02i' % (
        year, month_number, day, hour_text, minute_bucket)
def _asMinuteString(timestamp):
    """
    Return the "year/month/day hour:minute" string for given log timestamp.

    timestamp must contain exactly one space; what follows it is ignored.
    What precedes it is read as "day/month/year:hour:minute:rest", month
    being a key of MONTH_VALUE_DICT. All other fields are copied verbatim.
    """
    local_part, _ignored = timestamp.split(' ')
    date_part, hour_text, minute_text, _rest = local_part.split(':', 3)
    day, month_name, year = date_part.split('/')
    month_number = MONTH_VALUE_DICT[month_name]
    return '%s/%02i/%s %s:%s' % (
        year, month_number, day, hour_text, minute_text)
def _asQuarterHourString(timestamp):
    """
    Round the minute ending given string down to a multiple of 15.

    The minute is whatever follows the last ":" of timestamp; everything
    before that ":" is kept verbatim.
    """
    head, minute_text = timestamp.rsplit(':', 1)
    minute = int(minute_text)
    return '%s:%02i' % (head, minute - minute % 15)
......@@ -984,7 +952,7 @@ def _asQuarterHourString(timestamp):
# than what it should be.
period_parser = {
'year': (
_asMonthString,
lambda x: x.strftime('%Y/%m'),
lambda x: x.split('/', 1)[0],
'month',
'%Y/%m',
......@@ -1007,7 +975,7 @@ period_parser = {
_getWeekCoefficient,
),
'month': (
_asDayString,
lambda x: x.strftime('%Y/%m/%d'),
lambda x: '/'.join(x.split('/', 2)[:2]),
'day',
'%Y/%m/%d',
......@@ -1018,7 +986,7 @@ period_parser = {
lambda x: 1,
),
'week': (
_as6HourString,
lambda x: x.strftime('%Y/%m/%d ') + '%02i' % (x.hour // 6 * 6),
_hourAsWeekString,
'6 hours',
'%Y/%m/%d %H',
......@@ -1028,7 +996,7 @@ period_parser = {
lambda x: 1,
),
'day': (
_asHourString,
lambda x: x.strftime('%Y/%m/%d %H'),
lambda x: x.split(' ')[0],
'hour',
'%Y/%m/%d %H',
......@@ -1039,7 +1007,7 @@ period_parser = {
lambda x: 1,
),
'halfday': (
_as30MinutesString,
lambda x: x.strftime('%Y/%m/%d %H:') + '%02i' % (x.minute // 30 * 30),
_asHalfDayString,
'30 minutes',
'%Y/%m/%d %H:%M',
......@@ -1048,7 +1016,7 @@ period_parser = {
lambda x: 1,
),
'quarterhour': (
_asMinuteString,
lambda x: x.strftime('%Y/%m/%d %H:%M'),
_asQuarterHourString,
'minute',
'%Y/%m/%d %H:%M',
......@@ -1121,6 +1089,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
for caption, value in (
('apdex threshold', '%.2fs' % args.apdex),
('period', args.period or (period + ' (auto)')),
('timezone', args.to_timezone or "(input's)")
):
out.write('<tr><th class="text">%s</th><td>%s</td></tr>' % (
caption, value))
......@@ -1215,6 +1184,44 @@ format_generator = {
'json': (asJSON, 'ascii'),
}
ZERO_TIMEDELTA = timedelta(0, 0)


class AutoTZInfo(tzinfo):
    """
    Only for fixed UTC offsets ([+-]HHMM)
    Because datetime.strptime doesn't support %z.
    """

    def __init__(self, name):
        """
        name: UTC offset as a 5-character "[+-]HHMM" string (ex: "-0700").

        Raises AssertionError when name does not have the expected shape, and
        ValueError when the hour or minute part is not a number.
        """
        # NOTE(review): validation relies on assert, which is stripped when
        # running with -O. Kept as-is so the raised exception type does not
        # change for existing callers.
        assert len(name) == 5, repr(name)
        sign = name[0]
        assert sign in '+-', sign
        hour = int(name[1:3])
        # Offsets in actual use go up to UTC+14:00 (ex: +1300, +1400), so
        # limiting the hour to 12 would reject valid timestamps.
        assert 0 <= hour <= 14, hour
        minute = int(name[3:])
        assert 0 <= minute < 60, minute
        if sign == '-':
            # Sign applies to the whole offset, not just to the hour.
            hour = -hour
            minute = -minute
        self.offset = timedelta(hours=hour, minutes=minute)
        self.name = name

    def utcoffset(self, dt):
        """Return the fixed offset from UTC, whatever dt is."""
        return self.offset

    def dst(self, dt):
        """Return a null timedelta: fixed offsets have no DST."""
        return ZERO_TIMEDELTA

    def tzname(self, dt):
        """Return the "[+-]HHMM" string this instance was built from."""
        return self.name
# AutoTZInfo instances already built, by offset string.
_tz_cache = {}


def getTZInfo(tz):
    """
    Return the AutoTZInfo instance for given "[+-]HHMM" offset string.

    Instances are memoised in _tz_cache, so each distinct offset string is
    only parsed once and always yields the same object.
    """
    if tz not in _tz_cache:
        _tz_cache[tz] = AutoTZInfo(tz)
    return _tz_cache[tz]
def main():
parser = ShlexArgumentParser(description='Compute Apdex out of '
'apache-style log files', fromfile_prefix_chars='@')
......@@ -1233,6 +1240,12 @@ def main():
'Does not imply -q.')
parser.add_argument('--state-file', nargs='+', default=[],
help='Use given JSON files as initial state. Use - for stdin.')
parser.add_argument('--to-timezone', help='Timezone to convert log '
'timestamps to before splitting days. If not provided, no conversion '
'happens. In addition to "Continent/City" format which know about DST '
'but requires pytz module, fixed UTC offsets can be provided in the '
'+hhmm form (ex: -0700 for UTC-7). This form does not require pytz '
'module.')
group = parser.add_argument_group('generated content (all formats)')
group.add_argument('-a', '--apdex', default=1.0, type=float,
......@@ -1346,6 +1359,24 @@ def main():
else:
to_next_period = None
period = args.period
def _matchToDateTime(match):
dt, tz = match.group('timestamp').split()
day, month, rest = dt.split('/', 2)
return datetime.strptime(
'%s/%02i/%s' % (day, MONTH_VALUE_DICT[month], rest),
'%d/%m/%Y:%H:%M:%S').replace(tzinfo=getTZInfo(tz))
if args.to_timezone:
to_timezone = args.to_timezone
if re.match(r'^[+-]\d{4}$', to_timezone):
getTimezoneInfo = getTZInfo
else:
if pytz is None:
raise ValueError('pytz is not available, cannot convert timezone.')
getTimezoneInfo = pytz.timezone
tz_info = getTimezoneInfo(to_timezone)
matchToDateTime = lambda x: _matchToDateTime(x).astimezone(tz_info)
else:
matchToDateTime = _matchToDateTime
asDate, decimator, graph_period, date_format, placeholder_delta, \
round_date, graph_coefficient = period_parser[period]
site_list, site_caption_dict = args.path
......@@ -1459,7 +1490,7 @@ def main():
if action is None:
skipped_lines += 1
continue
hit_date = asDate(match.group('timestamp'))
hit_date = asDate(matchToDateTime(match))
if to_next_period is not None:
if latest_date is None or latest_date < hit_date:
latest_date = hit_date
......@@ -1488,7 +1519,7 @@ def main():
if show_progress:
print('done (%s)' % timedelta(seconds=time.time()
- period_increase_start), file=sys.stderr)
hit_date = asDate(match.group('timestamp'))
hit_date = asDate(matchToDateTime(match))
try:
site_data = per_site[site]
except KeyError:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment