Commit 6f7c77f3 authored by Vincent Pelletier

Correct hit graph to smooth uneven periods (esp. "7 days").

Also, switch "week" definition (graph granularity for "--period quarter")
from "7-day chunks starting on the 1st of the month" to "7-day chunks
starting on the 1st of January", so fewer dates need tweaking.
parent 26f6ab31
...@@ -28,12 +28,13 @@ ...@@ -28,12 +28,13 @@
############################################################################## ##############################################################################
from cgi import escape from cgi import escape
from collections import defaultdict, Counter from collections import defaultdict, Counter
from datetime import datetime, timedelta from datetime import datetime, timedelta, date
from functools import partial from functools import partial
from operator import itemgetter from operator import itemgetter
from urllib import splittype, splithost, unquote from urllib import splittype, splithost, unquote
import argparse import argparse
import bz2 import bz2
import calendar
import codecs import codecs
import gzip import gzip
import httplib import httplib
...@@ -103,16 +104,18 @@ def getDataPoints(apdex_dict): ...@@ -103,16 +104,18 @@ def getDataPoints(apdex_dict):
(value_date, apdex.getApdex() * 100, apdex.hit) for value_date, apdex (value_date, apdex.getApdex() * 100, apdex.hit) for value_date, apdex
in sorted(apdex_dict.iteritems(), key=ITEMGETTER0)] in sorted(apdex_dict.iteritems(), key=ITEMGETTER0)]
def prepareDataForGraph(daily_data, date_format, placeholder_delta): def prepareDataForGraph(daily_data, date_format, placeholder_delta,
coefficient_callback):
current_date = datetime.strptime(daily_data[0][0], date_format) current_date = datetime.strptime(daily_data[0][0], date_format)
new_daily_data = [] new_daily_data = []
append = new_daily_data.append append = new_daily_data.append
for measure in daily_data: for (measure_date_string, apdex, hit) in daily_data:
measure_date = datetime.strptime(measure[0], date_format) measure_date = datetime.strptime(measure_date_string, date_format)
while current_date < measure_date: while current_date < measure_date:
append((current_date.strftime(date_format), 100, 0)) append((current_date.strftime(date_format), 100, 0))
current_date += placeholder_delta current_date += placeholder_delta
append(measure) append((measure_date_string, apdex,
hit * coefficient_callback(measure_date)))
current_date = measure_date + placeholder_delta current_date = measure_date + placeholder_delta
return new_daily_data return new_daily_data
...@@ -311,8 +314,8 @@ class GenericSiteStats(object): ...@@ -311,8 +314,8 @@ class GenericSiteStats(object):
def getApdexData(self): def getApdexData(self):
return getDataPoints(self.apdex) return getDataPoints(self.apdex)
def asHTML(self, date_format, placeholder_delta, graph_period, encoding, def asHTML(self, date_format, placeholder_delta, graph_period,
stat_filter=lambda x: x): graph_coefficient, encoding, stat_filter=lambda x: x):
result = [] result = []
append = result.append append = result.append
apdex = APDEXStats(self.threshold, None) apdex = APDEXStats(self.threshold, None)
...@@ -493,8 +496,8 @@ class ERP5SiteStats(GenericSiteStats): ...@@ -493,8 +496,8 @@ class ERP5SiteStats(GenericSiteStats):
else: else:
self.no_module[value_date].accumulate(match) self.no_module[value_date].accumulate(match)
def asHTML(self, date_format, placeholder_delta, graph_period, encoding, def asHTML(self, date_format, placeholder_delta, graph_period, graph_coefficient,
stat_filter=lambda x: x): encoding, stat_filter=lambda x: x):
result = [] result = []
append = result.append append = result.append
append('<h2>Stats per module</h2><table class="stats stats_erp5"><tr>' append('<h2>Stats per module</h2><table class="stats stats_erp5"><tr>'
...@@ -548,7 +551,12 @@ class ERP5SiteStats(GenericSiteStats): ...@@ -548,7 +551,12 @@ class ERP5SiteStats(GenericSiteStats):
title title
) )
append(graphPair( append(graphPair(
prepareDataForGraph(data, date_format, placeholder_delta), prepareDataForGraph(
data,
date_format,
placeholder_delta,
graph_coefficient,
),
date_format, date_format,
graph_period, graph_period,
)) ))
...@@ -584,7 +592,8 @@ class ERP5SiteStats(GenericSiteStats): ...@@ -584,7 +592,8 @@ class ERP5SiteStats(GenericSiteStats):
append(module_document_overall[True].asHTML(self.threshold)) append(module_document_overall[True].asHTML(self.threshold))
append('</tr></table>') append('</tr></table>')
append(super(ERP5SiteStats, self).asHTML(date_format, append(super(ERP5SiteStats, self).asHTML(date_format,
placeholder_delta, graph_period, encoding, stat_filter=stat_filter)) placeholder_delta, graph_period, graph_coefficient, encoding,
stat_filter=stat_filter))
return '\n'.join(result) return '\n'.join(result)
@classmethod @classmethod
...@@ -703,17 +712,44 @@ def _asMonthString(timestamp): ...@@ -703,17 +712,44 @@ def _asMonthString(timestamp):
_, month, year = dt.split(':', 1)[0].split('/') _, month, year = dt.split(':', 1)[0].split('/')
return '%s/%02i' % (year, MONTH_VALUE_DICT[month]) return '%s/%02i' % (year, MONTH_VALUE_DICT[month])
_month_offset_cache = {}
def _asWeekString(timestamp): def _asWeekString(timestamp):
dt, _ = timestamp.split(' ') dt, _ = timestamp.split(' ')
day, month, year = dt.split(':', 1)[0].split('/') day, month, year = dt.split(':', 1)[0].split('/')
return '%s/%02i/%02i' % (year, MONTH_VALUE_DICT[month], (int(day) - 1) / 7 * 7 + 1) year = int(year)
month = MONTH_VALUE_DICT[month]
day = int(day)
key = (year, month)
try:
offset = _month_offset_cache[key]
except KeyError:
# Subtract 1 to exclude first day of month, and 1 to prepare for next
# operation (avoid subtracting on each run).
offset = date(year, month, 1).timetuple().tm_yday - 2
_month_offset_cache[key] = offset
day_of_year = day + offset
day -= day_of_year - (day_of_year / 7 * 7)
if day < 1:
month -= 1
day += calendar.monthrange(year, month)[1]
assert day > 0 and month > 0, (timestamp, year, month, day)
return '%04i/%02i/%02i' % (year, month, day)
def _weekStringAsQuarterString(timestamp): def _weekStringAsQuarterString(timestamp):
year, month, _ = timestamp.split('/') year, month, _ = timestamp.split('/')
return '%s/%02i' % (year, (int(month) - 1) / 3 * 3 + 1) return '%s/%02i' % (year, (int(month) - 1) / 3 * 3 + 1)
def _roundWeek(dt): def _roundWeek(dt):
return dt.replace(day=(dt.day - 1) / 7 * 7 + 1) day_of_year = dt.timetuple().tm_yday
return dt - timedelta(day_of_year - ((day_of_year - 1) / 7 * 7 + 1))
def _getWeekCoefficient(dt):
if dt.month != 12:
return 1
# 32 = 31 days of December + 1 day so YYYY/12/31 is still 1 day of measure,
# and return value is 7.
return max(1, 7. / (32 - dt.day))
def _asDayString(timestamp): def _asDayString(timestamp):
dt, _ = timestamp.split(' ') dt, _ = timestamp.split(' ')
...@@ -752,6 +788,10 @@ def _asHourString(timestamp): ...@@ -752,6 +788,10 @@ def _asHourString(timestamp):
# point # point
# - round a datetime.datetime instance so once represented using given format # - round a datetime.datetime instance so once represented using given format
# string it is a valid graph-granularity date for period # string it is a valid graph-granularity date for period
# - coefficient to apply to hit count for given (graph granularity)
# datetime.datetime. Most useful in case of "7 days", as the year's last
# chunk may be a single day, causing graph to display a value up to 7 times
# lower than what it should be.
period_parser = { period_parser = {
'year': ( 'year': (
_asMonthString, _asMonthString,
...@@ -761,16 +801,18 @@ period_parser = { ...@@ -761,16 +801,18 @@ period_parser = {
# Longest month: 31 days # Longest month: 31 days
timedelta(31), timedelta(31),
lambda x: x, lambda x: x,
lambda x: 31. / calendar.monthrange(x.year, x.month)[1],
), ),
'quarter': ( 'quarter': (
_asWeekString, _asWeekString,
_weekStringAsQuarterString, _weekStringAsQuarterString,
# Note: Not calendar weeks, but chunks of 7 days starting on first month's # Note: Not calendar weeks, but chunks of 7 days starting on first year's
# day. Cheaper to compute, and *should* not be a problem. # day. Cheaper to compute than locating first sunday/monday of the year.
'7 days', '7 days',
'%Y/%m/%d', '%Y/%m/%d',
timedelta(7), timedelta(7),
_roundWeek, _roundWeek,
_getWeekCoefficient,
), ),
'month': ( 'month': (
_asDayString, _asDayString,
...@@ -780,6 +822,7 @@ period_parser = { ...@@ -780,6 +822,7 @@ period_parser = {
# Longest day: 24 hours + 1h DST (never more ?) # Longest day: 24 hours + 1h DST (never more ?)
timedelta(seconds=3600 * 25), timedelta(seconds=3600 * 25),
lambda x: x, lambda x: x,
lambda x: 1, # XXX: take DST into account (1/24th) ?
), ),
'week': ( # XXX: should be "7 days", but a single word is more convenient 'week': ( # XXX: should be "7 days", but a single word is more convenient
_as6HourString, _as6HourString,
...@@ -788,6 +831,7 @@ period_parser = { ...@@ -788,6 +831,7 @@ period_parser = {
'%Y/%m/%d %H', '%Y/%m/%d %H',
timedelta(seconds=3600 * 6), timedelta(seconds=3600 * 6),
_round6Hour, _round6Hour,
lambda x: 1,
), ),
'day': ( 'day': (
_asHourString, _asHourString,
...@@ -797,6 +841,7 @@ period_parser = { ...@@ -797,6 +841,7 @@ period_parser = {
# Longest hour: 60 * 60 seconds + 1 leap second. # Longest hour: 60 * 60 seconds + 1 leap second.
timedelta(seconds=3601), timedelta(seconds=3601),
lambda x: x, lambda x: x,
lambda x: 1,
), ),
} }
...@@ -810,6 +855,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict, ...@@ -810,6 +855,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
date_format = period_parameter_dict['date_format'] date_format = period_parameter_dict['date_format']
placeholder_delta = period_parameter_dict['placeholder_delta'] placeholder_delta = period_parameter_dict['placeholder_delta']
graph_period = period_parameter_dict['graph_period'] graph_period = period_parameter_dict['graph_period']
graph_coefficient = period_parameter_dict['graph_coefficient']
out.write('<!DOCTYPE html>\n<html><head><meta charset="%s">' out.write('<!DOCTYPE html>\n<html><head><meta charset="%s">'
'<title>Stats</title>' % encoding) '<title>Stats</title>' % encoding)
js_embed = getattr(args, 'js_embed', True) js_embed = getattr(args, 'js_embed', True)
...@@ -871,13 +917,14 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict, ...@@ -871,13 +917,14 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
apdex_data, apdex_data,
date_format, date_format,
placeholder_delta, placeholder_delta,
graph_coefficient,
), ),
date_format, date_format,
graph_period, graph_period,
) )
) )
out.write(data.asHTML(date_format, placeholder_delta, graph_period, out.write(data.asHTML(date_format, placeholder_delta, graph_period,
encoding, decimator)) graph_coefficient, encoding, decimator))
end_stat_time = time.time() end_stat_time = time.time()
if args.stats: if args.stats:
out.write('<h1>Parsing stats</h1><table class="stats">') out.write('<h1>Parsing stats</h1><table class="stats">')
...@@ -1067,7 +1114,7 @@ def main(): ...@@ -1067,7 +1114,7 @@ def main():
to_next_period = None to_next_period = None
period = args.period period = args.period
asDate, decimator, graph_period, date_format, placeholder_delta, \ asDate, decimator, graph_period, date_format, placeholder_delta, \
round_date = period_parser[period] round_date, graph_coefficient = period_parser[period]
site_list, site_caption_dict = args.path site_list, site_caption_dict = args.path
default_site = args.default default_site = args.default
if default_site is None: if default_site is None:
...@@ -1188,7 +1235,7 @@ def main(): ...@@ -1188,7 +1235,7 @@ def main():
print >> sys.stderr, 'Increasing period to', period, '...', print >> sys.stderr, 'Increasing period to', period, '...',
old_date_format = date_format old_date_format = date_format
asDate, decimator, graph_period, date_format, placeholder_delta, \ asDate, decimator, graph_period, date_format, placeholder_delta, \
round_date = period_parser[period] round_date, graph_coefficient = period_parser[period]
period_increase_start = time.time() period_increase_start = time.time()
for site_data in per_site.itervalues(): for site_data in per_site.itervalues():
site_data.rescale(rescale, getDuration) site_data.rescale(rescale, getDuration)
...@@ -1224,6 +1271,7 @@ def main(): ...@@ -1224,6 +1271,7 @@ def main():
'date_format': date_format, 'date_format': date_format,
'placeholder_delta': placeholder_delta, 'placeholder_delta': placeholder_delta,
'graph_period': graph_period, 'graph_period': graph_period,
'graph_coefficient': graph_coefficient,
}, { }, {
'start_time': start_time, 'start_time': start_time,
'end_parsing_time': end_parsing_time, 'end_parsing_time': end_parsing_time,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment