Commit 26f6ab31 authored by Vincent Pelletier

Prepare date import.

parent bd9c05c1
...@@ -100,7 +100,7 @@ def getClassForStatusHit(hit, status): ...@@ -100,7 +100,7 @@ def getClassForStatusHit(hit, status):
def getDataPoints(apdex_dict): def getDataPoints(apdex_dict):
return [ return [
(date, apdex.getApdex() * 100, apdex.hit) for date, apdex (value_date, apdex.getApdex() * 100, apdex.hit) for value_date, apdex
in sorted(apdex_dict.iteritems(), key=ITEMGETTER0)] in sorted(apdex_dict.iteritems(), key=ITEMGETTER0)]
def prepareDataForGraph(daily_data, date_format, placeholder_delta): def prepareDataForGraph(daily_data, date_format, placeholder_delta):
...@@ -286,16 +286,16 @@ class GenericSiteStats(object): ...@@ -286,16 +286,16 @@ class GenericSiteStats(object):
def rescale(self, convert, getDuration): def rescale(self, convert, getDuration):
for status, date_dict in self.status.iteritems(): for status, date_dict in self.status.iteritems():
new_date_dict = defaultdict(int) new_date_dict = defaultdict(int)
for date, status_count in date_dict.iteritems(): for value_date, status_count in date_dict.iteritems():
new_date_dict[convert(date)] += status_count new_date_dict[convert(value_date)] += status_count
self.status[status] = new_date_dict self.status[status] = new_date_dict
new_apdex = defaultdict(partial(APDEXStats, self.threshold, getDuration)) new_apdex = defaultdict(partial(APDEXStats, self.threshold, getDuration))
for date, data in self.apdex.iteritems(): for value_date, data in self.apdex.iteritems():
new_apdex[convert(date)].accumulateFrom(data) new_apdex[convert(value_date)].accumulateFrom(data)
self.apdex = new_apdex self.apdex = new_apdex
def accumulate(self, match, url_match, date): def accumulate(self, match, url_match, value_date):
self.apdex[date].accumulate(match) self.apdex[value_date].accumulate(match)
if url_match is None: if url_match is None:
url = match.group('request') url = match.group('request')
else: else:
...@@ -303,7 +303,7 @@ class GenericSiteStats(object): ...@@ -303,7 +303,7 @@ class GenericSiteStats(object):
# XXX: can eat memory if there are many different urls # XXX: can eat memory if there are many different urls
self.url_apdex[url.split('?', 1)[0]].accumulate(match) self.url_apdex[url.split('?', 1)[0]].accumulate(match)
status = match.group('status') status = match.group('status')
self.status[status][date] += 1 self.status[status][value_date] += 1
if self.error_detail and statusIsError(status): if self.error_detail and statusIsError(status):
# XXX: can eat memory if there are many errors on many different urls # XXX: can eat memory if there are many errors on many different urls
self.error_url_count[status][url].append(match.group('referer')) self.error_url_count[status][url].append(match.group('referer'))
...@@ -335,14 +335,14 @@ class GenericSiteStats(object): ...@@ -335,14 +335,14 @@ class GenericSiteStats(object):
filtered_status = defaultdict(partial(defaultdict, int)) filtered_status = defaultdict(partial(defaultdict, int))
for status, date_dict in self.status.iteritems(): for status, date_dict in self.status.iteritems():
filtered_date_dict = filtered_status[status] filtered_date_dict = filtered_status[status]
for date, value in date_dict.iteritems(): for value_date, value in date_dict.iteritems():
filtered_date_dict[stat_filter(date)] += value filtered_date_dict[stat_filter(value_date)] += value
column_set.update(filtered_date_dict) column_set.update(filtered_date_dict)
column_list = sorted(column_set) column_list = sorted(column_set)
append('<h2>Hits per status code</h2><table class="stats"><tr>' append('<h2>Hits per status code</h2><table class="stats"><tr>'
'<th>status</th><th>overall</th>') '<th>status</th><th>overall</th>')
for date in column_list: for column in column_list:
append('<th>%s</th>' % date) append('<th>%s</th>' % column)
append('</tr>') append('</tr>')
def hitTd(hit, status): def hitTd(hit, status):
return '<td class="%s">%s</td>' % (getClassForStatusHit(hit, status), hit) return '<td class="%s">%s</td>' % (getClassForStatusHit(hit, status), hit)
...@@ -359,8 +359,8 @@ class GenericSiteStats(object): ...@@ -359,8 +359,8 @@ class GenericSiteStats(object):
has_errors |= statusIsError(status) has_errors |= statusIsError(status)
append('<tr><th>%s</th>' % statusAsHtml(status)) append('<tr><th>%s</th>' % statusAsHtml(status))
append(hitTd(sum(data_dict.itervalues()), status)) append(hitTd(sum(data_dict.itervalues()), status))
for date in column_list: for column in column_list:
append(hitTd(data_dict[date], status)) append(hitTd(data_dict[column], status))
append('</tr>') append('</tr>')
append('</table>') append('</table>')
if self.error_detail and has_errors: if self.error_detail and has_errors:
...@@ -440,8 +440,8 @@ class GenericSiteStats(object): ...@@ -440,8 +440,8 @@ class GenericSiteStats(object):
status = self.status status = self.status
for status_code, other_date_dict in other.status.iteritems(): for status_code, other_date_dict in other.status.iteritems():
date_dict = status[status_code] date_dict = status[status_code]
for date, count in other_date_dict.iteritems(): for status_date, count in other_date_dict.iteritems():
date_dict[date] += count date_dict[status_date] += count
class ERP5SiteStats(GenericSiteStats): class ERP5SiteStats(GenericSiteStats):
""" """
...@@ -470,28 +470,28 @@ class ERP5SiteStats(GenericSiteStats): ...@@ -470,28 +470,28 @@ class ERP5SiteStats(GenericSiteStats):
for document_dict in self.module.itervalues(): for document_dict in self.module.itervalues():
for is_document, date_dict in document_dict.iteritems(): for is_document, date_dict in document_dict.iteritems():
new_date_dict = defaultdict(partial(APDEXStats, threshold, getDuration)) new_date_dict = defaultdict(partial(APDEXStats, threshold, getDuration))
for date, data in date_dict.iteritems(): for value_date, data in date_dict.iteritems():
new_date_dict[convert(date)].accumulateFrom(data) new_date_dict[convert(value_date)].accumulateFrom(data)
document_dict[is_document] = new_date_dict document_dict[is_document] = new_date_dict
for attribute_id in ('no_module', 'site_search'): for attribute_id in ('no_module', 'site_search'):
attribute = defaultdict(partial(APDEXStats, threshold, getDuration)) attribute = defaultdict(partial(APDEXStats, threshold, getDuration))
for date, data in getattr(self, attribute_id).iteritems(): for value_date, data in getattr(self, attribute_id).iteritems():
attribute[convert(date)].accumulateFrom(data) attribute[convert(value_date)].accumulateFrom(data)
setattr(self, attribute_id, attribute) setattr(self, attribute_id, attribute)
def accumulate(self, match, url_match, date): def accumulate(self, match, url_match, value_date):
split = self.suffix(url_match.group('url')).split('?', 1)[0].split('/') split = self.suffix(url_match.group('url')).split('?', 1)[0].split('/')
if split and split[0].endswith('_module'): if split and split[0].endswith('_module'):
super(ERP5SiteStats, self).accumulate(match, url_match, date) super(ERP5SiteStats, self).accumulate(match, url_match, value_date)
module = split[0] module = split[0]
self.module[module][ self.module[module][
len(split) > 1 and (split[1] != 'view' and '_view' not in split[1]) len(split) > 1 and (split[1] != 'view' and '_view' not in split[1])
][date].accumulate(match) ][value_date].accumulate(match)
elif split and split[0] == 'ERP5Site_viewSearchResult': elif split and split[0] == 'ERP5Site_viewSearchResult':
super(ERP5SiteStats, self).accumulate(match, url_match, date) super(ERP5SiteStats, self).accumulate(match, url_match, value_date)
self.site_search[date].accumulate(match) self.site_search[value_date].accumulate(match)
else: else:
self.no_module[date].accumulate(match) self.no_module[value_date].accumulate(match)
def asHTML(self, date_format, placeholder_delta, graph_period, encoding, def asHTML(self, date_format, placeholder_delta, graph_period, encoding,
stat_filter=lambda x: x): stat_filter=lambda x: x):
...@@ -505,26 +505,26 @@ class ERP5SiteStats(GenericSiteStats): ...@@ -505,26 +505,26 @@ class ERP5SiteStats(GenericSiteStats):
filtered_module = defaultdict(partial(defaultdict, partial( filtered_module = defaultdict(partial(defaultdict, partial(
defaultdict, partial(APDEXStats, self.threshold, None)))) defaultdict, partial(APDEXStats, self.threshold, None))))
filtered_no_module = defaultdict(partial(APDEXStats, self.threshold, None)) filtered_no_module = defaultdict(partial(APDEXStats, self.threshold, None))
for date, value in self.no_module.iteritems(): for value_date, value in self.no_module.iteritems():
filtered_no_module[stat_filter(date)].accumulateFrom(value) filtered_no_module[stat_filter(value_date)].accumulateFrom(value)
column_set = set(filtered_no_module) column_set = set(filtered_no_module)
filtered_site_search = defaultdict(partial(APDEXStats, self.threshold, filtered_site_search = defaultdict(partial(APDEXStats, self.threshold,
None)) None))
for date, value in self.site_search.iteritems(): for value_date, value in self.site_search.iteritems():
filtered_site_search[stat_filter(date)].accumulateFrom(value) filtered_site_search[stat_filter(value_date)].accumulateFrom(value)
column_set.update(filtered_site_search) column_set.update(filtered_site_search)
for key, is_document_dict in self.module.iteritems(): for key, is_document_dict in self.module.iteritems():
filtered_is_document_dict = filtered_module[key] filtered_is_document_dict = filtered_module[key]
for key, data_dict in is_document_dict.iteritems(): for key, data_dict in is_document_dict.iteritems():
filtered_data_dict = filtered_is_document_dict[key] filtered_data_dict = filtered_is_document_dict[key]
module_document_apdex = module_document_overall[key] module_document_apdex = module_document_overall[key]
for date, value in data_dict.iteritems(): for value_date, value in data_dict.iteritems():
filtered_data_dict[stat_filter(date)].accumulateFrom(value) filtered_data_dict[stat_filter(value_date)].accumulateFrom(value)
module_document_apdex.accumulateFrom(value) module_document_apdex.accumulateFrom(value)
column_set.update(filtered_data_dict) column_set.update(filtered_data_dict)
column_list = sorted(column_set) column_list = sorted(column_set)
for date in column_list: for column in column_list:
append('<th colspan="4">%s</th>' % date) append('<th colspan="4">%s</th>' % column)
append('</tr><tr>') append('</tr><tr>')
for i in xrange(len(column_list) + 1): for i in xrange(len(column_list) + 1):
append(APDEXStats.asHTMLHeader(i == 0)) append(APDEXStats.asHTMLHeader(i == 0))
...@@ -534,8 +534,8 @@ class ERP5SiteStats(GenericSiteStats): ...@@ -534,8 +534,8 @@ class ERP5SiteStats(GenericSiteStats):
for data in data_dict.values(): for data in data_dict.values():
data_total.accumulateFrom(data) data_total.accumulateFrom(data)
append(data_total.asHTML(self.threshold, True)) append(data_total.asHTML(self.threshold, True))
for date in column_list: for column in column_list:
append(data_dict[date].asHTML(self.threshold)) append(data_dict[column].asHTML(self.threshold))
return data_total return data_total
def hiddenGraph(data_dict, title): def hiddenGraph(data_dict, title):
append('<td class="text group_right hidden_graph">') append('<td class="text group_right hidden_graph">')
...@@ -594,12 +594,12 @@ class ERP5SiteStats(GenericSiteStats): ...@@ -594,12 +594,12 @@ class ERP5SiteStats(GenericSiteStats):
module_dict = result.module[module_id] module_dict = result.module[module_id]
for is_document, date_dict_state in module_dict_state.iteritems(): for is_document, date_dict_state in module_dict_state.iteritems():
date_dict = module_dict[is_document == 'true'] date_dict = module_dict[is_document == 'true']
for date, apdex_state in date_dict_state.iteritems(): for value_date, apdex_state in date_dict_state.iteritems():
date_dict[date] = APDEXStats.fromJSONState(apdex_state, getDuration) date_dict[value_date] = APDEXStats.fromJSONState(apdex_state, getDuration)
for attribute_id in ('no_module', 'site_search'): for attribute_id in ('no_module', 'site_search'):
attribute = getattr(result, attribute_id) attribute = getattr(result, attribute_id)
for date, apdex_state in state[attribute_id].iteritems(): for value_date, apdex_state in state[attribute_id].iteritems():
attribute[date] = APDEXStats.fromJSONState(apdex_state, getDuration) attribute[value_date] = APDEXStats.fromJSONState(apdex_state, getDuration)
return result return result
def asJSONState(self): def asJSONState(self):
...@@ -621,12 +621,12 @@ class ERP5SiteStats(GenericSiteStats): ...@@ -621,12 +621,12 @@ class ERP5SiteStats(GenericSiteStats):
module_dict = module[module_id] module_dict = module[module_id]
for is_document, other_date_dict in other_module_dict.iteritems(): for is_document, other_date_dict in other_module_dict.iteritems():
date_dict = module_dict[is_document] date_dict = module_dict[is_document]
for date, apdex in other_date_dict.iteritems(): for value_date, apdex in other_date_dict.iteritems():
date_dict[date].accumulateFrom(apdex) date_dict[value_date].accumulateFrom(apdex)
for attribute_id in ('no_module', 'site_search'): for attribute_id in ('no_module', 'site_search'):
attribute = getattr(self, attribute_id) attribute = getattr(self, attribute_id)
for date, apdex in getattr(other, attribute_id).iteritems(): for value_date, apdex in getattr(other, attribute_id).iteritems():
attribute[date].accumulateFrom(apdex) attribute[value_date].accumulateFrom(apdex)
DURATION_US_FORMAT = '%D' DURATION_US_FORMAT = '%D'
DURATION_S_FORMAT = '%T' DURATION_S_FORMAT = '%T'
...@@ -722,8 +722,8 @@ def _asDayString(timestamp): ...@@ -722,8 +722,8 @@ def _asDayString(timestamp):
def _as6HourString(timestamp): def _as6HourString(timestamp):
dt, _ = timestamp.split(' ') dt, _ = timestamp.split(' ')
date, hour, _ = dt.split(':', 2) dt_date, hour, _ = dt.split(':', 2)
day, month, year = date.split('/') day, month, year = dt_date.split('/')
return '%s/%02i/%s %02i' % (year, MONTH_VALUE_DICT[month], day, return '%s/%02i/%s %02i' % (year, MONTH_VALUE_DICT[month], day,
int(hour) / 6 * 6) int(hour) / 6 * 6)
...@@ -736,8 +736,8 @@ def _hourAsWeekString(timestamp): ...@@ -736,8 +736,8 @@ def _hourAsWeekString(timestamp):
def _asHourString(timestamp): def _asHourString(timestamp):
dt, _ = timestamp.split(' ') dt, _ = timestamp.split(' ')
date, hour, _ = dt.split(':', 2) dt_date, hour, _ = dt.split(':', 2)
day, month, year = date.split('/') day, month, year = dt_date.split('/')
return '%s/%02i/%s %s' % (year, MONTH_VALUE_DICT[month], day, hour) return '%s/%02i/%s %s' % (year, MONTH_VALUE_DICT[month], day, hour)
# Key: argument (represents table granularity) # Key: argument (represents table granularity)
...@@ -855,10 +855,10 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict, ...@@ -855,10 +855,10 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
'<tr><th>date</th><th>hits</th></tr>') '<tr><th>date</th><th>hits</th></tr>')
hit_per_day = defaultdict(int) hit_per_day = defaultdict(int)
for site_data in per_site.itervalues(): for site_data in per_site.itervalues():
for date, _, hit in site_data.getApdexData(): for hit_date, _, hit in site_data.getApdexData():
hit_per_day[decimator(date)] += hit hit_per_day[decimator(hit_date)] += hit
for date, hit in sorted(hit_per_day.iteritems(), key=ITEMGETTER0): for hit_date, hit in sorted(hit_per_day.iteritems(), key=ITEMGETTER0):
out.write('<tr><td>%s</td><td>%s</td></tr>' % (date, hit)) out.write('<tr><td>%s</td><td>%s</td></tr>' % (hit_date, hit))
out.write('</table>') out.write('</table>')
for i, (site_id, data) in site_list: for i, (site_id, data) in site_list:
out.write('<h1 id="%s" title="%s">%s</h1>' % (i, escape(repr(site_id), out.write('<h1 id="%s" title="%s">%s</h1>' % (i, escape(repr(site_id),
...@@ -1170,12 +1170,12 @@ def main(): ...@@ -1170,12 +1170,12 @@ def main():
if action is None: if action is None:
skipped_lines += 1 skipped_lines += 1
continue continue
date = asDate(match.group('timestamp')) hit_date = asDate(match.group('timestamp'))
if to_next_period is not None: if to_next_period is not None:
if date > latest_date: # '' > None is True if hit_date > latest_date: # '' > None is True
latest_date = date latest_date = hit_date
if date < earliest_date or earliest_date is None: if hit_date < earliest_date or earliest_date is None:
earliest_date = date earliest_date = hit_date
next_period = getNextPeriod() next_period = getNextPeriod()
if latest_date > next_period: if latest_date > next_period:
try: try:
...@@ -1195,14 +1195,14 @@ def main(): ...@@ -1195,14 +1195,14 @@ def main():
if show_progress: if show_progress:
print >> sys.stderr, 'done (%s)' % timedelta(seconds=time.time() print >> sys.stderr, 'done (%s)' % timedelta(seconds=time.time()
- period_increase_start) - period_increase_start)
date = asDate(match.group('timestamp')) hit_date = asDate(match.group('timestamp'))
try: try:
site_data = per_site[site] site_data = per_site[site]
except KeyError: except KeyError:
site_data = per_site[site] = action(threshold, getDuration, site_data = per_site[site] = action(threshold, getDuration,
error_detail=error_detail) error_detail=error_detail)
try: try:
site_data.accumulate(match, url_match, date) site_data.accumulate(match, url_match, hit_date)
except Exception: except Exception:
if not quiet: if not quiet:
print >> sys.stderr, 'Error analysing line at %s:%i: %r' % (filename, print >> sys.stderr, 'Error analysing line at %s:%i: %r' % (filename,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment