Commit ffdc722a authored by Jérome Perrin

update for python3 >= 3.9

drop support for python 2
parent 943a005d
......@@ -26,23 +26,23 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from __future__ import print_function, division, absolute_import, \
unicode_literals
from cgi import escape
from html import escape
from collections import defaultdict, Counter
from datetime import datetime, timedelta, date, tzinfo
from functools import partial
from operator import itemgetter
from urllib import splittype, splithost
from urllib.parse import splittype, splithost
import argparse
import bz2
import calendar
import codecs
import functools
import gzip
import httplib
import http.client
import itertools
import json
import lzma
import math
import os
import pkgutil
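The import hunk is the mechanical part of the port: py2 module names are swapped for their py3 homes. A minimal sketch of the renames, with one behavioural caveat worth remembering: html.escape() escapes quotes by default, whereas cgi.escape() did not.

from html import escape                         # py2: from cgi import escape
import http.client                              # py2: import httplib
from urllib.parse import splittype, splithost   # py2: from urllib import splittype, splithost

# html.escape defaults to quote=True, while cgi.escape defaulted to quote=False.
print(escape('<a href="x">'))                   # &lt;a href=&quot;x&quot;&gt;
print(http.client.responses[404])               # Not Found
# splittype/splithost survive in urllib.parse on 3.9, but only as deprecated aliases.
print(splithost(splittype('http://h/p?q')[1]))  # ('h', '/p?q')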
......@@ -61,47 +61,16 @@ except ImportError:
def getResource(name, encoding='utf-8'):
return pkgutil.get_data(__name__, name).decode(encoding)
def _wrapOpen(func):
@functools.wraps(func)
def wrapper(*args, **kw):
encoding = kw.pop('encoding', None)
info = codecs.lookup(encoding)
errors = kw.pop('errors', 'strict')
file_object = func(*args, **kw)
if encoding is None:
return file_object
srw = codecs.StreamReaderWriter(
file_object,
info.streamreader,
info.streamwriter,
errors,
)
srw.encoding = encoding
return srw
return wrapper
gzip_open = gzip.open
if sys.version_info >= (3, 3):
import lzma
lzma_open = lzma.open
bz2_open = bz2.open
_read_mode = 'rt'
else:
gzip_open = _wrapOpen(gzip_open)
bz2_open = _wrapOpen(bz2.BZ2File)
_read_mode = 'r'
try:
from backports import lzma
lzma_open = _wrapOpen(lzma.open)
except ImportError:
lzma = None
lzma_open = lzma.open
bz2_open = bz2.open
FILE_OPENER_LIST = [
(gzip_open, IOError),
(bz2_open, IOError),
(lzma_open, lzma.LZMAError)
]
if lzma is not None:
FILE_OPENER_LIST.append((lzma_open, lzma.LZMAError))
# XXX: what encoding ? apache doesn't document one, but requests are supposed
# to be urlencoded, so pure ascii. Are timestamps localised ?
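The deleted _wrapOpen() shim existed because py2's gzip/bz2 openers could not decode text themselves; on py3 all three stdlib openers accept mode='rt' plus encoding/errors, so the compressed-input probing reduces to something like this sketch (openLog and the file name are hypothetical, mirroring the loop in main() further down):

import bz2, gzip, lzma

FILE_OPENER_LIST = [
    (gzip.open, IOError),
    (bz2.open, IOError),
    (lzma.open, lzma.LZMAError),
]

def openLog(filename):
    # Try each compressed opener; an opener for the wrong format fails on readline().
    for opener, error in FILE_OPENER_LIST:
        logfile = opener(filename, 'rt', encoding='ascii', errors='replace')
        try:
            logfile.readline()
        except error:
            continue
        logfile.seek(0)
        return logfile
    # Fall back to plain text.
    return open(filename, 'r', encoding='ascii', errors='replace')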
......@@ -131,7 +100,7 @@ AUTO_PERIOD_COEF = 200
LARGER_THAN_INTEGER_STR = 'A'
SMALLER_THAN_INTEGER_STR = ''
HTTP_STATUS_CAPTION_DICT = httplib.responses.copy()
HTTP_STATUS_CAPTION_DICT = http.client.responses.copy()
# Non-standard status codes
HTTP_STATUS_CAPTION_DICT.setdefault(499, 'Client Closed Request')
HTTP_STATUS_CAPTION_DICT.setdefault(444, 'No Response')
......@@ -153,9 +122,9 @@ def getClassForStatusHit(hit, status):
def getDataPoints(apdex_dict, status_period_dict={}):
period_error_dict = defaultdict(int)
for status, period_dict in status_period_dict.iteritems():
for status, period_dict in status_period_dict.items():
if statusIsError(status):
for period, hit in period_dict.iteritems():
for period, hit in period_dict.items():
period_error_dict[period] += hit
# If there was an error, there was a hit, and apdex_dict must contain it
# (at same date).
......@@ -166,7 +135,7 @@ def getDataPoints(apdex_dict, status_period_dict={}):
apdex.getApdex() * 100,
apdex.hit,
period_error_dict.get(value_date, 0),
) for value_date, apdex in sorted(apdex_dict.iteritems(), key=ITEMGETTER0)
) for value_date, apdex in sorted(apdex_dict.items(), key=ITEMGETTER0)
]
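Most of the diff is one substitution: py3 removed dict.iteritems()/itervalues()/iterkeys(), and the view-returning .items()/.values() are drop-in replacements wherever the result is only iterated, as in the sorted() call above. A tiny sketch with hypothetical data:

from operator import itemgetter

ITEMGETTER0 = itemgetter(0)
apdex_dict = {'2017-07-14': 0.98, '2017-07-13': 1.0}
# sorted() consumes the dict view directly; .items() builds no intermediate list.
for value_date, apdex in sorted(apdex_dict.items(), key=ITEMGETTER0):
    print(value_date, apdex)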
def prepareDataForGraph(daily_data, date_format, placeholder_delta,
......@@ -202,7 +171,7 @@ def graphPair(daily_data, date_format, graph_period, apdex_y_min=None,
yLabelWidth = max(int(math.log10(max(x[2] for x in daily_data))) + 1,
3) * 6
return graph('apdex',
[zip(date_list, (round(x[1], 2) for x in daily_data))],
[list(zip(date_list, (round(x[1], 2) for x in daily_data)))],
{
'xaxis': {
'mode': 'time',
......@@ -225,12 +194,12 @@ def graphPair(daily_data, date_format, graph_period, apdex_y_min=None,
[
{
'label': 'Errors',
'data': zip(date_list, (x[3] for x in daily_data)),
'data': list(zip(date_list, (x[3] for x in daily_data))),
'color': 'red',
},
{
'label': 'Hits',
'data': zip(date_list, (x[2] for x in daily_data)),
'data': list(zip(date_list, (x[2] for x in daily_data))),
},
],
{
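zip() needs more care than iteritems(): on py3 it returns a lazy iterator, and these pairs end up serialised for the flot graphs, so each one is materialised with list(). For example (hypothetical data):

import json

date_list = ['2017/07/13', '2017/07/14']
hits = [120, 80]
series = list(zip(date_list, hits))   # a bare py3 zip() is lazy; json.dumps() would reject it
print(json.dumps(series))             # [["2017/07/13", 120], ["2017/07/14", 80]]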
......@@ -326,7 +295,7 @@ class APDEXStats(object):
extra_class = ''
apdex_style = 'color: #%s; background-color: #%s' % (
(apdex < .5 and 'f' or '0') * 3,
('%x' % (apdex * 0xf)) * 3,
('%x' % int(apdex * 0xf)) * 3,
)
else:
extra_class = 'no_hit'
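The int() cast is a genuine py3 fix, not cosmetics: py3's '%x' formatter rejects floats, and apdex is a ratio in [0, 1] being scaled to one hex digit of a grey CSS colour. A worked example:

apdex = 0.75
digit = '%x' % int(apdex * 0xf)     # int(11.25) -> 11 -> 'b'; '%x' % 11.25 raises TypeError on py3
print('color: #%s' % (digit * 3))   # color: #bbb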
......@@ -363,7 +332,7 @@ class APDEXStats(object):
return result
_APDEXDateDictAsJSONState = lambda date_dict: dict(((y, z.asJSONState())
for y, z in date_dict.iteritems()))
for y, z in date_dict.items()))
class GenericSiteStats(object):
def __init__(self, threshold, getDuration, suffix, error_detail=False,
......@@ -383,13 +352,13 @@ class GenericSiteStats(object):
self.user_agent_counter = Counter()
def rescale(self, convert, getDuration):
for status, date_dict in self.status.iteritems():
for status, date_dict in self.status.items():
new_date_dict = defaultdict(int)
for value_date, status_count in date_dict.iteritems():
for value_date, status_count in date_dict.items():
new_date_dict[convert(value_date)] += status_count
self.status[status] = new_date_dict
new_apdex = defaultdict(partial(APDEXStats, self.threshold, getDuration))
for value_date, data in self.apdex.iteritems():
for value_date, data in self.apdex.items():
new_apdex[convert(value_date)].accumulateFrom(data)
self.apdex = new_apdex
......@@ -422,7 +391,7 @@ class GenericSiteStats(object):
result = []
append = result.append
apdex = APDEXStats(self.threshold, None)
for data in self.apdex.itervalues():
for data in self.apdex.values():
apdex.accumulateFrom(data)
append('<h2>Overall</h2><table class="stats"><tr>')
append(APDEXStats.asHTMLHeader())
......@@ -431,7 +400,7 @@ class GenericSiteStats(object):
append('</tr></table><h2>Hottest pages</h2><table class="stats"><tr>')
append(APDEXStats.asHTMLHeader())
append('<th>url</th></tr>')
for url, data in sorted(self.url_apdex.iteritems(), key=lambda x: x[1].getAverage() * x[1].hit,
for url, data in sorted(self.url_apdex.items(), key=lambda x: x[1].getAverage() * x[1].hit,
reverse=True)[:n_hottest_pages]:
append('<tr>')
append(data.asHTML(self.threshold))
......@@ -445,9 +414,9 @@ class GenericSiteStats(object):
append('</table>')
column_set = set()
filtered_status = defaultdict(partial(defaultdict, int))
for status, date_dict in self.status.iteritems():
for status, date_dict in self.status.items():
filtered_date_dict = filtered_status[status]
for value_date, value in date_dict.iteritems():
for value_date, value in date_dict.items():
filtered_date_dict[stat_filter(value_date)] += value
column_set.update(filtered_date_dict)
column_list = sorted(column_set)
......@@ -466,25 +435,24 @@ class GenericSiteStats(object):
else:
return '<abbr title="%s">%s</abbr>' % (definition, status)
has_errors = False
for status, data_dict in sorted(filtered_status.iteritems(),
key=ITEMGETTER0):
for status, data_dict in sorted(filtered_status.items(), key=ITEMGETTER0):
has_errors |= statusIsError(status)
append('<tr title="%s"><th>%s</th>' % (status, statusAsHtml(status)))
append(hitTd(sum(data_dict.itervalues()), status))
append(hitTd(sum(data_dict.values()), status))
for column in column_list:
append(hitTd(data_dict[column], status))
append('</tr>')
append('</table>')
if self.error_detail and has_errors:
def getHitForUrl(referer_counter):
return sum(referer_counter.itervalues())
return sum(referer_counter.values())
filtered_status_url = defaultdict(partial(defaultdict, dict))
for status, url_dict in self.error_url_count.iteritems():
filtered_status_url[status] = sorted(url_dict.iteritems(),
for status, url_dict in self.error_url_count.items():
filtered_status_url[status] = sorted(url_dict.items(),
key=lambda x: getHitForUrl(x[1]), reverse=True)[:N_ERROR_URL]
append('<h3>Error detail</h3><table class="stats"><tr><th>status</th>'
'<th>hits</th><th>url</th><th>referers</th></tr>')
for status, url_list in sorted(filtered_status_url.iteritems(),
for status, url_list in sorted(filtered_status_url.items(),
key=ITEMGETTER0):
append('<tr><th rowspan="%s">%s</th>' % (len(url_list),
statusAsHtml(status)))
......@@ -513,16 +481,16 @@ class GenericSiteStats(object):
state.get('user_agent_detail', True))
if error_detail:
error_url_count = result.error_url_count
for state_status, state_url_dict in state['error_url_count'].iteritems():
for state_status, state_url_dict in state['error_url_count'].items():
url_dict = error_url_count[state_status]
for url, counter in state_url_dict.iteritems():
for url, counter in state_url_dict.items():
url_dict[url].update(counter)
for attribute_id in ('url_apdex', 'apdex'):
attribute = getattr(result, attribute_id)
for key, apdex_state in state[attribute_id].iteritems():
for key, apdex_state in state[attribute_id].items():
attribute[key] = APDEXStats.fromJSONState(apdex_state, getDuration)
status = result.status
for status_code, date_dict in state['status'].iteritems():
for status_code, date_dict in state['status'].items():
status[status_code].update(date_dict)
result.user_agent_counter.update(state['user_agent_counter'])
return result
......@@ -544,18 +512,18 @@ class GenericSiteStats(object):
# user_agent_detail.
# Assuming they are consistently set.
if self.error_detail:
for status, other_url_dict in other.error_url_count.iteritems():
for status, other_url_dict in other.error_url_count.items():
url_dict = self.error_url_count[status]
for url, referer_counter in other_url_dict.iteritems():
for url, referer_counter in other_url_dict.items():
url_dict[url].update(referer_counter)
for attribute_id in ('url_apdex', 'apdex'):
self_attribute = getattr(self, attribute_id)
for key, apdex_data in getattr(other, attribute_id).iteritems():
for key, apdex_data in getattr(other, attribute_id).items():
self_attribute[key].accumulateFrom(apdex_data)
status = self.status
for status_code, other_date_dict in other.status.iteritems():
for status_code, other_date_dict in other.status.items():
date_dict = status[status_code]
for status_date, count in other_date_dict.iteritems():
for status_date, count in other_date_dict.items():
date_dict[status_date] += count
self.user_agent_counter.update(other.user_agent_counter)
......@@ -594,21 +562,21 @@ class ERP5SiteStats(GenericSiteStats):
def rescale(self, convert, getDuration):
super(ERP5SiteStats, self).rescale(convert, getDuration)
threshold = self.threshold
for document_dict in self.module.itervalues():
for is_document, date_dict in document_dict.iteritems():
for document_dict in self.module.values():
for is_document, date_dict in document_dict.items():
new_date_dict = defaultdict(partial(APDEXStats, threshold, getDuration))
for value_date, data in date_dict.iteritems():
for value_date, data in date_dict.items():
new_date_dict[convert(value_date)].accumulateFrom(data)
document_dict[is_document] = new_date_dict
for id_, date_dict in self.no_module.iteritems():
for id_, date_dict in self.no_module.items():
new_date_dict = defaultdict(partial(APDEXStats, threshold, getDuration))
for value_date, data in date_dict.iteritems():
for value_date, data in date_dict.items():
new_date_dict[convert(value_date)].accumulateFrom(data)
self.no_module[id_] = new_date_dict
attribute = defaultdict(partial(APDEXStats, threshold, getDuration))
for value_date, data in self.site_search.iteritems():
for value_date, data in self.site_search.items():
attribute[convert(value_date)].accumulateFrom(data)
self.site_search = attribute
......@@ -647,23 +615,23 @@ class ERP5SiteStats(GenericSiteStats):
filtered_no_module = defaultdict(partial(
defaultdict, partial(APDEXStats, self.threshold, None)))
column_set = set()
for key, data_dict in self.no_module.iteritems():
for key, data_dict in self.no_module.items():
filtered_id_dict = filtered_no_module[key]
for value_date, value in data_dict.iteritems():
for value_date, value in data_dict.items():
filtered_id_dict[stat_filter(value_date)].accumulateFrom(value)
other_overall.accumulateFrom(value)
column_set.update(filtered_id_dict)
filtered_site_search = defaultdict(partial(APDEXStats, self.threshold,
None))
for value_date, value in self.site_search.iteritems():
for value_date, value in self.site_search.items():
filtered_site_search[stat_filter(value_date)].accumulateFrom(value)
column_set.update(filtered_site_search)
for key, is_document_dict in self.module.iteritems():
for key, is_document_dict in self.module.items():
filtered_is_document_dict = filtered_module[key]
for key, data_dict in is_document_dict.iteritems():
for key, data_dict in is_document_dict.items():
filtered_data_dict = filtered_is_document_dict[key]
module_document_apdex = module_document_overall[key]
for value_date, value in data_dict.iteritems():
for value_date, value in data_dict.items():
filtered_data_dict[stat_filter(value_date)].accumulateFrom(value)
module_document_apdex.accumulateFrom(value)
column_set.update(filtered_data_dict)
......@@ -671,12 +639,12 @@ class ERP5SiteStats(GenericSiteStats):
for column in column_list:
append('<th colspan="4">%s</th>' % column)
append('</tr><tr>')
for i in xrange(len(column_list) + 1):
for i in range(len(column_list) + 1):
append(APDEXStats.asHTMLHeader(i == 0))
append('</tr>')
def apdexAsColumns(data_dict):
data_total = APDEXStats(self.threshold, None)
for data in data_dict.itervalues():
for data in data_dict.values():
data_total.accumulateFrom(data)
append(data_total.asHTML(self.threshold, True))
for column in column_list:
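xrange() is gone on py3; range() is already lazy, so this is a one-for-one rename:

column_list = ['2017/07', '2017/08']    # hypothetical columns
for i in range(len(column_list) + 1):   # py2: xrange(len(column_list) + 1)
    print('header cell', i)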
......@@ -711,8 +679,7 @@ class ERP5SiteStats(GenericSiteStats):
))
append('</div></div>')
append('</td>')
for module_id, data_dict in sorted(filtered_module.iteritems(),
key=ITEMGETTER0):
for module_id, data_dict in sorted(filtered_module.items(), key=ITEMGETTER0):
append('<tr class="group_top" title="%s (module)"><th rowspan="2">%s</th>'
'<th>module</th>' % (module_id, module_id))
hiddenGraph(self.module[module_id][False], module_id + ' (module)')
......@@ -726,7 +693,7 @@ class ERP5SiteStats(GenericSiteStats):
hiddenGraph(self.site_search, 'site search')
site_search_overall = apdexAsColumns(filtered_site_search)
append('</tr>')
for id_, date_dict in sorted(filtered_no_module.iteritems()):
for id_, date_dict in sorted(filtered_no_module.items()):
append('<tr class="group_top group_bottom" title="%s"><th colspan="2">%s</th>'
% (id_, id_))
hiddenGraph(self.no_module[id_], id_)
......@@ -758,20 +725,20 @@ class ERP5SiteStats(GenericSiteStats):
@classmethod
def fromJSONState(cls, state, getDuration, suffix):
result = super(ERP5SiteStats, cls).fromJSONState(state, getDuration, suffix)
for module_id, module_dict_state in state['module'].iteritems():
for module_id, module_dict_state in state['module'].items():
module_dict = result.module[module_id]
for is_document, date_dict_state in module_dict_state.iteritems():
for is_document, date_dict_state in module_dict_state.items():
date_dict = module_dict[is_document == 'true']
for value_date, apdex_state in date_dict_state.iteritems():
for value_date, apdex_state in date_dict_state.items():
date_dict[value_date] = APDEXStats.fromJSONState(apdex_state, getDuration)
for id_, date_dict in state['no_module'].iteritems():
for id_, date_dict in state['no_module'].items():
no_module_dict = result.no_module[id_]
for value_date, apdex_state in date_dict.iteritems():
for value_date, apdex_state in date_dict.items():
no_module_dict[value_date] = APDEXStats.fromJSONState(
apdex_state, getDuration)
for value_date, apdex_state in state['site_search'].iteritems():
for value_date, apdex_state in state['site_search'].items():
result.site_search[value_date] = APDEXStats.fromJSONState(
apdex_state, getDuration)
......@@ -780,13 +747,13 @@ class ERP5SiteStats(GenericSiteStats):
def asJSONState(self):
result = super(ERP5SiteStats, self).asJSONState()
result['module'] = module = {}
for module_id, module_dict in self.module.iteritems():
for module_id, module_dict in self.module.items():
module_dict_state = module[module_id] = {}
for is_document, date_dict in module_dict.iteritems():
for is_document, date_dict in module_dict.items():
module_dict_state[is_document] = _APDEXDateDictAsJSONState(date_dict)
result['no_module'] = no_module = {}
for id_, date_dict in self.no_module.iteritems():
for id_, date_dict in self.no_module.items():
no_module[id_] = _APDEXDateDictAsJSONState(date_dict)
result['site_search'] = _APDEXDateDictAsJSONState(self.site_search)
......@@ -795,20 +762,20 @@ class ERP5SiteStats(GenericSiteStats):
def accumulateFrom(self, other):
super(ERP5SiteStats, self).accumulateFrom(other)
module = self.module
for module_id, other_module_dict in other.module.iteritems():
for module_id, other_module_dict in other.module.items():
module_dict = module[module_id]
for is_document, other_date_dict in other_module_dict.iteritems():
for is_document, other_date_dict in other_module_dict.items():
date_dict = module_dict[is_document]
for value_date, apdex in other_date_dict.iteritems():
for value_date, apdex in other_date_dict.items():
date_dict[value_date].accumulateFrom(apdex)
for id_, other_date_dict in other.no_module.iteritems():
for id_, other_date_dict in other.no_module.items():
date_dict = self.no_module[id_]
for value_date, apdex in other_date_dict.iteritems():
for value_date, apdex in other_date_dict.items():
date_dict[value_date].accumulateFrom(apdex)
attribute = self.site_search
for value_date, apdex in other.site_search.iteritems():
for value_date, apdex in other.site_search.items():
attribute[value_date].accumulateFrom(apdex)
DURATION_US_FORMAT = '%D'
......@@ -861,7 +828,7 @@ class AggregateSiteUrl(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None):
action = base_action = self.__argument_to_aggregator[option_string]
site_list, site_caption_dict = getattr(namespace, self.dest)
next_value = iter(values).next
next_value = iter(values).__next__
while True:
try:
value = next_value()
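py3 renamed the iterator protocol method from next() to __next__(), so the bound-method trick survives under a new name; the builtin next() would be the version-agnostic spelling. A sketch:

values = iter(['--base', '/', '--erp5-base', '/erp5'])
next_value = values.__next__        # py2: values.next
while True:
    try:
        value = next_value()        # equivalently: next(values)
    except StopIteration:
        break
    print(value)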
......@@ -917,7 +884,7 @@ class ShlexArgumentParser(argparse.ArgumentParser):
shlex.split(in_file.read(), comments=True),
new_cwd,
))
except IOError, exc:
except IOError as exc:
self.error(str(exc))
else:
append(arg)
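The comma form of except is a hard SyntaxError on py3, so unlike the renamed methods this change could not be deferred to runtime:

try:
    open('/nonexistent-config')     # hypothetical path
except IOError as exc:              # py2-only spelling: except IOError, exc:
    print('config skipped:', exc)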
......@@ -1125,7 +1092,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
apdex_y_scale = apdex_y_scale_dict[args.apdex_yscale]
hit_y_scale = hit_y_scale_dict[args.hit_yscale]
out.write('</head><body><h1>Overall</h1>')
site_list = list(enumerate(sorted(per_site.iteritems(),
site_list = list(enumerate(sorted(per_site.items(),
key=lambda x: site_caption_dict[x[0]])))
html_site_caption_dict = {}
for i, (site_id, _) in site_list:
......@@ -1149,7 +1116,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
hit_per_day = defaultdict(int)
x_min = LARGER_THAN_INTEGER_STR
x_max = SMALLER_THAN_INTEGER_STR
for site_data in per_site.itervalues():
for site_data in per_site.values():
apdex_data_list = site_data.getApdexData()
if apdex_data_list:
x_min = min(x_min, apdex_data_list[0][0])
......@@ -1159,7 +1126,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
if x_min == LARGER_THAN_INTEGER_STR:
x_min = None
x_max = None
for hit_date, hit in sorted(hit_per_day.iteritems(), key=ITEMGETTER0):
for hit_date, hit in sorted(hit_per_day.items(), key=ITEMGETTER0):
out.write('<tr><td>%s</td><td>%s</td></tr>' % (hit_date, hit))
out.write('</table>')
n_hottest_pages = args.n_hottest_pages
......@@ -1230,7 +1197,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
out.write('</body></html>')
def asJSON(out, encoding, per_site, *_):
json.dump([(x, y.asJSONState()) for x, y in per_site.iteritems()], out)
json.dump([(x, y.asJSONState()) for x, y in per_site.items()], out)
format_generator = {
'html': (asHTML, 'utf-8'),
......@@ -1365,7 +1332,7 @@ def main():
group.add_argument('--erp5-base', dest='path', nargs='+',
action=AggregateSiteUrl,
help='Similar to --base, but with specialised statistics. Ex: '
'"/erp5(/|$|\?)"')
'"/erp5(/|$|\\?)"')
group.add_argument('--skip-base', dest='path', nargs='+',
action=AggregateSiteUrl,
help='Absolute base url(s) to ignore.')
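'\?' is not a recognised string escape; py3.6 turned such sequences into DeprecationWarnings (newer releases warn more loudly), so the help text doubles the backslash. The regex itself is unchanged:

import re

assert '\\?' == r'\?'                            # same two characters either way
assert re.search('/erp5(/|$|\\?)', '/erp5?q=1')  # the pattern still matches a literal '?'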
......@@ -1409,7 +1376,7 @@ def main():
line_regex = ''
expensive_line_regex = ''
try:
n = iter(args.logformat).next
n = iter(args.logformat).__next__
while True:
key = None
expensive_char = char = n()
......@@ -1436,7 +1403,7 @@ def main():
matchrequest = REQUEST_PATTERN.match
if args.period is None:
next_period_data = ((x, y[4] * AUTO_PERIOD_COEF) for (x, y) in
sorted(period_parser.iteritems(), key=lambda x: x[1][4])).next
sorted(period_parser.items(), key=lambda x: x[1][4])).__next__
period, to_next_period = next_period_data()
original_period = period
earliest_date = latest_date = None
......@@ -1540,7 +1507,7 @@ def main():
logfile = sys.stdin
else:
for opener, exc in FILE_OPENER_LIST:
logfile = opener(filename, _read_mode, encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
logfile = opener(filename, 'rt', encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
try:
logfile.readline()
except exc:
......@@ -1549,7 +1516,7 @@ def main():
logfile.seek(0)
break
else:
logfile = codecs.open(filename, _read_mode, encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
logfile = open(filename, 'r', encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
lineno = 0
for lineno, line in enumerate(logfile, 1):
if show_progress and lineno % 5000 == 0:
......@@ -1572,7 +1539,7 @@ def main():
no_url_lines += 1
continue
url = url_match.group('url')
if url.startswith(b'http'):
if url.startswith('http'):
url = splithost(splittype(url)[1])[1]
url = get_url_prefix(match, url)
for site, prefix_match, action in site_list:
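Because log files are now opened in text mode, every match group is str; the old b'http' prefix would never match a str and is dropped. The host stripping still goes through the urllib.parse helpers (deprecated, but present on 3.9), e.g. with a hypothetical request URL:

from urllib.parse import splittype, splithost

url = 'https://example.org/erp5/view?id=1'
if url.startswith('http'):
    url = splithost(splittype(url)[1])[1]
print(url)   # /erp5/view?id=1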
......@@ -1608,7 +1575,7 @@ def main():
latest_date = rescale(latest_date)
earliest_date = rescale(earliest_date)
period_increase_start = time.time()
for site_data in per_site.itervalues():
for site_data in per_site.values():
site_data.rescale(rescale, getDuration)
if show_progress:
print('done (%s)' % timedelta(seconds=time.time()
......@@ -1633,9 +1600,10 @@ def main():
end_parsing_time = time.time()
generator, out_encoding = format_generator[args.format]
if args.out == '-':
out = codecs.getwriter(out_encoding)(sys.stdout)
out = sys.stdout
out.reconfigure(encoding=out_encoding)
else:
out = codecs.open(args.out, 'w', encoding=out_encoding)
out = open(args.out, 'w', encoding=out_encoding)
with out:
generator(out, out_encoding, per_site, args, default_site, {
'period': period,
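Output handling drops the codecs wrappers: py3's built-in open() takes encoding directly, and io.TextIOWrapper.reconfigure() (available since 3.7) re-encodes an already-open sys.stdout in place. A minimal sketch of the branch above (getOutput is a hypothetical helper):

import sys

def getOutput(out_path, out_encoding):
    if out_path == '-':
        sys.stdout.reconfigure(encoding=out_encoding)
        return sys.stdout
    return open(out_path, 'w', encoding=out_encoding)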
......@@ -1662,10 +1630,11 @@ def main():
if __name__ == '__main__':
__resource_base = os.path.join(*os.path.split(__file__)[:-1])
def getResource(name, encoding='utf-8'):
return codecs.open(
with open(
os.path.join(__resource_base, name),
encoding=encoding,
).read()
) as f:
return f.read()
main()
......
......@@ -3,10 +3,10 @@ import sys
import json
import bz2
import gzip
from StringIO import StringIO
import io
import tempfile
import apachedex
from . import lzma
import lzma
class ApacheDEXTestCase(unittest.TestCase):
......@@ -15,8 +15,10 @@ class ApacheDEXTestCase(unittest.TestCase):
self._original_sys_stdin = sys.stdin
self._original_sys_stderr = sys.stderr
self._original_sys_stdout = sys.stdout
sys.stderr = StringIO()
sys.stdout = StringIO()
self._stderr_bytes = io.BytesIO()
sys.stderr = io.TextIOWrapper(self._stderr_bytes, write_through=True)
self._stdout_bytes = io.BytesIO()
sys.stdout = io.TextIOWrapper(self._stdout_bytes, write_through=True)
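The test harness now captures real byte output: a BytesIO holds what was written, and a TextIOWrapper with write_through=True forwards every str write to it immediately instead of buffering, so assertions can inspect the bytes right after main() returns. In isolation:

import io

buf = io.BytesIO()
stream = io.TextIOWrapper(buf, write_through=True)
stream.write('Malformed line at -:2\n')
assert b'Malformed line at -:2' in buf.getvalue()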
def tearDown(self):
sys.argv = self._original_sys_argv
......@@ -25,17 +27,31 @@ class ApacheDEXTestCase(unittest.TestCase):
sys.stdout = self._original_sys_stdout
class TestFiles(ApacheDEXTestCase):
def test(self):
with tempfile.NamedTemporaryFile() as fin, tempfile.NamedTemporaryFile() as fout:
fin.write(
b'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1754'''
)
fin.flush()
sys.argv = ['apachedex', '--base=/', fin.name, '--out', fout.name]
apachedex.main()
fout.flush()
fout.seek(0)
self.assertIn(b"<html>", fout.read())
class TestMalformedInput(ApacheDEXTestCase):
def test_timestamp_mixed_in_timestamp(self):
sys.argv = ['apachedex', '--base=/', '-']
sys.stdin = StringIO(
sys.stdin = io.StringIO(
# this first line is valid, but second is not
'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1754
127.0.0.1 - - [14/Jul/2017:127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1754''')
apachedex.main()
self.assertNotIn('Malformed line at -:1', sys.stderr.getvalue())
self.assertIn('Malformed line at -:2', sys.stderr.getvalue())
self.assertNotIn(b'Malformed line at -:1', self._stderr_bytes.getvalue())
self.assertIn(b'Malformed line at -:2', self._stderr_bytes.getvalue())
class TestCharacterEncoding(ApacheDEXTestCase):
......@@ -48,7 +64,7 @@ class TestCharacterEncoding(ApacheDEXTestCase):
fin.flush()
sys.argv = ['apachedex', '--base=/', fin.name, '-f', 'json', '-o', fout.name]
apachedex.main()
self.assertNotIn('Malformed line', sys.stderr.getvalue())
self.assertNotIn(b'Malformed line', self._stderr_bytes.getvalue())
with open(fout.name) as f:
self.assertTrue(json.load(f))
......@@ -74,7 +90,7 @@ class EncodedInputTestMixin:
fin.flush()
sys.argv = ['apachedex', '--base=/', fin.name, '-f', 'json', '-o', fout.name]
apachedex.main()
self.assertNotIn('Malformed line', sys.stderr.getvalue())
self.assertNotIn(b'Malformed line', self._stderr_bytes.getvalue())
with open(fout.name) as f:
self.assertTrue(json.load(f))
......@@ -86,20 +102,15 @@ class TestBzip2Encoding(ApacheDEXTestCase, EncodedInputTestMixin):
class TestZlibEncoding(ApacheDEXTestCase, EncodedInputTestMixin):
def _getInputData(self):
f = StringIO()
f = io.BytesIO()
with gzip.GzipFile(mode="w", fileobj=f) as gzfile:
gzfile.write(self.DEFAULT_LINE)
return f.getvalue()
if lzma is not None:
class TestLzmaEncoding(ApacheDEXTestCase, EncodedInputTestMixin):
def _getInputData(self):
return lzma.compress(self.DEFAULT_LINE)
else:
class TestLzmaEncoding(ApacheDEXTestCase):
def test(self):
self.skipTest("lzma not available")
class TestLzmaEncoding(ApacheDEXTestCase, EncodedInputTestMixin):
def _getInputData(self):
return lzma.compress(self.DEFAULT_LINE)
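lzma is part of the py3 standard library, so the backports.lzma fallback and its skip path disappear and the test can always run:

import lzma

line = b'127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499'
assert lzma.decompress(lzma.compress(line)) == line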
class TestTimeEnconding(ApacheDEXTestCase):
......@@ -107,7 +118,7 @@ class TestTimeEnconding(ApacheDEXTestCase):
def test_seconds_timing(self):
with tempfile.NamedTemporaryFile() as fout:
sys.argv = ['apachedex', '--base=/', '-', '--logformat', '%h %l %u %t "%r" %>s %O "%{Referer}i" "%{User-Agent}i" %T', '-f', 'json', '-o', fout.name]
sys.stdin = StringIO(
sys.stdin = io.StringIO(
'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1''')
apachedex.main()
......@@ -119,7 +130,7 @@ class TestTimeEnconding(ApacheDEXTestCase):
def test_milliseconds_timing(self):
with tempfile.NamedTemporaryFile() as fout:
sys.argv = ['apachedex', '--base=/', '-', '--logformat', '%h %l %u %t "%r" %>s %O "%{Referer}i" "%{User-Agent}i" %D', '-f', 'json', '-o', fout.name]
sys.stdin = StringIO(
sys.stdin = io.StringIO(
'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1000000''')
apachedex.main()
......@@ -131,7 +142,7 @@ class TestTimeEnconding(ApacheDEXTestCase):
def test_microseconds_timing(self):
with tempfile.NamedTemporaryFile() as fout:
sys.argv = ['apachedex', '--base=/', '-', '--logformat', '%h %l %u %t "%r" %>s %O "%{Referer}i" "%{User-Agent}i" %{ms}T', '-f', 'json', '-o', fout.name]
sys.stdin = StringIO(
sys.stdin = io.StringIO(
'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1000
''')
......
......@@ -64,14 +64,13 @@ setup(
long_description=".. contents::\n\n" + description,
author='Vincent Pelletier',
author_email='vincent@nexedi.com',
url='http://git.erp5.org/gitweb/apachedex.git',
url='https://lab.nexedi.com/nexedi/apachedex.git',
license='GPL 2+',
platforms=['any'],
classifiers=[
'Intended Audience :: Developers',
'License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)',
'Operating System :: OS Independent',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: Implementation :: PyPy',
'Programming Language :: Python :: Implementation :: CPython',
......@@ -90,5 +89,4 @@ setup(
},
test_suite='apachedex.tests',
zip_safe=True,
use_2to3=True,
)