Commit 6695817c authored by Jérome Perrin

Update for Python 3 (>= 3.9)

Drop support for Python 2
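
Most of the changes below are the same few mechanical Python 2 → 3 substitutions applied throughout. A minimal standalone sketch of the recurring patterns (illustrative sample data, not code from this repository):

```python
# Recurring Python 2 -> 3 patterns applied throughout this commit.
from html import escape   # Python 2: from cgi import escape
import http.client        # Python 2: import httplib

counts = {'200': 3, '404': 1}

# dict.iteritems()/.itervalues() are gone; .items()/.values() return views.
for status, hits in counts.items():
    print(escape(status), hits)

# zip() is now lazy; materialize it where a list is required
# (e.g. for JSON serialization).
pairs = list(zip(counts.keys(), counts.values()))

# Iterators expose __next__ instead of .next.
next_pair = iter(pairs).__next__
print(next_pair())                    # ('200', 3)
print(http.client.responses[404])     # 'Not Found'

# "except Exc, e" is a SyntaxError on Python 3; bind with "as".
try:
    raise IOError('boom')
except IOError as exc:
    print(exc)

# Text I/O takes encoding/errors directly; no codecs wrappers needed.
with open('sample.log', 'w', encoding='utf-8', errors='replace') as f:
    f.write('hello\n')
```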
parent 943a005d
@@ -26,23 +26,23 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from __future__ import print_function, division, absolute_import, \
unicode_literals
from cgi import escape
from html import escape
from collections import defaultdict, Counter
from datetime import datetime, timedelta, date, tzinfo
from functools import partial
from operator import itemgetter
from urllib import splittype, splithost
from urllib.parse import splittype, splithost
import argparse
import bz2
import calendar
import codecs
import functools
import gzip
import httplib
import http.client
import itertools
import json
import lzma
import math
import os
import pkgutil
@@ -61,47 +61,16 @@ except ImportError:
def getResource(name, encoding='utf-8'):
return pkgutil.get_data(__name__, name).decode(encoding)
def _wrapOpen(func):
@functools.wraps(func)
def wrapper(*args, **kw):
encoding = kw.pop('encoding', None)
info = codecs.lookup(encoding)
errors = kw.pop('errors', 'strict')
file_object = func(*args, **kw)
if encoding is None:
return file_object
srw = codecs.StreamReaderWriter(
file_object,
info.streamreader,
info.streamwriter,
errors,
)
srw.encoding = encoding
return srw
return wrapper
gzip_open = gzip.open
if sys.version_info >= (3, 3):
import lzma
lzma_open = lzma.open
bz2_open = bz2.open
_read_mode = 'rt'
else:
gzip_open = _wrapOpen(gzip_open)
bz2_open = _wrapOpen(bz2.BZ2File)
_read_mode = 'r'
try:
from backports import lzma
lzma_open = _wrapOpen(lzma.open)
except ImportError:
lzma = None
lzma_open = lzma.open
bz2_open = bz2.open
FILE_OPENER_LIST = [
(gzip_open, IOError),
(bz2_open, IOError),
(lzma_open, lzma.LZMAError)
]
if lzma is not None:
FILE_OPENER_LIST.append((lzma_open, lzma.LZMAError))
# XXX: what encoding ? apache doesn't document one, but requests are supposed
# to be urlencoded, so pure ascii. Are timestamps localised ?
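
On Python 3 the gzip, bz2 and lzma openers all accept text mode with `encoding`/`errors`, which is what lets this commit drop the `_wrapOpen` shim above. The probing idiom that `main()` builds on `FILE_OPENER_LIST` further down, restated as a self-contained sketch (`detectOpen` is an illustrative name, not part of apachedex):

```python
# Sketch: probe compressed formats by attempting a first read in text mode;
# each stdlib opener raises its own error on foreign input.
import bz2
import gzip
import lzma

FILE_OPENER_LIST = [
    (gzip.open, IOError),        # gzip raises an OSError/IOError subclass
    (bz2.open, IOError),
    (lzma.open, lzma.LZMAError),
]

def detectOpen(filename, encoding='utf-8', errors='replace'):
    for opener, exc in FILE_OPENER_LIST:
        logfile = opener(filename, 'rt', encoding=encoding, errors=errors)
        try:
            logfile.readline()   # force a decode to detect a wrong format
        except exc:
            continue
        logfile.seek(0)
        return logfile
    # Not compressed: plain text open with the same decoding policy.
    return open(filename, 'r', encoding=encoding, errors=errors)
```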
@@ -131,7 +100,7 @@ AUTO_PERIOD_COEF = 200
LARGER_THAN_INTEGER_STR = 'A'
SMALLER_THAN_INTEGER_STR = ''
HTTP_STATUS_CAPTION_DICT = httplib.responses.copy()
HTTP_STATUS_CAPTION_DICT = http.client.responses.copy()
# Non-standard status codes
HTTP_STATUS_CAPTION_DICT.setdefault(499, 'Client Closed Request')
HTTP_STATUS_CAPTION_DICT.setdefault(444, 'No Response')
@@ -153,9 +122,9 @@ def getClassForStatusHit(hit, status):
def getDataPoints(apdex_dict, status_period_dict={}):
period_error_dict = defaultdict(int)
for status, period_dict in status_period_dict.iteritems():
for status, period_dict in status_period_dict.items():
if statusIsError(status):
for period, hit in period_dict.iteritems():
for period, hit in period_dict.items():
period_error_dict[period] += hit
# If there was an error, there was a hit, and apdex_dict must contain it
# (at same date).
@@ -166,7 +135,7 @@ def getDataPoints(apdex_dict, status_period_dict={}):
apdex.getApdex() * 100,
apdex.hit,
period_error_dict.get(value_date, 0),
) for value_date, apdex in sorted(apdex_dict.iteritems(), key=ITEMGETTER0)
) for value_date, apdex in sorted(apdex_dict.items(), key=ITEMGETTER0)
]
def prepareDataForGraph(daily_data, date_format, placeholder_delta,
@@ -202,7 +171,7 @@ def graphPair(daily_data, date_format, graph_period, apdex_y_min=None,
yLabelWidth = max(int(math.log10(max(x[2] for x in daily_data))) + 1,
3) * 6
return graph('apdex',
[zip(date_list, (round(x[1], 2) for x in daily_data))],
[list(zip(date_list, (round(x[1], 2) for x in daily_data)))],
{
'xaxis': {
'mode': 'time',
@@ -225,12 +194,12 @@
[
{
'label': 'Errors',
'data': zip(date_list, (x[3] for x in daily_data)),
'data': list(zip(date_list, (x[3] for x in daily_data))),
'color': 'red',
},
{
'label': 'Hits',
'data': zip(date_list, (x[2] for x in daily_data)),
'data': list(zip(date_list, (x[2] for x in daily_data))),
},
],
{
@@ -326,7 +295,7 @@ class APDEXStats(object):
extra_class = ''
apdex_style = 'color: #%s; background-color: #%s' % (
(apdex < .5 and 'f' or '0') * 3,
('%x' % (apdex * 0xf)) * 3,
('%x' % int(apdex * 0xf)) * 3,
)
else:
extra_class = 'no_hit'
@@ -363,7 +332,7 @@ class APDEXStats(object):
return result
_APDEXDateDictAsJSONState = lambda date_dict: dict(((y, z.asJSONState())
for y, z in date_dict.iteritems()))
for y, z in date_dict.items()))
class GenericSiteStats(object):
def __init__(self, threshold, getDuration, suffix, error_detail=False,
@@ -383,13 +352,13 @@ class GenericSiteStats(object):
self.user_agent_counter = Counter()
def rescale(self, convert, getDuration):
for status, date_dict in self.status.iteritems():
for status, date_dict in self.status.items():
new_date_dict = defaultdict(int)
for value_date, status_count in date_dict.iteritems():
for value_date, status_count in date_dict.items():
new_date_dict[convert(value_date)] += status_count
self.status[status] = new_date_dict
new_apdex = defaultdict(partial(APDEXStats, self.threshold, getDuration))
for value_date, data in self.apdex.iteritems():
for value_date, data in self.apdex.items():
new_apdex[convert(value_date)].accumulateFrom(data)
self.apdex = new_apdex
@@ -422,7 +391,7 @@ class GenericSiteStats(object):
result = []
append = result.append
apdex = APDEXStats(self.threshold, None)
for data in self.apdex.itervalues():
for data in self.apdex.values():
apdex.accumulateFrom(data)
append('<h2>Overall</h2><table class="stats"><tr>')
append(APDEXStats.asHTMLHeader())
@@ -431,7 +400,7 @@ class GenericSiteStats(object):
append('</tr></table><h2>Hottest pages</h2><table class="stats"><tr>')
append(APDEXStats.asHTMLHeader())
append('<th>url</th></tr>')
for url, data in sorted(self.url_apdex.iteritems(), key=lambda x: x[1].getAverage() * x[1].hit,
for url, data in sorted(self.url_apdex.items(), key=lambda x: x[1].getAverage() * x[1].hit,
reverse=True)[:n_hottest_pages]:
append('<tr>')
append(data.asHTML(self.threshold))
@@ -445,9 +414,9 @@ class GenericSiteStats(object):
append('</table>')
column_set = set()
filtered_status = defaultdict(partial(defaultdict, int))
for status, date_dict in self.status.iteritems():
for status, date_dict in self.status.items():
filtered_date_dict = filtered_status[status]
for value_date, value in date_dict.iteritems():
for value_date, value in date_dict.items():
filtered_date_dict[stat_filter(value_date)] += value
column_set.update(filtered_date_dict)
column_list = sorted(column_set)
@@ -466,25 +435,24 @@ class GenericSiteStats(object):
else:
return '<abbr title="%s">%s</abbr>' % (definition, status)
has_errors = False
for status, data_dict in sorted(filtered_status.iteritems(),
key=ITEMGETTER0):
for status, data_dict in sorted(filtered_status.items(), key=ITEMGETTER0):
has_errors |= statusIsError(status)
append('<tr title="%s"><th>%s</th>' % (status, statusAsHtml(status)))
append(hitTd(sum(data_dict.itervalues()), status))
append(hitTd(sum(data_dict.values()), status))
for column in column_list:
append(hitTd(data_dict[column], status))
append('</tr>')
append('</table>')
if self.error_detail and has_errors:
def getHitForUrl(referer_counter):
return sum(referer_counter.itervalues())
return sum(referer_counter.values())
filtered_status_url = defaultdict(partial(defaultdict, dict))
for status, url_dict in self.error_url_count.iteritems():
filtered_status_url[status] = sorted(url_dict.iteritems(),
for status, url_dict in self.error_url_count.items():
filtered_status_url[status] = sorted(url_dict.items(),
key=lambda x: getHitForUrl(x[1]), reverse=True)[:N_ERROR_URL]
append('<h3>Error detail</h3><table class="stats"><tr><th>status</th>'
'<th>hits</th><th>url</th><th>referers</th></tr>')
for status, url_list in sorted(filtered_status_url.iteritems(),
for status, url_list in sorted(filtered_status_url.items(),
key=ITEMGETTER0):
append('<tr><th rowspan="%s">%s</th>' % (len(url_list),
statusAsHtml(status)))
@@ -513,16 +481,16 @@ class GenericSiteStats(object):
state.get('user_agent_detail', True))
if error_detail:
error_url_count = result.error_url_count
for state_status, state_url_dict in state['error_url_count'].iteritems():
for state_status, state_url_dict in state['error_url_count'].items():
url_dict = error_url_count[state_status]
for url, counter in state_url_dict.iteritems():
for url, counter in state_url_dict.items():
url_dict[url].update(counter)
for attribute_id in ('url_apdex', 'apdex'):
attribute = getattr(result, attribute_id)
for key, apdex_state in state[attribute_id].iteritems():
for key, apdex_state in state[attribute_id].items():
attribute[key] = APDEXStats.fromJSONState(apdex_state, getDuration)
status = result.status
for status_code, date_dict in state['status'].iteritems():
for status_code, date_dict in state['status'].items():
status[status_code].update(date_dict)
result.user_agent_counter.update(state['user_agent_counter'])
return result
@@ -544,18 +512,18 @@ class GenericSiteStats(object):
# user_agent_detail.
# Assuming they are consistently set.
if self.error_detail:
for status, other_url_dict in other.error_url_count.iteritems():
for status, other_url_dict in other.error_url_count.items():
url_dict = self.error_url_count[status]
for url, referer_counter in other_url_dict.iteritems():
for url, referer_counter in other_url_dict.items():
url_dict[url].update(referer_counter)
for attribute_id in ('url_apdex', 'apdex'):
self_attribute = getattr(self, attribute_id)
for key, apdex_data in getattr(other, attribute_id).iteritems():
for key, apdex_data in getattr(other, attribute_id).items():
self_attribute[key].accumulateFrom(apdex_data)
status = self.status
for status_code, other_date_dict in other.status.iteritems():
for status_code, other_date_dict in other.status.items():
date_dict = status[status_code]
for status_date, count in other_date_dict.iteritems():
for status_date, count in other_date_dict.items():
date_dict[status_date] += count
self.user_agent_counter.update(other.user_agent_counter)
@@ -594,21 +562,21 @@ class ERP5SiteStats(GenericSiteStats):
def rescale(self, convert, getDuration):
super(ERP5SiteStats, self).rescale(convert, getDuration)
threshold = self.threshold
for document_dict in self.module.itervalues():
for is_document, date_dict in document_dict.iteritems():
for document_dict in self.module.values():
for is_document, date_dict in document_dict.items():
new_date_dict = defaultdict(partial(APDEXStats, threshold, getDuration))
for value_date, data in date_dict.iteritems():
for value_date, data in date_dict.items():
new_date_dict[convert(value_date)].accumulateFrom(data)
document_dict[is_document] = new_date_dict
for id_, date_dict in self.no_module.iteritems():
for id_, date_dict in self.no_module.items():
new_date_dict = defaultdict(partial(APDEXStats, threshold, getDuration))
for value_date, data in date_dict.iteritems():
for value_date, data in date_dict.items():
new_date_dict[convert(value_date)].accumulateFrom(data)
self.no_module[id_] = new_date_dict
attribute = defaultdict(partial(APDEXStats, threshold, getDuration))
for value_date, data in self.site_search.iteritems():
for value_date, data in self.site_search.items():
attribute[convert(value_date)].accumulateFrom(data)
self.site_search = attribute
@@ -647,23 +615,23 @@ class ERP5SiteStats(GenericSiteStats):
filtered_no_module = defaultdict(partial(
defaultdict, partial(APDEXStats, self.threshold, None)))
column_set = set()
for key, data_dict in self.no_module.iteritems():
for key, data_dict in self.no_module.items():
filtered_id_dict = filtered_no_module[key]
for value_date, value in data_dict.iteritems():
for value_date, value in data_dict.items():
filtered_id_dict[stat_filter(value_date)].accumulateFrom(value)
other_overall.accumulateFrom(value)
column_set.update(filtered_id_dict)
filtered_site_search = defaultdict(partial(APDEXStats, self.threshold,
None))
for value_date, value in self.site_search.iteritems():
for value_date, value in self.site_search.items():
filtered_site_search[stat_filter(value_date)].accumulateFrom(value)
column_set.update(filtered_site_search)
for key, is_document_dict in self.module.iteritems():
for key, is_document_dict in self.module.items():
filtered_is_document_dict = filtered_module[key]
for key, data_dict in is_document_dict.iteritems():
for key, data_dict in is_document_dict.items():
filtered_data_dict = filtered_is_document_dict[key]
module_document_apdex = module_document_overall[key]
for value_date, value in data_dict.iteritems():
for value_date, value in data_dict.items():
filtered_data_dict[stat_filter(value_date)].accumulateFrom(value)
module_document_apdex.accumulateFrom(value)
column_set.update(filtered_data_dict)
@@ -671,12 +639,12 @@ class ERP5SiteStats(GenericSiteStats):
for column in column_list:
append('<th colspan="4">%s</th>' % column)
append('</tr><tr>')
for i in xrange(len(column_list) + 1):
for i in range(len(column_list) + 1):
append(APDEXStats.asHTMLHeader(i == 0))
append('</tr>')
def apdexAsColumns(data_dict):
data_total = APDEXStats(self.threshold, None)
for data in data_dict.itervalues():
for data in data_dict.values():
data_total.accumulateFrom(data)
append(data_total.asHTML(self.threshold, True))
for column in column_list:
@@ -711,8 +679,7 @@ class ERP5SiteStats(GenericSiteStats):
))
append('</div></div>')
append('</td>')
for module_id, data_dict in sorted(filtered_module.iteritems(),
key=ITEMGETTER0):
for module_id, data_dict in sorted(filtered_module.items(), key=ITEMGETTER0):
append('<tr class="group_top" title="%s (module)"><th rowspan="2">%s</th>'
'<th>module</th>' % (module_id, module_id))
hiddenGraph(self.module[module_id][False], module_id + ' (module)')
@@ -726,7 +693,7 @@ class ERP5SiteStats(GenericSiteStats):
hiddenGraph(self.site_search, 'site search')
site_search_overall = apdexAsColumns(filtered_site_search)
append('</tr>')
for id_, date_dict in sorted(filtered_no_module.iteritems()):
for id_, date_dict in sorted(filtered_no_module.items()):
append('<tr class="group_top group_bottom" title="%s"><th colspan="2">%s</th>'
% (id_, id_))
hiddenGraph(self.no_module[id_], id_)
@@ -758,20 +725,20 @@ class ERP5SiteStats(GenericSiteStats):
@classmethod
def fromJSONState(cls, state, getDuration, suffix):
result = super(ERP5SiteStats, cls).fromJSONState(state, getDuration, suffix)
for module_id, module_dict_state in state['module'].iteritems():
for module_id, module_dict_state in state['module'].items():
module_dict = result.module[module_id]
for is_document, date_dict_state in module_dict_state.iteritems():
for is_document, date_dict_state in module_dict_state.items():
date_dict = module_dict[is_document == 'true']
for value_date, apdex_state in date_dict_state.iteritems():
for value_date, apdex_state in date_dict_state.items():
date_dict[value_date] = APDEXStats.fromJSONState(apdex_state, getDuration)
for id_, date_dict in state['no_module'].iteritems():
for id_, date_dict in state['no_module'].items():
no_module_dict = result.no_module[id_]
for value_date, apdex_state in date_dict.iteritems():
for value_date, apdex_state in date_dict.items():
no_module_dict[value_date] = APDEXStats.fromJSONState(
apdex_state, getDuration)
for value_date, apdex_state in state['site_search'].iteritems():
for value_date, apdex_state in state['site_search'].items():
result.site_search[value_date] = APDEXStats.fromJSONState(
apdex_state, getDuration)
@@ -780,13 +747,13 @@ class ERP5SiteStats(GenericSiteStats):
def asJSONState(self):
result = super(ERP5SiteStats, self).asJSONState()
result['module'] = module = {}
for module_id, module_dict in self.module.iteritems():
for module_id, module_dict in self.module.items():
module_dict_state = module[module_id] = {}
for is_document, date_dict in module_dict.iteritems():
for is_document, date_dict in module_dict.items():
module_dict_state[is_document] = _APDEXDateDictAsJSONState(date_dict)
result['no_module'] = no_module = {}
for id_, date_dict in self.no_module.iteritems():
for id_, date_dict in self.no_module.items():
no_module[id_] = _APDEXDateDictAsJSONState(date_dict)
result['site_search'] = _APDEXDateDictAsJSONState(self.site_search)
@@ -795,20 +762,20 @@ class ERP5SiteStats(GenericSiteStats):
def accumulateFrom(self, other):
super(ERP5SiteStats, self).accumulateFrom(other)
module = self.module
for module_id, other_module_dict in other.module.iteritems():
for module_id, other_module_dict in other.module.items():
module_dict = module[module_id]
for is_document, other_date_dict in other_module_dict.iteritems():
for is_document, other_date_dict in other_module_dict.items():
date_dict = module_dict[is_document]
for value_date, apdex in other_date_dict.iteritems():
for value_date, apdex in other_date_dict.items():
date_dict[value_date].accumulateFrom(apdex)
for id_, other_date_dict in other.no_module.iteritems():
for id_, other_date_dict in other.no_module.items():
date_dict = self.no_module[id_]
for value_date, apdex in other_date_dict.iteritems():
for value_date, apdex in other_date_dict.items():
date_dict.accumulateFrom(apdex)
attribute = self.site_search
for value_date, apdex in other.site_search.iteritems():
for value_date, apdex in other.site_search.items():
attribute[value_date].accumulateFrom(apdex)
DURATION_US_FORMAT = '%D'
@@ -861,7 +828,7 @@ class AggregateSiteUrl(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None):
action = base_action = self.__argument_to_aggregator[option_string]
site_list, site_caption_dict = getattr(namespace, self.dest)
next_value = iter(values).next
next_value = iter(values).__next__
while True:
try:
value = next_value()
@@ -917,7 +884,7 @@ class ShlexArgumentParser(argparse.ArgumentParser):
shlex.split(in_file.read(), comments=True),
new_cwd,
))
except IOError, exc:
except IOError as exc:
self.error(str(exc))
else:
append(arg)
@@ -1125,7 +1092,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
apdex_y_scale = apdex_y_scale_dict[args.apdex_yscale]
hit_y_scale = hit_y_scale_dict[args.hit_yscale]
out.write('</head><body><h1>Overall</h1>')
site_list = list(enumerate(sorted(per_site.iteritems(),
site_list = list(enumerate(sorted(per_site.items(),
key=lambda x: site_caption_dict[x[0]])))
html_site_caption_dict = {}
for i, (site_id, _) in site_list:
@@ -1149,7 +1116,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
hit_per_day = defaultdict(int)
x_min = LARGER_THAN_INTEGER_STR
x_max = SMALLER_THAN_INTEGER_STR
for site_data in per_site.itervalues():
for site_data in per_site.values():
apdex_data_list = site_data.getApdexData()
if apdex_data_list:
x_min = min(x_min, apdex_data_list[0][0])
@@ -1159,7 +1126,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
if x_min == LARGER_THAN_INTEGER_STR:
x_min = None
x_max = None
for hit_date, hit in sorted(hit_per_day.iteritems(), key=ITEMGETTER0):
for hit_date, hit in sorted(hit_per_day.items(), key=ITEMGETTER0):
out.write('<tr><td>%s</td><td>%s</td></tr>' % (hit_date, hit))
out.write('</table>')
n_hottest_pages = args.n_hottest_pages
@@ -1230,7 +1197,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
out.write('</body></html>')
def asJSON(out, encoding, per_site, *_):
json.dump([(x, y.asJSONState()) for x, y in per_site.iteritems()], out)
json.dump([(x, y.asJSONState()) for x, y in per_site.items()], out)
format_generator = {
'html': (asHTML, 'utf-8'),
@@ -1365,7 +1332,7 @@ def main():
group.add_argument('--erp5-base', dest='path', nargs='+',
action=AggregateSiteUrl,
help='Similar to --base, but with specialised statistics. Ex: '
'"/erp5(/|$|\?)"')
'"/erp5(/|$|\\?)"')
group.add_argument('--skip-base', dest='path', nargs='+',
action=AggregateSiteUrl,
help='Absolute base url(s) to ignore.')
@@ -1409,7 +1376,7 @@ def main():
line_regex = ''
expensive_line_regex = ''
try:
n = iter(args.logformat).next
n = iter(args.logformat).__next__
while True:
key = None
expensive_char = char = n()
@@ -1436,7 +1403,7 @@ def main():
matchrequest = REQUEST_PATTERN.match
if args.period is None:
next_period_data = ((x, y[4] * AUTO_PERIOD_COEF) for (x, y) in
sorted(period_parser.iteritems(), key=lambda x: x[1][4])).next
sorted(period_parser.items(), key=lambda x: x[1][4])).__next__
period, to_next_period = next_period_data()
original_period = period
earliest_date = latest_date = None
@@ -1540,7 +1507,7 @@ def main():
logfile = sys.stdin
else:
for opener, exc in FILE_OPENER_LIST:
logfile = opener(filename, _read_mode, encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
logfile = opener(filename, 'rt', encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
try:
logfile.readline()
except exc:
@@ -1549,7 +1516,7 @@ def main():
logfile.seek(0)
break
else:
logfile = codecs.open(filename, _read_mode, encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
logfile = open(filename, 'r', encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
lineno = 0
for lineno, line in enumerate(logfile, 1):
if show_progress and lineno % 5000 == 0:
@@ -1572,7 +1539,7 @@ def main():
no_url_lines += 1
continue
url = url_match.group('url')
if url.startswith(b'http'):
if url.startswith('http'):
url = splithost(splittype(url)[1])[1]
url = get_url_prefix(match, url)
for site, prefix_match, action in site_list:
@@ -1608,7 +1575,7 @@ def main():
latest_date = rescale(latest_date)
earliest_date = rescale(earliest_date)
period_increase_start = time.time()
for site_data in per_site.itervalues():
for site_data in per_site.values():
site_data.rescale(rescale, getDuration)
if show_progress:
print('done (%s)' % timedelta(seconds=time.time()
@@ -1633,9 +1600,10 @@ def main():
end_parsing_time = time.time()
generator, out_encoding = format_generator[args.format]
if args.out == '-':
out = codecs.getwriter(out_encoding)(sys.stdout)
out = sys.stdout
out.reconfigure(encoding=out_encoding)
else:
out = codecs.open(args.out, 'w', encoding=out_encoding)
out = open(args.out, 'w', encoding=out_encoding)
with out:
generator(out, out_encoding, per_site, args, default_site, {
'period': period,
@@ -1662,10 +1630,11 @@ def main():
if __name__ == '__main__':
__resource_base = os.path.join(*os.path.split(__file__)[:-1])
def getResource(name, encoding='utf-8'):
return codecs.open(
with open(
os.path.join(__resource_base, name),
encoding=encoding,
).read()
) as f:
return f.read()
main()
@@ -3,10 +3,10 @@ import sys
import json
import bz2
import gzip
from StringIO import StringIO
import io
import tempfile
import apachedex
from . import lzma
import lzma
class ApacheDEXTestCase(unittest.TestCase):
@@ -15,8 +15,10 @@ class ApacheDEXTestCase(unittest.TestCase):
self._original_sys_stdin = sys.stdin
self._original_sys_stderr = sys.stderr
self._original_sys_stdout = sys.stdout
sys.stderr = StringIO()
sys.stdout = StringIO()
self._stderr_bytes = io.BytesIO()
sys.stderr = io.TextIOWrapper(self._stderr_bytes, write_through=True)
self._stdout_bytes = io.BytesIO()
sys.stdout = io.TextIOWrapper(self._stdout_bytes, write_through=True)
def tearDown(self):
sys.argv = self._original_sys_argv
@@ -25,17 +27,28 @@
sys.stdout = self._original_sys_stdout
class TestFiles(ApacheDEXTestCase):
def test(self):
with tempfile.NamedTemporaryFile() as fin, tempfile.NamedTemporaryFile() as fout:
fin.write(
b'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1754'''
)
fin.flush()
sys.argv = ['apachedex', '--base=/', fin.name, '--out', fout.name]
apachedex.main()
class TestMalformedInput(ApacheDEXTestCase):
def test_timestamp_mixed_in_timestamp(self):
sys.argv = ['apachedex', '--base=/', '-']
sys.stdin = StringIO(
sys.stdin = io.StringIO(
# this first line is valid, but second is not
'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1754
127.0.0.1 - - [14/Jul/2017:127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1754''')
apachedex.main()
self.assertNotIn('Malformed line at -:1', sys.stderr.getvalue())
self.assertIn('Malformed line at -:2', sys.stderr.getvalue())
self.assertNotIn(b'Malformed line at -:1', self._stderr_bytes.getvalue())
self.assertIn(b'Malformed line at -:2', self._stderr_bytes.getvalue())
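
These bytes-level assertions work because setUp now wraps a `BytesIO` in a `TextIOWrapper`; `write_through=True` pushes every write straight through to the underlying buffer. A standalone sketch of the capture pattern (sample message only):

```python
# Sketch: capture text written to stderr as bytes, for assertions.
import io
import sys

buf = io.BytesIO()
old_stderr, sys.stderr = sys.stderr, io.TextIOWrapper(buf, write_through=True)
try:
    print('Malformed line at -:2', file=sys.stderr)
finally:
    sys.stderr = old_stderr
assert b'Malformed line at -:2' in buf.getvalue()
```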
class TestCharacterEncoding(ApacheDEXTestCase):
@@ -48,7 +61,7 @@ class TestCharacterEncoding(ApacheDEXTestCase):
fin.flush()
sys.argv = ['apachedex', '--base=/', fin.name, '-f', 'json', '-o', fout.name]
apachedex.main()
self.assertNotIn('Malformed line', sys.stderr.getvalue())
self.assertNotIn(b'Malformed line', self._stderr_bytes.getvalue())
with open(fout.name) as f:
self.assertTrue(json.load(f))
@@ -74,7 +87,7 @@ class EncodedInputTestMixin:
fin.flush()
sys.argv = ['apachedex', '--base=/', fin.name, '-f', 'json', '-o', fout.name]
apachedex.main()
self.assertNotIn('Malformed line', sys.stderr.getvalue())
self.assertNotIn(b'Malformed line', self._stderr_bytes.getvalue())
with open(fout.name) as f:
self.assertTrue(json.load(f))
@@ -86,20 +99,15 @@ class TestBzip2Encoding(ApacheDEXTestCase, EncodedInputTestMixin):
class TestZlibEncoding(ApacheDEXTestCase, EncodedInputTestMixin):
def _getInputData(self):
f = StringIO()
f = io.BytesIO()
with gzip.GzipFile(mode="w", fileobj=f) as gzfile:
gzfile.write(self.DEFAULT_LINE)
return f.getvalue()
if lzma is not None:
class TestLzmaEncoding(ApacheDEXTestCase, EncodedInputTestMixin):
def _getInputData(self):
return lzma.compress(self.DEFAULT_LINE)
else:
class TestLzmaEncoding(ApacheDEXTestCase):
def test(self):
self.skipTest("lzma not available")
class TestLzmaEncoding(ApacheDEXTestCase, EncodedInputTestMixin):
def _getInputData(self):
return lzma.compress(self.DEFAULT_LINE)
class TestTimeEnconding(ApacheDEXTestCase):
@@ -107,7 +115,7 @@ class TestTimeEnconding(ApacheDEXTestCase):
def test_seconds_timing(self):
with tempfile.NamedTemporaryFile() as fout:
sys.argv = ['apachedex', '--base=/', '-', '--logformat', '%h %l %u %t "%r" %>s %O "%{Referer}i" "%{User-Agent}i" %T', '-f', 'json', '-o', fout.name]
sys.stdin = StringIO(
sys.stdin = io.StringIO(
'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1''')
apachedex.main()
@@ -119,7 +127,7 @@ class TestTimeEnconding(ApacheDEXTestCase):
def test_milliseconds_timing(self):
with tempfile.NamedTemporaryFile() as fout:
sys.argv = ['apachedex', '--base=/', '-', '--logformat', '%h %l %u %t "%r" %>s %O "%{Referer}i" "%{User-Agent}i" %D', '-f', 'json', '-o', fout.name]
sys.stdin = StringIO(
sys.stdin = io.StringIO(
'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1000000''')
apachedex.main()
@@ -131,7 +139,7 @@ class TestTimeEnconding(ApacheDEXTestCase):
def test_microseconds_timing(self):
with tempfile.NamedTemporaryFile() as fout:
sys.argv = ['apachedex', '--base=/', '-', '--logformat', '%h %l %u %t "%r" %>s %O "%{Referer}i" "%{User-Agent}i" %{ms}T', '-f', 'json', '-o', fout.name]
sys.stdin = StringIO(
sys.stdin = io.StringIO(
'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1000
''')
@@ -64,14 +64,13 @@ setup(
long_description=".. contents::\n\n" + description,
author='Vincent Pelletier',
author_email='vincent@nexedi.com',
url='http://git.erp5.org/gitweb/apachedex.git',
url='https://lab.nexedi.com/nexedi/apachedex.git',
license='GPL 2+',
platforms=['any'],
classifiers=[
'Intended Audience :: Developers',
'License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)',
'Operating System :: OS Independent',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: Implementation :: PyPy',
'Programming Language :: Python :: Implementation :: CPython',
@@ -90,5 +89,4 @@ setup(
},
test_suite='apachedex.tests',
zip_safe=True,
use_2to3=True,
)