From 1e854294bfadfa019c69fae8049c921bcdf6c8db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Nowak?= <luke@nexedi.com>
Date: Thu, 9 Aug 2012 17:19:19 +0200
Subject: [PATCH] Imported from SVN.

Path https://svn.erp5.org/repos/public/erp5/trunk/utils/timing_log_parser/parse_timing_log.py, revision 34836.
---
 erp5/util/parse_timing_log/__init__.py | 277 +++++++++++++++++++++++++
 1 file changed, 277 insertions(+)
 create mode 100755 erp5/util/parse_timing_log/__init__.py

diff --git a/erp5/util/parse_timing_log/__init__.py b/erp5/util/parse_timing_log/__init__.py
new file mode 100755
index 0000000000..1679598c72
--- /dev/null
+++ b/erp5/util/parse_timing_log/__init__.py
@@ -0,0 +1,277 @@
+#!/usr/bin/python
+##############################################################################
+#
+# Copyright (c) 2009 Nexedi SA and Contributors. All Rights Reserved.
+#                    Vincent Pelletier <vincent@nexedi.com>
+#
+# WARNING: This program as such is intended to be used by professional
+# programmers who take the whole responsibility of assessing all potential
+# consequences resulting from its eventual inadequacies and bugs.
+# End users who are looking for a ready-to-use solution with commercial
+# guarantees and support are strongly advised to contract a Free Software
+# Service Company.
+#
+# This program is Free Software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+##############################################################################
+
+import os
+import sys
+import imp
+import gzip
+import getopt
+from time import time
+
+PROFILING_ENABLED = False
+if PROFILING_ENABLED:
+  from tiny_profiler import profiler_decorator, profiler_report
+else:
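+  # No-op stand-ins so decorated functions and the final report call work
+  # unchanged when profiling is disabled.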
+  def profiler_decorator(funct):
+    return funct
+
+  def profiler_report():
+    pass
+
+usage = """
+Usage:
+  parse_timing_log.py [--prefix <prefix>] --config <config> [--debug]
+                      [--no-average] [--sum] [--load <file>] [--save <file>]
+                      [--decimate <int>] [file_1 [file_2 [...]]]
+
+  Either --prefix or --save must be given.
+
+  --prefix <prefix>
+    <prefix> is a string which is used to prefix result file names.
+    If omitted, no CSV will be generated.
+
+  --no-average
+    Disable the generation of CSV files with average values.
+
+  --sum
+    Generate CSV files with time sums as values.
+    They use the same names as average files, suffixed with "_sum.csv".
+    Ignored if --prefix was not given.
+
+  --load <file>
+    Load internal data dict from given file before processing any given file.
+    If given multiple times, the contents of all those files are merged.
+
+  --save <file>
+    Save internal data dict to given file after processing all given files.
+
+  --config <config>
+    <config> is a Python script defining 3 values:
+      - a method called "processLine"
+      - a compiled regex called "LINE_PATTERN"
+      - a function called "date_key" computing the sort key of the date list
+    See the illustrative config sketch after this usage text.
+
+  --debug
+    Display missed and skipped lines.
+
+  --decimate <int>
+    Instead of generating a line per measure, generate one line per <int>
+    measures.
+    The remainder of the integer division of the number of measures by the
+    decimate value goes into the last output line.
+
+  file_1 ...
+    Log files to process.
+    Order in which files are given does not matter.
+    Files can be gzipped or plain text.
+
+Output files:
+  CSV, one file per distinct processLine return value, one line per log day,
+  one column per measure.
+  First line contains column titles.
+  First column contains the measure date (first recognisable date in current file).
+  Each other cell contains:
+    =<value sum>/<value count>
+  Example:
+    =434/125
+  which means an average of 3.472s over 125 values.
+  Empty cells mean that there are no values for that measure in the current file.
+  Strings are surrounded by double quotes (").
+  Fields are separated by commas (,).
+"""
+
+@profiler_decorator
+def parseFile(filename, measure_dict):
+  date = None
+  line_number = 0
+  match_count = 0
+  skip_count = 0
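+  # gzip only validates the magic bytes on first read, so probe with
+  # readline(); if it raises IOError, reopen the file as plain text.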
+  logfile = gzip.open(filename, 'r')
+  try:
+    line = logfile.readline()
+  except IOError:
+    logfile = open(filename, 'r')
+    line = logfile.readline()
+  begin = time()
+  while line != '':
+    line_number += 1
+    if line_number % 5000 == 0:
+      sys.stderr.write('%i\r' % (line_number, ))
+      sys.stderr.flush()
+    match_list = LINE_PATTERN.findall(line)
+    if len(match_list) != 1:
+      print >>sys.stderr, 'Unparseable line: %s:%i %r' % (filename, line_number, line)
+    else:
+      result, filter_id, date, duration = processLine(match_list[0], filename, line_number)
+      # Possible result values & meaning:
+      #  False: try next filter_method
+      #  True: ignore & skip to next line
+      #  (string): use & skip to next line
+      if result is False:
+        if debug:
+          print >>sys.stderr, '? %s:%i %r' % (filename, line_number, match_list[0])
+      elif result is True:
+        if debug:
+          print >>sys.stderr, '- %s:%i %r' % (filename, line_number, match_list[0])
+        skip_count += 1
+      else:
+        measure_dict.setdefault(filter_id, {}).setdefault(result, {}).setdefault(date, []).append(int(duration))
+        match_count += 1
+    line = logfile.readline()
+  print >>sys.stderr, '%i' % (line_number, )
+  if line_number > 0:
+    duration = time() - begin
+    print >>sys.stderr, "Matched %i lines (%.2f%%), %i skipped (%.2f%%), %i unmatched (%.2f%%) in %.2fs (%i lines per second)." % \
+      (match_count, (float(match_count) / line_number) * 100, skip_count, (float(skip_count) / line_number) * 100, (line_number - match_count - skip_count), (1 - (float(match_count + skip_count) / line_number)) * 100, duration, line_number / duration)
+
+debug = False
+outfile_prefix = None
+configuration = None
+do_average = True
+do_sum = False
+load_file_name_list = []
+save_file_name = None
+decimate_count = 1
+
+try:
+  opts, file_list = getopt.getopt(sys.argv[1:], '', ['debug', 'config=', 'prefix=', 'no-average', 'sum', 'load=', 'save=', 'decimate='])
+except Exception, reason:
+  print >>sys.stderr, reason
+  print >>sys.stderr, usage
+  sys.exit(1)
+
+for name, value in opts:
+  if name == '--debug':
+    debug = True
+  elif name == '--config':
+    configuration = value
+  elif name == '--prefix':
+    outfile_prefix = value
+  elif name == '--no-average':
+    do_average = False
+  elif name == '--sum':
+    do_sum = True
+  elif name == '--load':
+    load_file_name_list.append(value)
+  elif name == '--save':
+    save_file_name = value
+  elif name == '--decimate':
+    decimate_count = int(value)
+
+if configuration is None:
+  raise ValueError, '--config is mandatory'
+
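+# Import the config script as a regular module via imp, searching its own
+# directory first and then the rest of sys.path.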
+config_file = os.path.splitext(os.path.basename(configuration))[0]
+config_path = [os.path.dirname(os.path.abspath(configuration))] + sys.path
+config_fp, path, description = imp.find_module(config_file, config_path)
+module = imp.load_module(config_file, config_fp, path, description)
+config_fp.close()
+
+processLine = module.processLine
+LINE_PATTERN = module.LINE_PATTERN
+date_key = module.date_key
+
+file_count = len(file_list)
+file_number = 0
+
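+# measure_dict maps filter_id -> processLine result -> date -> list of
+# integer durations, in whatever unit the parsed logs use.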
+measure_dict = {}
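+# --save writes repr(measure_dict) and --load restores it with eval(), so
+# only load files produced by a previous --save run.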
+if len(load_file_name_list):
+  for load_file_name in load_file_name_list:
+    load_file = open(load_file_name)
+    temp_measure_dict = eval(load_file.read(), {})
+    load_file.close()
+    assert isinstance(temp_measure_dict, dict)
+    for filter_id, result_dict in temp_measure_dict.iteritems():
+      for result, date_dict in result_dict.iteritems():
+        for date, duration_list in date_dict.iteritems():
+          measure_dict.setdefault(filter_id, {}).setdefault(result, {}).setdefault(date, []).extend(duration_list)
+    print >>sys.stderr, 'Previous processing result restored from %r' % (load_file_name, )
+
+for filename in file_list:
+  file_number += 1
+  print >>sys.stderr, 'Processing %s [%i/%i]...' % (filename, file_number, file_count)
+  parseFile(filename, measure_dict)
+
+if save_file_name is not None:
+  save_file = open(save_file_name, 'w')
+  save_file.write(repr(measure_dict))
+  save_file.close()
+  print >>sys.stderr, 'Processing result saved to %r' % (save_file_name, )
+
+if outfile_prefix is not None:
+  ## Generate a list of all measures and a 2-level dictionary with date as key and measure dictionary as value
+  measure_id_list = []
+  append = measure_id_list.append
+  sheet_dict = {}
+  line_dict = {}
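+  # sheet_dict: match_id -> [(result_id, measure_id)] gives one CSV file per
+  # match_id with one column per result; line_dict: date -> measure_id ->
+  # duration list.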
+  for match_id, match_dict in measure_dict.iteritems():
+    for result_id, result_dict in match_dict.iteritems():
+      measure_id = (match_id, result_id)
+      sheet_dict.setdefault(match_id, []).append((result_id, measure_id))
+      append(measure_id)
+      for date, measure_list in result_dict.iteritems():
+        first_level_dict = line_dict.setdefault(date, {})
+        assert measure_id not in first_level_dict
+        first_level_dict[measure_id] = measure_list
+
+  date_list = line_dict.keys()
+  date_list.sort(key=date_key)
+
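+  # Render one CSV cell: a list of durations becomes e.g. "=434/125" (sum and
+  # count, left for the spreadsheet to evaluate); the '' placeholder for a
+  # missing measure passes through unchanged.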
+  def render_cell(value_list, cell_format):
+    if isinstance(value_list, (list, tuple)):
+      return cell_format % {'sum': sum(value_list), 'count': len(value_list)}
+    else:
+      return value_list
+
+  def renderOutput(data_format, filename_suffix):
+    for sheet_id, sheet_column_list in sheet_dict.iteritems():
+      outfile_name = '%s_%s_%s.csv' % (outfile_prefix, sheet_id, filename_suffix)
+      print >>sys.stderr, 'Writing to %r...' % (outfile_name, )
+      outfile = open(outfile_name, 'w')
+      print >>outfile, '"date",%s' % (','.join(['"%s"' % (x[0], ) for x in sheet_column_list]), )
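+      # Accumulate measure lists over <decimate_count> consecutive dates and
+      # emit one output line per group; any remainder is flushed after the
+      # loop.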
+      decimate_dict = {}
+      decimate = 0
+      for date in date_list:
+        for key, value in line_dict[date].iteritems():
+          decimate_dict.setdefault(key, []).extend(value)
+        decimate += 1
+        if decimate == decimate_count:
+          print >>outfile, '"%s",%s' % (date, ','.join([render_cell(decimate_dict.get(x[1], ''), data_format) for x in sheet_column_list]))
+          decimate_dict = {}
+          decimate = 0
+      if len(decimate_dict):
+        print >>outfile, '"%s",%s' % (date, ','.join([render_cell(decimate_dict.get(x[1], ''), data_format) for x in sheet_column_list]))
+
+  if do_average:
+    renderOutput('=%(sum)i/%(count)i', 'avg')
+  if do_sum:
+    renderOutput('=%(sum)i', 'sum')
+
+profiler_report()
+
-- 
2.30.9