Commit 88f4ef91 authored by Vincent Pelletier's avatar Vincent Pelletier

Initial packaging.

Embed main.js. It's too small to justify doing a python module out of a
command-line executable.
parent ff49a276
This diff is collapsed.
include README
include TODO
include COPYING
include apachedex/jquery*.js
Compute APDEX from Apache-style logs.
Overview
========
Parses Apache-style logs and generates several statistics useful to website
developers:
- APDEX (Application Performance inDEX, see http://www.apdex.org) ratio
(plotted)
- hit count (plotted)
- HTTP status codes, with optional detailed output of the most frequent URLs
per error status code, along with their most frequent referers
- Hottest pages (pages which use rendering time the most)
- ERP5 sites: per-module statistics, with module and document views separated
Some parsing performance figures:
On a 2.3 GHz Core i5, apachedex achieves 97000 lines/s
(pypy-c-jit-62994-bd32583a3f11-linux64) and 43000 lines/s (CPython 2.7).
Requirements
============
Dependencies
------------
As such, apachedex has no dependencies outside of a standard Python 2.7
installation.
But generated output needs a few javascript files which come from other
projects:
- jquery.js
- jquery.flot.js
- jquery.flot.time.js (official flot plugin)
- jquery.flot.axislabels.js (third-party flot plugin)
If you installed apachedex (using an egg or with a distribution's package) you
should have them already.
If you are running from the repository, you need to fetch them first::
python setup.py deps
Input
-----
All default "combined" log format fields are supported (more can easily be
added), plus %D.
Mandatory fields are (in any order) `%t`, `%r` (for request's URL), `%>s`,
`%{Referer}i`, `%D`. Just tell apachedex the value from your apache log
configuration (see `--logformat` argument documentation).
Input files may be provided gzip'ed.
Output
------
The output is HTML + CSS + JS, so you need a web browser to read it.
Usage
=====
A few usage examples. See embedded help (-h/--help) for further options.
Most basic usage::
apachedex --default website access.log
Generate stand-alone output (suitable for inclusion in a mail, for example)::
apachedex --default website --js-embed access.log --out attachment.html
A log file with requests for 2 websites for which individual stats are
desired, and hits outside those base urls are ignored::
apachedex --base /site1 /site2
A log file with a site section to ignore. Order does not matter::
apachedex --skip-base /ignored --default website
A mix of both above examples. Order matters !::
apachedex --skip-base /site1/ignored --base /site1 /site2
- use some templating system instead of hardcoded html strings
- provide some form of raw data output, not just html
- allow user to specify min & max dates
......@@ -26,10 +26,6 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
# TODO:
# - use some templating system instead of hardcoded html strings
# - provide some form of raw data output, not just html
# - allow user to specify min & max dates
from cgi import escape
from collections import defaultdict, Counter
from datetime import datetime, tzinfo, timedelta
......@@ -44,7 +40,17 @@ import os
import re
import sys
import time
# Choose how the data files shipped with apachedex (the javascript files) are
# loaded. When pkg_resources is importable, resources are fetched through it
# and their on-disk location is treated as unknown (abs_file_container is
# None). Otherwise they are read from the directory held in
# abs_file_container, which main() may later rebind.
try:
    import pkg_resources
except ImportError:
    # By default, assume resources are next to __file__
    abs_file_container = os.path.abspath(os.path.dirname(__file__))

    def getResource(name):
        """Return the content of resource `name` read from abs_file_container.

        abs_file_container is looked up at call time, so a later rebinding of
        the module-level name is honoured.
        """
        # Close the file explicitly rather than waiting for the garbage
        # collector to reclaim the handle.
        with open(os.path.join(abs_file_container, name)) as resource:
            return resource.read()
else:
    abs_file_container = None

    def getResource(name):
        """Return the content of resource `name` as provided by pkg_resources."""
        return pkg_resources.resource_string(__name__, name)
# Maps a three-letter English month abbreviation to its month number (1-12).
MONTH_VALUE_DICT = dict(zip(
    ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
    range(1, 13),
))
......@@ -376,6 +382,7 @@ period_parser = {
}
def main():
global abs_file_container
parser = argparse.ArgumentParser(description='Compute Apdex out of '
'apache-style log files')
parser.add_argument('logfile', nargs='+',
......@@ -388,9 +395,6 @@ def main():
help='Filename to write output to. Use - for stdout. Default: %(default)s')
parser.add_argument('-q', '--quiet', action='store_true',
help='Suppress warnings about malformed lines.')
parser.add_argument('--js',
default=os.path.abspath(os.path.dirname(__file__)),
help='Folder containing needed js files. Default: %(default)s')
group = parser.add_argument_group('generated content')
group.add_argument('-a', '--apdex', default=1.0, type=float,
......@@ -403,8 +407,13 @@ def main():
group.add_argument('-s', '--stats', action='store_true',
help='Enable parsing stats (time spent parsing input, time spent '
'generating output, ...)')
group.add_argument('--js-embed', action='store_true',
help='Embed js files instead of linking to them.')
if abs_file_container is not None:
# Force embedding when file container is unknown (ex: pkg_resources).
# XXX: allow when --js is also provided ?
group.add_argument('--js', default=abs_file_container,
help='Folder containing needed js files. Default: %(default)s')
group.add_argument('--js-embed', action='store_true',
help='Embed js files instead of linking to them.')
group = parser.add_argument_group('site matching', 'Earlier arguments take '
'precedence. For example: --skip-base /foo/bar --base /foo generates '
......@@ -423,6 +432,7 @@ def main():
help='Absolute base url(s) to ignore.')
args = parser.parse_args()
abs_file_container = getattr(args, 'js', abs_file_container)
line_regex = ''
try:
n = iter(args.logformat).next
......@@ -538,14 +548,22 @@ def main():
'.flot-x-axis .tickLabel { text-align: center; } '
'</style>')
for script in ('jquery.js', 'jquery.flot.js', 'jquery.flot.time.js',
'jquery.flot.axislabels.js', 'main.js'):
if args.js_embed:
out.write('<script type="text/javascript">//<![CDATA[')
'jquery.flot.axislabels.js'):
if getattr(args, 'js_embed', True):
out.write('<script type="text/javascript">//<![CDATA[\n')
out.write(getResource(script))
out.write('//]]></script>')
out.write('\n//]]></script>')
else:
out.write('<script type="text/javascript" src="%s/%s"></script>' % (
args.js, script))
out.write('<script type="text/javascript">$(function() {'
'$(".graph").each(function (i){'
'$.plot('
'this,'
'$.parseJSON($(this).attr("data-points")),'
'$.parseJSON($(this).attr("data-options")));'
'});'
'});</script>')
out.write('</head><body><h1>Overall</h1><h2>Parameters</h2>'
'<table class="stats">')
for caption, value in (
......
// Once the document is ready, plot every element of class "graph".
// Each such element carries its data in the "data-points" attribute and its
// plot options in the "data-options" attribute, both encoded as JSON.
$(function() {
  $(".graph").each(function (i){
    var points = $.parseJSON($(this).attr('data-points'));
    var options = $.parseJSON($(this).attr('data-options'));
    $.plot(this, points, options);
  });
});
from os.path import join, exists
from setuptools import setup, find_packages
import hashlib
import sys
import urllib
# Upstream revisions / version the fetched javascript files are pinned to.
FLOT_SHA = 'aefe4e729b2d14efe6e8c0db359cb0e9aa6aae52'
FLOT_AXISLABELS_SHA = '80453cd7fb8a9cad084cf6b581034ada3339dbf8'
JQUERY_VERSION = '1.9.1'

# Javascript files to fetch, keyed by file name. Each value is a
# (download URL, expected MD5 hex digest) pair.
DEPS = dict((
    (
        'jquery.flot.js',
        (
            'http://raw.github.com/flot/flot/%s/jquery.flot.js' % FLOT_SHA,
            '7b599c575f19c33bf0d93a6bbac3af02',
        ),
    ),
    (
        'jquery.flot.time.js',
        (
            'http://raw.github.com/flot/flot/%s/jquery.flot.time.js' % FLOT_SHA,
            'c0aec1608bf2fbb79f24d1905673e2c3',
        ),
    ),
    (
        'jquery.flot.axislabels.js',
        (
            'http://raw.github.com/markrcote/flot-axislabels/%s/'
            'jquery.flot.axislabels.js' % FLOT_AXISLABELS_SHA,
            'a8526e0c1ed3b5cbc1a6b3ebb22bf334',
        ),
    ),
    (
        'jquery.js',
        (
            'http://code.jquery.com/jquery-%s.min.js' % JQUERY_VERSION,
            '397754ba49e9e0cf4e7c190da78dda05',
        ),
    ),
))
def download(url, filename, hexdigest):
    """Make sure a dependency is present in the "apachedex" folder and intact.

    url: location to fetch the file from when it is not present yet.
    filename: name of the file, relative to the "apachedex" folder.
    hexdigest: expected MD5 hex digest of the file content.

    The checksum is verified whether the file was just fetched or was already
    present. Raises EnvironmentError when it does not match.
    """
    filename = join('apachedex', filename)
    if not exists(filename):
        urllib.urlretrieve(url, filename)
    # Hash the raw bytes: binary mode avoids any newline translation or
    # decoding, and the with-block closes the handle deterministically.
    with open(filename, 'rb') as downloaded:
        actual_hexdigest = hashlib.md5(downloaded.read()).hexdigest()
    if actual_hexdigest != hexdigest:
        raise EnvironmentError('Checksum mismatch downloading %r' % filename)
# Make sure every javascript dependency is present and matches its checksum
# before going any further, fetching the missing ones.
for filename, (url, hexdigest) in DEPS.items():
    download(url, filename, hexdigest)
# XXX: turn this into a setuptools command ?
if sys.argv[1:] == ['deps']:
    # "python setup.py deps" only fetches the dependencies.
    sys.exit(0)

# Read the README once, closing the file as soon as its content is loaded.
with open('README') as readme:
    description = readme.read()

setup(
    name='APacheDEX',
    version='1.0',
    # Short description: first non-blank line of the README. The next()
    # builtin is used instead of the Python-2-only generator .next() method.
    description=next(x for x in description.splitlines() if x.strip()),
    long_description=".. contents::\n\n" + description,
    author='Vincent Pelletier',
    author_email='vincent@nexedi.com',
    url='http://git.erp5.org/gitweb/apachedex.git',
    license='GPL 2+',
    platforms=['any'],
    classifiers=[
        'Intended Audience :: Developers',
        'License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: Implementation :: PyPy',
        'Programming Language :: Python :: Implementation :: CPython',
        'Topic :: System :: Logging',
        'Topic :: Text Processing :: Filters',
        'Topic :: Text Processing :: Markup :: HTML',
    ],
    packages=find_packages(),
    entry_points={
        'console_scripts': [
            'apachedex=apachedex:main',
        ],
    },
    package_data={
        # Ship the fetched javascript files with the package; an explicit
        # list is passed rather than a dict view.
        'apachedex': list(DEPS),
    },
    zip_safe=True,
)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment