Commit 88f4ef91 authored by Vincent Pelletier's avatar Vincent Pelletier

Initial packaging.

Embed main.js. It's too small to justify doing a python module out of a
command-line executable.
parent ff49a276
This diff is collapsed.
include README
include TODO
include COPYING
include apachedex/jquery*.js
Compute APDEX from Apache-style logs.
Overview
========
Parses Apache-style logs and generates several statistics useful to website
developers:
- APDEX (Application Performance inDEX, see http://www.apdex.org) ratio
(plotted)
- hit count (plotted)
- HTTP status codes, with optional detailed output of the most frequent URLs
per error status code, along with their most frequent referers
- Hottest pages (pages which consume the most rendering time)
- ERP5 sites: per-module statistics, with module and document views separated
Some parsing performance figures:
On a 2.3GHz Core i5, apachedex achieves 97000 lines/s (
pypy-c-jit-62994-bd32583a3f11-linux64) and 43000 lines/s (CPython 2.7).
Requirements
============
Dependencies
------------
As such, apachedex has no dependencies outside of a standard Python 2.7
installation.
But generated output needs a few javascript files which come from other
projects:
- jquery.js
- jquery.flot.js
- jquery.flot.time.js (official flot plugin)
- jquery.flot.axislabels.js (third-party flot plugin)
If you installed apachedex (using an egg or with a distribution's package) you
should have them already.
If you are running from repository, you need to fetch them first::
python setup.py deps
Input
-----
All default "combined" log format fields are supported (more can easily be
added), plus %D.
Mandatory fields are (in any order) `%t`, `%r` (for request's URL), `%>s`,
`%{Referer}i`, `%D`. Just tell apachedex the value from your apache log
configuration (see `--logformat` argument documentation).
Input files may be provided gzip'ed.
Output
------
The output is HTML + CSS + JS, so you need a web browser to read it.
Usage
=====
A few usage examples. See embedded help (-h/--help) for further options.
Most basic usage::
apachedex --default website access.log
Generate stand-alone output (suitable for inclusion in a mail, for example)::
apachedex --default website --js-embed access.log --out attachment.html
A log file with requests for 2 websites for which individual stats are
desired, and hits outside those base urls are ignored::
apachedex --base /site1 /site2
A log file with a site section to ignore. Order does not matter::
apachedex --skip-base /ignored --default website
A mix of both above examples. Order matters!::
apachedex --skip-base /site1/ignored --base /site1 /site2
- use some templating system instead of hardcoded html strings
- provide some form of raw data output, not just html
- allow user to specify min & max dates
...@@ -26,10 +26,6 @@ ...@@ -26,10 +26,6 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# #
############################################################################## ##############################################################################
# TODO:
# - use some templating system instead of hardcoded html strings
# - provide some form of raw data output, not just html
# - allow user to specify min & max dates
from cgi import escape from cgi import escape
from collections import defaultdict, Counter from collections import defaultdict, Counter
from datetime import datetime, tzinfo, timedelta from datetime import datetime, tzinfo, timedelta
...@@ -44,7 +40,17 @@ import os ...@@ -44,7 +40,17 @@ import os
import re import re
import sys import sys
import time import time
try:
    import pkg_resources
except ImportError:
    # By default, assume resources are next to __file__
    abs_file_container = os.path.abspath(os.path.dirname(__file__))

    def getResource(name):
        """Return the content of the resource file `name`.

        The file is read from the directory currently bound to the
        module-level `abs_file_container`. That name is looked up at call
        time, so rebinding it later changes where resources are read from.
        """
        # Close the file deterministically rather than relying on the
        # garbage collector to release the handle.
        with open(os.path.join(abs_file_container, name)) as resource_file:
            return resource_file.read()
else:
    # pkg_resources locates the files itself, so no on-disk container
    # directory is known in this case.
    abs_file_container = None

    def getResource(name):
        """Return the content of the resource `name` via pkg_resources."""
        return pkg_resources.resource_string(__name__, name)
MONTH_VALUE_DICT = dict((y, x) for (x, y) in enumerate(('Jan', 'Feb', 'Mar', MONTH_VALUE_DICT = dict((y, x) for (x, y) in enumerate(('Jan', 'Feb', 'Mar',
'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), 1)) 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), 1))
...@@ -376,6 +382,7 @@ period_parser = { ...@@ -376,6 +382,7 @@ period_parser = {
} }
def main(): def main():
global abs_file_container
parser = argparse.ArgumentParser(description='Compute Apdex out of ' parser = argparse.ArgumentParser(description='Compute Apdex out of '
'apache-style log files') 'apache-style log files')
parser.add_argument('logfile', nargs='+', parser.add_argument('logfile', nargs='+',
...@@ -388,9 +395,6 @@ def main(): ...@@ -388,9 +395,6 @@ def main():
help='Filename to write output to. Use - for stdout. Default: %(default)s') help='Filename to write output to. Use - for stdout. Default: %(default)s')
parser.add_argument('-q', '--quiet', action='store_true', parser.add_argument('-q', '--quiet', action='store_true',
help='Suppress warnings about malformed lines.') help='Suppress warnings about malformed lines.')
parser.add_argument('--js',
default=os.path.abspath(os.path.dirname(__file__)),
help='Folder containing needed js files. Default: %(default)s')
group = parser.add_argument_group('generated content') group = parser.add_argument_group('generated content')
group.add_argument('-a', '--apdex', default=1.0, type=float, group.add_argument('-a', '--apdex', default=1.0, type=float,
...@@ -403,8 +407,13 @@ def main(): ...@@ -403,8 +407,13 @@ def main():
group.add_argument('-s', '--stats', action='store_true', group.add_argument('-s', '--stats', action='store_true',
help='Enable parsing stats (time spent parsing input, time spent ' help='Enable parsing stats (time spent parsing input, time spent '
'generating output, ...)') 'generating output, ...)')
group.add_argument('--js-embed', action='store_true', if abs_file_container is not None:
help='Embed js files instead of linking to them.') # Force embedding when file container is unknown (ex: pkg_resources).
# XXX: allow when --js is also provided ?
group.add_argument('--js', default=abs_file_container,
help='Folder containing needed js files. Default: %(default)s')
group.add_argument('--js-embed', action='store_true',
help='Embed js files instead of linking to them.')
group = parser.add_argument_group('site matching', 'Earlier arguments take ' group = parser.add_argument_group('site matching', 'Earlier arguments take '
'precedence. For example: --skip-base /foo/bar --base /foo generates ' 'precedence. For example: --skip-base /foo/bar --base /foo generates '
...@@ -423,6 +432,7 @@ def main(): ...@@ -423,6 +432,7 @@ def main():
help='Absolute base url(s) to ignore.') help='Absolute base url(s) to ignore.')
args = parser.parse_args() args = parser.parse_args()
abs_file_container = getattr(args, 'js', abs_file_container)
line_regex = '' line_regex = ''
try: try:
n = iter(args.logformat).next n = iter(args.logformat).next
...@@ -538,14 +548,22 @@ def main(): ...@@ -538,14 +548,22 @@ def main():
'.flot-x-axis .tickLabel { text-align: center; } ' '.flot-x-axis .tickLabel { text-align: center; } '
'</style>') '</style>')
for script in ('jquery.js', 'jquery.flot.js', 'jquery.flot.time.js', for script in ('jquery.js', 'jquery.flot.js', 'jquery.flot.time.js',
'jquery.flot.axislabels.js', 'main.js'): 'jquery.flot.axislabels.js'):
if args.js_embed: if getattr(args, 'js_embed', True):
out.write('<script type="text/javascript">//<![CDATA[') out.write('<script type="text/javascript">//<![CDATA[\n')
out.write(getResource(script)) out.write(getResource(script))
out.write('//]]></script>') out.write('\n//]]></script>')
else: else:
out.write('<script type="text/javascript" src="%s/%s"></script>' % ( out.write('<script type="text/javascript" src="%s/%s"></script>' % (
args.js, script)) args.js, script))
out.write('<script type="text/javascript">$(function() {'
'$(".graph").each(function (i){'
'$.plot('
'this,'
'$.parseJSON($(this).attr("data-points")),'
'$.parseJSON($(this).attr("data-options")));'
'});'
'});</script>')
out.write('</head><body><h1>Overall</h1><h2>Parameters</h2>' out.write('</head><body><h1>Overall</h1><h2>Parameters</h2>'
'<table class="stats">') '<table class="stats">')
for caption, value in ( for caption, value in (
......
// Handler registered through jQuery's $(callback) form: for every element
// matching the ".graph" selector, draw a plot inside that element.
$(function() {
$(".graph").each(function (i){
// The plot's data series and its options are both stored on the element
// itself as JSON text, in the "data-points" and "data-options" attributes.
$.plot(
this,
$.parseJSON($(this).attr('data-points')),
$.parseJSON($(this).attr('data-options')));
});
});
from os.path import join, exists
from setuptools import setup, find_packages
import hashlib
import sys
import urllib
# Pinned upstream revisions / versions of the bundled javascript files.
FLOT_SHA = 'aefe4e729b2d14efe6e8c0db359cb0e9aa6aae52'
FLOT_AXISLABELS_SHA = '80453cd7fb8a9cad084cf6b581034ada3339dbf8'
JQUERY_VERSION = '1.9.1'

# Download locations, each built from the pinned revision above.
_flot_url = 'http://raw.github.com/flot/flot/%s/jquery.flot.js' % FLOT_SHA
_flot_time_url = (
    'http://raw.github.com/flot/flot/%s/jquery.flot.time.js' % FLOT_SHA
)
_flot_axislabels_url = (
    'http://raw.github.com/markrcote/flot-axislabels/%s/'
    'jquery.flot.axislabels.js' % FLOT_AXISLABELS_SHA
)
_jquery_url = 'http://code.jquery.com/jquery-%s.min.js' % JQUERY_VERSION

# Maps each javascript file name to a (download URL, expected MD5 hex digest)
# pair.
DEPS = {
    'jquery.flot.js': (_flot_url, '7b599c575f19c33bf0d93a6bbac3af02'),
    'jquery.flot.time.js': (
        _flot_time_url,
        'c0aec1608bf2fbb79f24d1905673e2c3',
    ),
    'jquery.flot.axislabels.js': (
        _flot_axislabels_url,
        'a8526e0c1ed3b5cbc1a6b3ebb22bf334',
    ),
    'jquery.js': (_jquery_url, '397754ba49e9e0cf4e7c190da78dda05'),
}
def download(url, filename, hexdigest):
    """Make sure apachedex/<filename> exists and matches `hexdigest`.

    url: location the file is fetched from when it is not already present.
    filename: file name, relative to the "apachedex" directory.
    hexdigest: expected MD5 hex digest of the file content.

    Raises EnvironmentError when the file content does not match `hexdigest`.
    The check also runs on a file that was already present.
    """
    filename = join('apachedex', filename)
    if not exists(filename):
        urllib.urlretrieve(url, filename)
    # Hash the raw bytes (no text-mode translation) and close the file
    # deterministically instead of leaking the handle.
    with open(filename, 'rb') as downloaded:
        actual_hexdigest = hashlib.md5(downloaded.read()).hexdigest()
    if actual_hexdigest != hexdigest:
        raise EnvironmentError('Checksum mismatch downloading %r' % filename)
# Fetch (when missing) and verify every javascript dependency before
# packaging, so that they can be shipped as package data.
for filename, (url, hexdigest) in DEPS.items():
    download(url, filename, hexdigest)
# XXX: turn this into a setuptool command ?
# "python setup.py deps" only fetches the dependencies, then stops.
if sys.argv[1:] == ['deps']:
    sys.exit(0)

# Read the README once and release the file handle right away.
with open('README') as readme_file:
    description = readme_file.read()

setup(
    name='APacheDEX',
    version='1.0',
    # Short description: first non-blank line of the README.
    description=next(x for x in description.splitlines() if x.strip()),
    long_description=".. contents::\n\n" + description,
    author='Vincent Pelletier',
    author_email='vincent@nexedi.com',
    url='http://git.erp5.org/gitweb/apachedex.git',
    license='GPL 2+',
    platforms=['any'],
    classifiers=[
        'Intended Audience :: Developers',
        'License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: Implementation :: PyPy',
        'Programming Language :: Python :: Implementation :: CPython',
        'Topic :: System :: Logging',
        'Topic :: Text Processing :: Filters',
        'Topic :: Text Processing :: Markup :: HTML',
    ],
    packages=find_packages(),
    entry_points={
        'console_scripts': [
            'apachedex=apachedex:main',
        ],
    },
    package_data={
        # A real list of the javascript file names, whatever type
        # dict.keys() returns on the running interpreter.
        'apachedex': list(DEPS),
    },
    zip_safe=True,
)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment