Commit 3df13f3f authored by Sven Franck

added python log parser

parent 0a363c3f
#READ generators: http://www.dabeaz.com/generators/Generators.pdf
import datetime
import time
import glob
import fileinput
import json
import re
import os
import sys
import operator
import gzip
# Month abbreviations in calendar order; list position + 1 is the month number.
months = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
]
# Bound matcher that captures (user, timestamp) from lines shaped like
# "<host> - C<...> [<timestamp> ..."; the user field must start with "C".
match_record = re.compile(r"^[^ ]+ - (C[^ ]*) \[([^ ]+)").match
strptime = datetime.datetime.strptime


def _clock_seconds(clock):
    """Return the hour/minute/second fields of a struct_time as float seconds."""
    span = datetime.timedelta(
        hours=clock.tm_hour, minutes=clock.tm_min, seconds=clock.tm_sec
    )
    return span.total_seconds()


# Silence threshold for a single machine, written as HH:MM:SS (whatever
# follows the comma is dropped). With 00:02:00, a pause of more than two
# minutes between two entries of the same machine is counted as
# machine_downtime, minus those two minutes.
machine_down = time.strptime('00:02:00,000'.split(',')[0], '%H:%M:%S')
machine_tick = _clock_seconds(machine_down)
# Threshold of the same form, compared against the pause between two
# consecutive log entries regardless of machine.
system_down = time.strptime('00:02:00,000'.split(',')[0], '%H:%M:%S')
system_tick = _clock_seconds(system_down)
zero_time = datetime.timedelta()
zero_tick = zero_time.total_seconds()
format_date = '%d/%b/%Y:%H:%M:%S'
# State carried from one parsed entry to the next: the raw timestamp text of
# the previous entry and its parsed value (a zero timedelta until one exists).
oldchunk = ""
last_time = zero_time
skip = 0
# Wall-clock start, used for the elapsed-time printouts.
startTime = datetime.datetime.now()
# convert to isoformat
def dthandler(o):
    """Return ``o.isoformat()``.

    When ``o`` has no ``isoformat`` attribute (or the call itself raises
    AttributeError), "serialize error" is printed and None is returned.
    """
    try:
        return o.isoformat()
    except AttributeError:
        # A single parenthesised argument prints the same text whether
        # `print` is parsed as a statement or called as a function.
        print("serialize error")
        return None
# convert string from 14/Nov/2012 to 2012-11-14
def convert(s):
    """Rewrite ``DD/Mon/YYYY:rest`` as ``YYYY-MM-DD:rest``.

    The text before the first ":" is taken as the date and split on "/";
    the month abbreviation is looked up in the module-level ``months`` list.
    Everything after that first ":" is appended unchanged.
    """
    pieces = s.split(':', 1)
    fields = pieces[0].split('/')
    year = fields[2]
    month_number = months.index(fields[1]) + 1
    day = int(fields[0])
    date_prefix = "%s-%02d-%02d:" % (year, month_number, day)
    return date_prefix + pieces[1]
# return sortable date string(!)
def stringsplit(t):
    """Return the timestamp of log line ``t`` as a string that sorts by date.

    The timestamp is the text after the first "[" within the part of the
    line that precedes the first "+"; it is passed through ``convert``.
    Raises IndexError when that part of the line contains no "[".
    """
    # Extract the timestamp once. Replacing a string by something else
    # within itself always yields that something else, so the converted
    # value can be returned directly.
    stamp = t.split("+", 1)[0].split("[", 1)[1]
    return convert(stamp)
# response object: one entry per user captured by match_record
obj = {}
# multiple files ordering: sortable first timestamp of a file -> its path
order = {}
# NOTE(review): "logme" is created/truncated here and closed after parsing,
# but nothing visible writes to it -- confirm it is still needed.
out = open("logme", "wb")
# single entry
# f = ['2001:470:1f14:169:15f3:824f:8a61:7b59 - SOFTINST-15414 [14/Nov/2012:09:32:31 +0100] "POST /setComputerPartitionConnectionXml HTTP/1.1" 200 4 "-" "-" 102356']
# Index every log file by the timestamp of its first line so the files can
# be replayed chronologically. Gzipped logs are recognised by extension only.
for f in glob.glob('logs/*'):
    opener = gzip.open if f.endswith('.gz') else open
    # The context manager closes each handle as soon as its first line is read.
    with opener(f, 'rb') as log:
        for line in log:
            # Only the first line matters; an empty file adds no entry.
            # Files starting with the same timestamp overwrite each other.
            order[stringsplit(line)] = f
            break
parse_order = sorted(order)
print(datetime.datetime.now() - startTime)
# Replay the log files in the order of their first timestamp and update one
# record per machine in `obj`: [first_seen, last_seen, downtime_seconds, joker].
#   downtime_seconds: sum of every pause of that machine beyond machine_tick.
#   joker: set to 1 on all known machines when two consecutive log entries
#          are more than system_tick apart.
unparsed = 0
for first_stamp in parse_order:
    file_to_parse = order[first_stamp]
    # Open the file selected for this pass (not a name left over from an
    # earlier loop), through gzip when its name ends in .gz.
    if file_to_parse.endswith('.gz'):
        f2 = gzip.open(file_to_parse, 'rb')
    else:
        f2 = open(file_to_parse, 'r')
    with f2:
        for line in f2:
            match = match_record(line)
            if match is None:
                continue
            user, str_time = match.groups()
            if oldchunk == str_time:
                # Same timestamp text as the previous entry: reuse the parsed
                # value instead of calling strptime again.
                this_time = last_time
            else:
                try:
                    this_time = strptime(str_time, format_date)
                except ValueError:
                    # Timestamp does not follow format_date: skip the line
                    # and report the total once parsing is done.
                    unparsed += 1
                    continue
                # last_time starts out as a zero timedelta, so subtracting it
                # leaves this_time unchanged only before the first entry.
                first_entry = (this_time - last_time) == this_time
                if not first_entry and this_time >= last_time:
                    if (this_time - last_time).total_seconds() > system_tick:
                        for record in obj.values():
                            record[3] = 1
                last_time = this_time
                oldchunk = str_time
            machine = obj.get(user)
            if machine is None:
                machine = obj[user] = [this_time, this_time, 0, 0]
            diff = (this_time - machine[1]).total_seconds()
            if diff > machine_tick:
                # NOTE(review): the original indentation was lost; this `else`
                # is paired with the joker test, so a flagged machine has one
                # over-long pause ignored and the flag cleared -- confirm.
                if machine[3] == 0:
                    machine[2] += diff - machine_tick
                else:
                    machine[3] = 0
            machine[1] = this_time
if unparsed:
    sys.stderr.write(
        "skipped %d log line(s) with an unparseable timestamp\n" % unparsed
    )
out.close()
print(datetime.datetime.now() - startTime)
# Turn every machine record into a dict of strings keyed by machine id and
# write the result to stats_.json.
response = {}
for machine_id, this_object in obj.items():
    first_seen, last_seen, downtime = this_object[0], this_object[1], this_object[2]
    total = (last_seen - first_seen).total_seconds()
    if downtime == zero_tick:
        perc = repr(100)
    else:
        # Share of the observed time span not counted as downtime, in percent.
        perc = repr(100 - (100 * (downtime / total)))
    # Machine ids are unique keys of obj, so each entry is stored exactly once.
    response[machine_id] = {
        "percentile": perc,
        "totaltime": repr(total),
        "init": dthandler(first_seen),
        "last": dthandler(last_seen),
        "machine_downtime": repr(downtime),
    }
data = json.dumps(response)
# 11. output to file
with open('stats_.json', 'w') as myFile:
    myFile.write(data)
print(datetime.datetime.now() - startTime)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment