Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cloud-quote
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
cloud-quote
Commits
3df13f3f
Commit
3df13f3f
authored
Mar 19, 2013
by
Sven Franck
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added python log parser
parent
0a363c3f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
193 additions
and
0 deletions
+193
-0
py/my.py
py/my.py
+193
-0
No files found.
py/my.py
0 → 100644
View file @
3df13f3f
#READ generators: http://www.dabeaz.com/generators/Generators.pdf
import datetime
import time
import glob
import fileinput
import json
import re
import os
import sys
import operator
import gzip
# Three-letter English month abbreviations in calendar order; a month's
# number is its index in this list plus one.
months = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
]
# Bound ``match`` of the compiled access-log pattern. It anchors at the start
# of a line shaped like "<host> - <user> [<timestamp> ..." and captures two
# groups: the user field (only values beginning with "C" match) and the
# space-free token that follows the opening bracket.
match_record = re.compile(r"^[^ ]+ - (C[^ ]*) \[([^ ]+)").match
# Short alias so the per-line parsing loop avoids repeated attribute lookups.
strptime = datetime.datetime.strptime


def _tick_seconds(parsed):
    """Return the H:M:S fields of a ``struct_time`` as a number of seconds."""
    span = datetime.timedelta(
        hours=parsed.tm_hour,
        minutes=parsed.tm_min,
        seconds=parsed.tm_sec,
    )
    return span.total_seconds()


# Threshold for machine downtime: with 00:02:00, a pause of more than two
# minutes between two log entries of the same machine is counted as
# machine_downtime, reduced by those two minutes.
machine_down = time.strptime('00:02:00,000'.split(',')[0], '%H:%M:%S')
machine_tick = _tick_seconds(machine_down)

# Same kind of threshold, applied to the pause between consecutive entries
# of the whole log rather than of one machine.
system_down = time.strptime('00:02:00,000'.split(',')[0], '%H:%M:%S')
system_tick = _tick_seconds(system_down)

# Zero-length duration and its value in seconds.
zero_time = datetime.timedelta()
zero_tick = zero_time.total_seconds()

# strptime layout of the timestamp captured from a log line.
format_date = '%d/%b/%Y:%H:%M:%S'

# State carried from one log line to the next: the previous timestamp text
# and the previous parsed time (a zero timedelta until a line was parsed).
oldchunk = ""
last_time = zero_time
skip = 0

# Wall-clock start of the run, used for the elapsed-time printouts.
startTime = datetime.datetime.now()
# convert to isoformat
def dthandler(o):
    """Return ``o.isoformat()``, or ``None`` when ``o`` has no such method.

    Args:
        o: Any object; date/datetime objects provide ``isoformat``.

    Returns:
        The ISO-8601 string produced by ``o.isoformat()``. If the attribute
        is missing, "serialize error" is printed and ``None`` is returned.
    """
    try:
        return o.isoformat()
    except AttributeError:
        # A parenthesised single argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("serialize error")
        return None
# convert string from 14/Nov/2012 to 2012-11-14
def convert(s):
    """Rewrite a ``DD/Mon/YYYY:rest`` timestamp as ``YYYY-MM-DD:rest``.

    Example: "14/Nov/2012:09:32:31" becomes "2012-11-14:09:32:31", which
    sorts chronologically when compared as plain text.

    Args:
        s: Timestamp text; everything after the first ":" is copied through
            unchanged.

    Returns:
        The reordered timestamp string.

    Raises:
        IndexError: If ``s`` has no ":" or the date part has fewer than
            three "/"-separated fields.
        ValueError: If the month is not in ``months`` or the day is not an
            integer.
    """
    parts = s.split(':', 1)          # [date, remainder]
    fields = parts[0].split('/')     # [day, month abbreviation, year]
    return "%s-%02d-%02d:" % (
        fields[2], months.index(fields[1]) + 1, int(fields[0])
    ) + parts[1]
# return sortable date string(!)
def stringsplit(t):
    """Return the bracketed timestamp of a log line in sortable form.

    Takes the text between the first "[" and the first "+" of ``t`` and
    passes it through ``convert``.

    Args:
        t: One raw log line.

    Returns:
        The converted timestamp string.

    Raises:
        IndexError: If ``t`` contains no "[" before the first "+".
    """
    # Extract once. The earlier form recomputed this same slice four times
    # and replaced it inside itself, which is the same as converting it.
    # NOTE(review): the "+" and "[" delimiters were recovered from a
    # whitespace-stripped listing - confirm neither originally contained a
    # space (e.g. " +"), since that changes whether a trailing blank is kept.
    stamp = t.split("+", 1)[0].split("[", 1)[1]
    return convert(stamp)
# response object: filled by the parsing loop with
# user -> [first time seen, last time seen, accumulated downtime, joker flag]
obj = {}
# multiple files ordering: sortable first-line timestamp -> file path
order = {}
#j = 0;
# Opening in "wb" creates or truncates "logme"; the handle is only closed
# again after parsing - nothing in the visible script writes to it.
out = open("logme", "wb")
# single entry
# f = ['2001:470:1f14:169:15f3:824f:8a61:7b59 - SOFTINST-15414 [14/Nov/2012:09:32:31 +0100] "POST /setComputerPartitionConnectionXml HTTP/1.1" 200 4 "-" "-" 102356']
# Work out the order in which the log files have to be parsed: read the
# first line of every file under logs/ and remember the file under that
# line's sortable timestamp. Two files whose first lines share a timestamp
# end up under one key, so only the later one is kept.
for f in glob.glob('logs/*'):
    # Compressed logs are recognised by their ".gz" suffix.
    opener = gzip.open if f.endswith('.gz') else open
    # The with-block closes every handle, which the earlier loop never did.
    with opener(f, 'rb') as log_file:
        for line in log_file:
            # Only the first line is needed; an empty file adds nothing.
            order[stringsplit(line)] = f
            break

# Sorting the keys sorts the files chronologically by their first entry.
parse_order = sorted(order)
print(datetime.datetime.now() - startTime)
# Parse every log file in chronological order and accumulate, per user, the
# first/last time seen and the downtime. Each obj[user] record is
# [first_seen, last_seen, downtime_seconds, joker].
for sort_key in parse_order:
    file_to_parse = order[sort_key]
    # Open the file that is actually being parsed. Previously the gzip
    # branch opened the stale loop variable ``f`` and the handle was then
    # replaced by a plain open(), so ".gz" logs were read undecompressed.
    if file_to_parse.endswith('.gz'):
        opener, mode = gzip.open, 'rb'
    else:
        opener, mode = open, 'r'

    with opener(file_to_parse, mode) as f2:
        for line in f2:
            match = match_record(line)
            if match is None:
                continue
            user, str_time = match.groups()

            if oldchunk == str_time:
                # Same timestamp text as the previous matched line: reuse
                # the parsed value instead of calling strptime again
                # (cuts processing time by ~50%).
                this_time = last_time
            else:
                try:
                    this_time = strptime(str_time, format_date)
                except ValueError:
                    # Unparseable timestamp: skip the line, keep state.
                    continue
                # last_time is a zero timedelta until the first line has
                # been parsed; subtracting it leaves this_time unchanged,
                # which is how the very first entry is recognised. The
                # check must come first so a datetime is never ordered
                # against a timedelta.
                first_entry = (this_time - last_time) == this_time
                if not first_entry and not this_time < last_time:
                    if (this_time - last_time).total_seconds() > system_tick:
                        # The whole log was silent for longer than
                        # system_tick: flag every known user so that this
                        # pause is not charged to them.
                        for record in obj.values():
                            record[3] = 1
                last_time = this_time
                oldchunk = str_time

            machine = obj.get(user)
            if machine is None:
                machine = obj[user] = [this_time, this_time, 0, 0]

            diff = (this_time - machine[1]).total_seconds()
            # machine_downtime: only the part of a pause beyond
            # machine_tick counts, and a pending joker cancels one pause.
            # NOTE(review): indentation was reconstructed from a flattened
            # listing - confirm the joker reset belongs to this inner
            # if/else and not to the outer ``diff`` test.
            if diff > machine_tick:
                if machine[3] == 0:
                    machine[2] += diff - machine_tick
                else:
                    machine[3] = 0
            machine[1] = this_time

out.close()
print(datetime.datetime.now() - startTime)
# Build the per-user statistics and write them to stats_.json.
response = {}
for i, this_object in obj.items():
    # Seconds between the first and the last entry seen for this user.
    total = (this_object[1] - this_object[0]).total_seconds()
    if this_object[2] == zero_tick:
        # No downtime recorded; this also avoids dividing by a zero total
        # for users that were seen only once.
        perc = repr(100)
    else:
        # Share of the observed span that was not downtime, in percent.
        perc = repr(100 - (100 * (this_object[2] / total)))
    # Every user occurs once in obj, so the entry can be stored directly.
    response[i] = {
        "percentile": perc,
        "totaltime": repr(total),
        "init": dthandler(this_object[0]),
        "last": dthandler(this_object[1]),
        "machine_downtime": repr(this_object[2]),
    }

data = json.dumps(response)

# output to file; the with-block closes it even if the write fails
with open('stats_.json', 'w') as myFile:
    myFile.write(data)

print(datetime.datetime.now() - startTime)
#print j
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment