Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
apachedex
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Vincent Pelletier
apachedex
Commits
3cc66082
Commit
3cc66082
authored
Dec 27, 2023
by
Vincent Pelletier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Use a context manager for logfile resource management
parent
466146fb
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
90 additions
and
81 deletions
+90
-81
apachedex/__init__.py
apachedex/__init__.py
+90
-81
No files found.
apachedex/__init__.py
View file @
3cc66082
...
...
@@ -42,6 +42,7 @@
from
html
import
escape
from
collections
import
defaultdict
,
Counter
from
contextlib
import
nullcontext
from
datetime
import
datetime
,
timedelta
,
date
,
tzinfo
from
functools
import
partial
from
operator
import
itemgetter
...
...
@@ -1520,6 +1521,7 @@ def main():
file=sys.stderr)
if filename == '
-
':
logfile = sys.stdin
logfile_context = nullcontext()
else:
for opener, exc in FILE_OPENER_LIST:
logfile = opener(filename, '
rt
', encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
...
...
@@ -1531,91 +1533,98 @@ def main():
logfile.seek(0)
break
else:
logfile = open(filename, 'r', encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
lineno = 0
for lineno, line in enumerate(logfile, 1):
if show_progress and lineno % 5000 == 0:
print(lineno, end='
\
r', file=sys.stderr)
match = matchline(line)
if match is None:
match = expensive_matchline(line)
logfile = open( # pylint: disable=consider-using-with
filename,
'r',
encoding=INPUT_ENCODING,
errors=INPUT_ENCODING_ERROR_HANDLER,
)
logfile_context = logfile
with logfile_context:
lineno = 0
for lineno, line in enumerate(logfile, 1):
if show_progress and lineno % 5000 == 0:
print(lineno, end='
\
r', file=sys.stderr)
match = matchline(line)
if match is None:
if not quiet:
print(f'
Malformed
line
at
{
filename
}:{
lineno
}:
{
line
}
',
file=sys.stderr)
malformed_lines += 1
match = expensive_matchline(line)
if match is None:
if not quiet:
print(f'
Malformed
line
at
{
filename
}:{
lineno
}:
{
line
}
',
file=sys.stderr)
malformed_lines += 1
continue
agent = match.group('
agent
')
if any(x(agent) for x in skip_user_agent):
skipped_user_agent += 1
continue
agent = match.group('
agent
')
if any(x(agent) for x in skip_user_agent):
skipped_user_agent += 1
continue
url_match = matchrequest(match.group('
request
'))
if url_match is None:
no_url_lines += 1
continue
url = url_match.group('
url
')
if url.startswith('
http
'):
url = splithost(splittype(url)[1])[1]
url = get_url_prefix(match, url)
for site, prefix_match, action in site_list:
if prefix_match(url) is not None:
break
else:
site = None
action = default_action
if action is None:
skipped_lines += 1
continue
hit_date = asDate(matchToDateTime(match))
if to_next_period is not None:
if latest_date is None or latest_date < hit_date:
latest_date = hit_date
if earliest_date is None or hit_date < earliest_date:
earliest_date = hit_date
next_period = getNextPeriod()
try:
while latest_date > next_period:
period, to_next_period = next_period_data()
url_match = matchrequest(match.group('
request
'))
if url_match is None:
no_url_lines += 1
continue
url = url_match.group('
url
')
if url.startswith('
http
'):
url = splithost(splittype(url)[1])[1]
url = get_url_prefix(match, url)
for site, prefix_match, action in site_list:
if prefix_match(url) is not None:
break
else:
site = None
action = default_action
if action is None:
skipped_lines += 1
continue
hit_date = asDate(matchToDateTime(match))
if to_next_period is not None:
if latest_date is None or latest_date < hit_date:
latest_date = hit_date
if earliest_date is None or hit_date < earliest_date:
earliest_date = hit_date
next_period = getNextPeriod()
except StopIteration:
to_next_period = None
if original_period != period:
original_period = period
if show_progress:
print(f'
Increasing
period
to
{
period
}...
', end='',
file=sys.stderr)
old_date_format = date_format
(
asDate,
decimator,
graph_period,
date_format,
placeholder_delta,
round_date,
graph_coefficient,
) = period_parser[period]
latest_date = rescale(latest_date)
earliest_date = rescale(earliest_date)
period_increase_start = time.time()
for site_data in per_site.values():
site_data.rescale(rescale, getDuration)
if show_progress:
print(f'
done
({
timedelta
(
seconds
=
time
.
time
()
-
period_increase_start
)})
',
try:
while latest_date > next_period:
period, to_next_period = next_period_data()
next_period = getNextPeriod()
except StopIteration:
to_next_period = None
if original_period != period:
original_period = period
if show_progress:
print(f'
Increasing
period
to
{
period
}...
', end='',
file=sys.stderr)
old_date_format = date_format
(
asDate,
decimator,
graph_period,
date_format,
placeholder_delta,
round_date,
graph_coefficient,
) = period_parser[period]
latest_date = rescale(latest_date)
earliest_date = rescale(earliest_date)
period_increase_start = time.time()
for site_data in per_site.values():
site_data.rescale(rescale, getDuration)
if show_progress:
print(f'
done
({
timedelta
(
seconds
=
time
.
time
()
-
period_increase_start
)})
',
file=sys.stderr)
hit_date = asDate(matchToDateTime(match))
try:
site_data = per_site[site]
except KeyError:
site_data = per_site[site] = action(threshold, getDuration,
error_detail=error_detail, user_agent_detail=user_agent_detail,
erp5_expand_other=erp5_expand_other)
try:
site_data.accumulate(match, url_match, hit_date)
except Exception: # pylint: disable=broad-exception-caught
if not quiet:
print(f'
Error
analysing
line
at
{
filename
}:{
lineno
}:
{
line
!
r
}
',
file=sys.stderr)
hit_date = asDate(matchToDateTime(match))
try:
site_data = per_site[site]
except KeyError:
site_data = per_site[site] = action(threshold, getDuration,
error_detail=error_detail, user_agent_detail=user_agent_detail,
erp5_expand_other=erp5_expand_other)
try:
site_data.accumulate(match, url_match, hit_date)
except Exception: # pylint: disable=broad-exception-caught
if not quiet:
print(f'
Error
analysing
line
at
{
filename
}:{
lineno
}:
{
line
!
r
}
',
file=sys.stderr)
traceback.print_exc(file=sys.stderr)
traceback.print_exc(file=sys.stderr)
all_lines += lineno
if show_progress:
print(lineno, file=sys.stderr)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment