Commit 855cee8e authored by Jérome Perrin

Prevent errors when parsing date on malformed lines

We observed lines in our logs whose timestamp field still matched the
timestamp regexp, so the line was not reported as invalid, but parsing
such a timestamp raised a ValueError in _matchToDateTime.

The beginning of line was:
127.0.0.1 - - [14/Jul/2017:127.0.0.1 - - [14/Jul/2017:09:41:41 +0200]

This uses `[14/Jul/2017:127.0.0.1 - - [14/Jul/2017:09:41:41 +0200]` as the
timestamp, so it breaks the simple .split() used to separate the timestamp
from the timezone.

Added a minimal test case to reproduce this specific problem.
parent bf83eb8d
......@@ -764,7 +764,7 @@ logformat_dict = {
'%b': r'(?P<bytes>[0-9-]*?)',
'%l': r'(?P<ident>[^ ]*)',
'%u': r'(?P<user>[^ ]*)',
'%t': r'\[(?P<timestamp>[^\]]*)\]',
'%t': r'\[(?P<timestamp>\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4})\]',
'%r': r'(?P<request>[^"]*)', # XXX: expected to be enclosed in ". See also REQUEST_PATTERN
'%>s': r'(?P<status>[0-9]*?)',
'%O': r'(?P<size>[0-9-]*?)',
......
import unittest
import sys
from StringIO import StringIO
import apachedex
class MalformedInputTestCase(unittest.TestCase):
    """Exercise apachedex.main() on input that contains a malformed log line.

    Each test runs with sys.stdout and sys.stderr replaced by in-memory
    buffers; sys.argv, sys.stdin, sys.stderr and sys.stdout are put back
    to their previous values afterwards.
    """

    def setUp(self):
        """Remember the process-wide sys objects and capture the output streams."""
        self._saved_sys_state = (sys.argv, sys.stdin, sys.stderr, sys.stdout)
        sys.stderr, sys.stdout = StringIO(), StringIO()

    def tearDown(self):
        """Restore the sys objects saved by setUp."""
        (
            sys.argv,
            sys.stdin,
            sys.stderr,
            sys.stdout,
        ) = self._saved_sys_state

    def test_timestamp_mixed_in_timestamp(self):
        """A line whose timestamp embeds the start of another line is reported.

        The first input line is valid and must not be reported; the second
        has a second line start spliced into its timestamp field and must be
        reported as malformed.
        """
        valid_line = (
            '127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 '
            '"https://example.org/" "Mozilla/5.0 (Windows NT 5.1) '
            'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 '
            'Safari/537.36" 1754'
        )
        malformed_line = (
            '127.0.0.1 - - [14/Jul/2017:127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] '
            '"GET / HTTP/1.1" 200 7499 '
            '"https://example.org/" "Mozilla/5.0 (Windows NT 5.1) '
            'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 '
            'Safari/537.36" 1754'
        )

        # '-' as the input argument is presumably what makes apachedex read
        # from sys.stdin; the expected messages below name the input "-".
        sys.argv = ['apachedex', '--base=/', '-']
        sys.stdin = StringIO('\n'.join((valid_line, malformed_line)))

        apachedex.main()

        reported = sys.stderr.getvalue()
        self.assertNotIn('Malformed line at -:1', reported)
        self.assertIn('Malformed line at -:2', reported)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment