Commit 4037002c authored by Jérome Perrin's avatar Jérome Perrin Committed by Kirill Smelkov

Support absolute and relative time in tidrange

Using dateparser to support absolute and relative dates in natural language.

/reviewed-by @kirr
/reviewed-on nexedi/zodbtools!8
parents f0399d50 81b566c4
......@@ -20,10 +20,10 @@ setup(
keywords = 'zodb utility tool',
packages = find_packages(),
install_requires = ['ZODB', 'zodburi', 'zope.interface', 'pygolang >= 0.0.0.dev6', 'six'],
install_requires = ['ZODB', 'zodburi', 'zope.interface', 'pygolang >= 0.0.0.dev6', 'six', 'dateparser'],
extras_require = {
'test': ['pytest'],
'test': ['pytest', 'freezegun', 'pytz'],
},
entry_points= {'console_scripts': ['zodb = zodbtools.zodb:main']},
......
......@@ -6,6 +6,8 @@ envlist = py27-{ZODB3,ZODB4,ZODB5}
deps =
# XXX tox does not consult extras_require['test'] -> install pytest explicitly
pytest
freezegun
pytz
# latest ZODB from ZODB3 series
ZODB3: ZODB3 >=3.10, <3.11dev
......
......@@ -67,11 +67,8 @@ inclusive. Both tidmin and tidmax are optional and default to
If a tid (tidmin or tidmax) is given, it has to be specified as follows:
- a 16-digit hex number specifying transaction ID, e.g. 0285cbac258bf266
TODO (recheck what git does and use dateparser):
- absolute timestamp,
- relative timestamp, e.g. yesterday, 1.week.ago
- absolute timestamp, in RFC3339 or RFC822 formats
- relative timestamp, e.g. yesterday, 1 week ago
Example tid ranges:
......@@ -79,6 +76,11 @@ Example tid ranges:
000000000000aaaa.. transactions starting from 000000000000aaaa till latest
..000000000000bbbb transactions starting from database beginning till 000000000000bbbb
000000000000aaaa..000000000000bbbb transactions starting from 000000000000aaaa till 000000000000bbbb
1985-04-12T23:20:50.52Z..2018-01-01T10:30:00Z
transactions starting from 1985-04-12 at 23 hours
20 minutes 50 seconds and 520000000 nano seconds
in UTC till 2018-01-01 at 10 hours 30 minutes in UTC
1_week_ago..yesterday transactions from one week ago until yesterday.
In commands <tidrange> is optional - if it is not given at all, it defaults to
0..+∞, i.e. to whole database history.
......
# -*- coding: utf-8 -*-
# Copyright (C) 2019 Nexedi SA and Contributors.
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
import datetime
import os
import time
import pytest
import pytz
from freezegun import freeze_time
import tzlocal
from zodbtools.util import TidRangeInvalid, TidInvalid, ashex, parse_tid, parse_tidrange
@pytest.fixture
def fake_time():
"""Pytest's fixture to run this test as if now() was 2009-08-30T19:20:00Z
and if the machine timezone was Europe/Paris
"""
initial_tz = os.environ.get("TZ")
os.environ["TZ"] = "Europe/Paris"
time.tzset()
tzlocal.reload_localzone()
reference_time = datetime.datetime(2009, 8, 30, 19, 20, 0, 0,
pytz.utc).astimezone(
pytz.timezone("Europe/Paris"))
with freeze_time(reference_time):
yield
del os.environ["TZ"]
if initial_tz:
os.environ["TZ"] = initial_tz
time.tzset()
def test_tidrange_tid():
assert (
b"\x00\x00\x00\x00\x00\x00\xaa\xaa",
b"\x00\x00\x00\x00\x00\x00\xbb\xbb",
) == parse_tidrange("000000000000aaaa..000000000000bbbb")
assert (b"\x00\x00\x00\x00\x00\x00\xaa\xaa",
None) == parse_tidrange("000000000000aaaa..")
assert (None, b"\x00\x00\x00\x00\x00\x00\xbb\xbb"
) == parse_tidrange("..000000000000bbbb")
assert (None, None) == parse_tidrange("..")
with pytest.raises(TidRangeInvalid) as exc:
parse_tidrange("inv.alid")
assert exc.value.args == ("inv.alid", )
# range is correct, but a TID is invalid
with pytest.raises(TidInvalid) as exc:
parse_tidrange("invalid..")
assert exc.value.args == ("invalid", )
def test_tidrange_date():
assert (
b"\x03\xc4\x85v\x00\x00\x00\x00",
b"\x03\xc4\x88\xa0\x00\x00\x00\x00",
) == parse_tidrange(
"2018-01-01T10:30:00Z..2018-01-02T00:00:00.000000+00:00")
def test_parse_tid():
assert b"\x00\x00\x00\x00\x00\x00\xbb\xbb" == parse_tid("000000000000bbbb")
with pytest.raises(TidInvalid) as exc:
parse_tid("invalid")
assert exc.value.args == ("invalid", )
with pytest.raises(TidInvalid) as exc:
parse_tid('')
assert exc.value.args == ('', )
test_parameters = []
with open(
os.path.join(
os.path.dirname(__file__), "testdata",
"tid-time-format.txt")) as f:
for line in f:
line = line.strip()
if line and not line.startswith("#"):
test_parameters.append(line.split(" ", 2))
@pytest.mark.parametrize("reference_time,reference_tid,input_time",
test_parameters)
def test_parse_tid_time_format(fake_time, reference_time, reference_tid,
input_time):
assert reference_tid == ashex(parse_tid(input_time))
# check that the reference_tid matches the reference time, mainly
# to check that input is defined correctly.
assert reference_tid == ashex(parse_tid(reference_time))
# This is the supported time formats for zodbutils <tidrange>
# Format of this file is:
# <reference time> <tid in hex format> <time input format>
#
# These must be run with current time: 2009-08-30T19:20:00Z
# in Europe/Paris timezone.
# ( as a timestamp: 1251660000 )
# some absolute date formats:
# RFC3339
2018-01-01T10:30:00Z 03c4857600000000 2018-01-01T10:30:00Z
1985-04-12T23:20:50.520000Z 02b914f8d78d4fdf 1985-04-12T23:20:50.52Z
1996-12-20T00:39:57Z 03189927f3333333 1996-12-19T16:39:57-08:00
2018-01-01T05:30:00Z 03c4844a00000000 2018-01-01T10:30:00+05:00
# RFC822
1976-08-26T14:29:00Z 02728aa500000000 26 Aug 76 14:29 GMT
1976-08-26T12:29:00Z 02728a2d00000000 26 Aug 76 14:29 +02:00
# RFC850 -> not supported (by go implementation)
#2006-01-02T22:04:05Z 036277cc15555555 Monday, 02-Jan-06 15:04:05 MST
# RFC1123 -> not supported (by go implementation)
#2006-01-02T22:04:05Z 036277cc15555555 Mon, 02 Jan 2006 15:04:05 MST
#2006-01-02T22:04:05Z 036277cc15555555 Mon, 02 Jan 2006 23:04:05 GMT+1
# explicit UTC timezone
2018-01-01T10:30:00Z 03c4857600000000 2018-01-01 10:30:00 UTC
2018-01-02T00:00:00Z 03c488a000000000 2018-01-02 UTC
# Relative formats, based on git's test for approxidate
# (adapted for timezone Europe/Paris and extended a bit)
2009-08-30T19:20:00Z 03805ec800000000 now
2009-08-30T19:19:55Z 03805ec7eaaaaaaa 5 seconds ago
2009-08-30T19:19:55Z 03805ec7eaaaaaaa 5.seconds.ago
2009-08-30T19:10:00Z 03805ebe00000000 10.minutes.ago
2009-08-29T19:20:00Z 0380592800000000 yesterday
2009-08-27T19:20:00Z 03804de800000000 3.days.ago
2009-08-09T19:20:00Z 037fe8a800000000 3.weeks.ago
2009-05-30T19:20:00Z 037e53a800000000 3.months.ago
2009-08-30T19:19:00Z 03805ec700000000 1 minute ago
2009-08-29T19:20:00Z 0380592800000000 1 day ago
2009-07-30T19:20:00Z 037fb06800000000 1 month ago
# go's when does not support "chaining" like this
#2007-05-30T19:20:00Z 036dfaa800000000 2.years.3.months.ago
2009-08-29T04:00:00Z 0380559000000000 6am yesterday
2009-08-29T16:00:00Z 0380586000000000 6pm yesterday
2009-08-30T01:00:00Z 03805a7c00000000 3:00
2009-08-30T13:00:00Z 03805d4c00000000 15:00
2009-08-30T10:00:00Z 03805c9800000000 noon today
2009-08-29T10:00:00Z 038056f800000000 noon yesterday
# this input is a bit weird also, what does "noon pm" mean?
# it seems to trigger a bug in python's parser
# TypeError: can't compare offset-naive and offset-aware datetimes
#2009-01-05T12:00:00Z 037b0bd000000000 January 5th noon pm
# this input is "ambiguous"
#2009-08-29T12:00:00Z 0380577000000000 10am noon
# not supported by date parser
#2009-08-25T19:20:00Z 038042a800000000 last tuesday
# non consistent behavior ( go keep current hour:minutes - python use midnight )
# this also TypeError on python
#2009-07-05T00:00:00Z 037f1f4000000000 July 5th
# parsed as month/day (at least for me ... it might depend on some locale settings other than $TZ ?)
#2009-05-06T00:00:00Z 037dc82000000000 06.05.2009
# go parser is wrong on this one
#2009-06-06T05:00:00Z 037e77ac00000000 Jun 6, 5AM
# go parser is wrong on this one
#2009-06-06T05:00:00Z 037e77ac00000000 5AM Jun 6
2009-06-07T04:00:00Z 037e7d1000000000 6AM, June 7, 2009
# python and go disagree on these two, go see them as 00:00 UTC
#2008-11-30T23:00:00Z 037a3e4400000000 2008-12-01
#2009-11-30T23:00:00Z 03826ac400000000 2009-12-01
#2009-06-04T22:00:00Z 037e706800000000 06/05/2009
# ( end of tests from git )
# more tests
### works with python implementation, but not supported:
#2018-01-01T09:30:00Z 03c4853a00000000 le 1er janvier 2018 à 10h30
#2018-01-01T23:00:00Z 03c4886400000000 2018年1月2日
### some invalid formats that "looks OK"
# wrong format on timezone (should be 2009-06-01T22:00:00+09:00)
#2009-06-01T01:00:00Z 037e5a9c00000000 2009-06-01T10:00:00:+09:00
# day is 34
# ERROR XXX 2009-06-34T22:00:00Z
# one digits hour minutes
# ERROR XXX 2009-06-01T1:2:3
# month use a captital o instead of O
# ERROR XXX 2009-O6-01T22:00:00Z
......@@ -22,6 +22,8 @@ import hashlib, struct, codecs
import zodburi
from six.moves.urllib_parse import urlsplit, urlunsplit
from zlib import crc32, adler32
from ZODB.TimeStamp import TimeStamp
import dateparser
def ashex(s):
return s.encode('hex')
......@@ -61,9 +63,60 @@ def txnobjv(txn):
return objv
# "tidmin..tidmax" -> (tidmin, tidmax)
class TidRangeInvalid(Exception):
class TidInvalid(ValueError):
pass
class TidRangeInvalid(ValueError):
pass
def parse_tid(tid_string, raw_only=False):
"""Try to parse `tid_string` as a time and returns the
corresponding raw TID.
If `tid_string` cannot be parsed as a time, assume it was
already a TID.
This function also raise TidRangeInvalid when `tid_string`
is invalid.
"""
assert isinstance(tid_string, (str, bytes))
# If it "looks like a TID", don't try to parse it as time,
# because parsing is slow.
if len(tid_string) == 16:
try:
return fromhex(tid_string)
except ValueError:
pass
if raw_only:
# either it was not 16-char string or hex decoding failed
raise TidInvalid(tid_string)
# preprocess to support `1.day.ago` style formats like git log does.
if "ago" in tid_string:
tid_string = tid_string.replace(".", " ").replace("_", " ")
parsed_time = dateparser.parse(
tid_string,
settings={
'TO_TIMEZONE': 'UTC',
'RETURN_AS_TIMEZONE_AWARE': True
})
if not parsed_time:
# parsing as date failed
raise TidInvalid(tid_string)
# build a ZODB.TimeStamp to convert as a TID
return TimeStamp(
parsed_time.year,
parsed_time.month,
parsed_time.day,
parsed_time.hour,
parsed_time.minute,
parsed_time.second + parsed_time.microsecond / 1000000.).raw()
# parse_tidrange parses a string into (tidmin, tidmax).
#
# see `zodb help tidrange` for accepted tidrange syntax.
......@@ -73,11 +126,10 @@ def parse_tidrange(tidrange):
except ValueError: # not exactly 2 parts in between ".."
raise TidRangeInvalid(tidrange)
try:
tidmin = tidmin.decode("hex")
tidmax = tidmax.decode("hex")
except TypeError: # hex decoding error
raise TidRangeInvalid(tidrange)
if tidmin:
tidmin = parse_tid(tidmin)
if tidmax:
tidmax = parse_tid(tidmax)
# empty tid means -inf / +inf respectively
# ( which is None in IStorage.iterator() )
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment