Commit 81b566c4 authored by Jérome Perrin's avatar Jérome Perrin

tidrange: supports giving range as time

parent 3e35453c
......@@ -20,10 +20,10 @@ setup(
keywords = 'zodb utility tool',
packages = find_packages(),
install_requires = ['ZODB', 'zodburi', 'zope.interface', 'pygolang >= 0.0.0.dev6', 'six'],
install_requires = ['ZODB', 'zodburi', 'zope.interface', 'pygolang >= 0.0.0.dev6', 'six', 'dateparser'],
extras_require = {
'test': ['pytest'],
'test': ['pytest', 'freezegun', 'pytz'],
},
entry_points= {'console_scripts': ['zodb = zodbtools.zodb:main']},
......
......@@ -6,6 +6,8 @@ envlist = py27-{ZODB3,ZODB4,ZODB5}
deps =
# XXX tox does not consult extras_require['test'] -> install pytest explicitly
pytest
freezegun
pytz
# latest ZODB from ZODB3 series
ZODB3: ZODB3 >=3.10, <3.11dev
......
......@@ -67,11 +67,8 @@ inclusive. Both tidmin and tidmax are optional and default to
If a tid (tidmin or tidmax) is given, it has to be specified as follows:
- a 16-digit hex number specifying transaction ID, e.g. 0285cbac258bf266
TODO (recheck what git does and use dateparser):
- absolute timestamp,
- relative timestamp, e.g. yesterday, 1.week.ago
- absolute timestamp, in RFC3339 or RFC822 formats
- relative timestamp, e.g. yesterday, 1 week ago
Example tid ranges:
......@@ -79,6 +76,11 @@ Example tid ranges:
000000000000aaaa.. transactions starting from 000000000000aaaa till latest
..000000000000bbbb transactions starting from database beginning till 000000000000bbbb
000000000000aaaa..000000000000bbbb transactions starting from 000000000000aaaa till 000000000000bbbb
1985-04-12T23:20:50.52Z..2018-01-01T10:30:00Z
transactions starting from 1985-04-12 at 23 hours
20 minutes 50 seconds and 520000000 nano seconds
in UTC till 2018-01-01 at 10 hours 30 minutes in UTC
1_week_ago..yesterday transactions from one week ago until yesterday.
In commands <tidrange> is optional - if it is not given at all, it defaults to
0..+∞, i.e. to whole database history.
......
# -*- coding: utf-8 -*-
# Copyright (C) 2019 Nexedi SA and Contributors.
#
# This program is free software: you can Use, Study, Modify and Redistribute
......@@ -16,9 +17,37 @@
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
from pytest import raises
import datetime
import os
import time
from zodbtools.util import parse_tidrange, TidRangeInvalid
import pytest
import pytz
from freezegun import freeze_time
import tzlocal
from zodbtools.util import TidRangeInvalid, TidInvalid, ashex, parse_tid, parse_tidrange
@pytest.fixture
def fake_time():
"""Pytest's fixture to run this test as if now() was 2009-08-30T19:20:00Z
and if the machine timezone was Europe/Paris
"""
initial_tz = os.environ.get("TZ")
os.environ["TZ"] = "Europe/Paris"
time.tzset()
tzlocal.reload_localzone()
reference_time = datetime.datetime(2009, 8, 30, 19, 20, 0, 0,
pytz.utc).astimezone(
pytz.timezone("Europe/Paris"))
with freeze_time(reference_time):
yield
del os.environ["TZ"]
if initial_tz:
os.environ["TZ"] = initial_tz
time.tzset()
def test_tidrange_tid():
......@@ -35,6 +64,52 @@ def test_tidrange_tid():
assert (None, None) == parse_tidrange("..")
with raises(TidRangeInvalid) as exc:
parse_tidrange("invalid")
with pytest.raises(TidRangeInvalid) as exc:
parse_tidrange("inv.alid")
assert exc.value.args == ("inv.alid", )
# range is correct, but a TID is invalid
with pytest.raises(TidInvalid) as exc:
parse_tidrange("invalid..")
assert exc.value.args == ("invalid", )
def test_tidrange_date():
assert (
b"\x03\xc4\x85v\x00\x00\x00\x00",
b"\x03\xc4\x88\xa0\x00\x00\x00\x00",
) == parse_tidrange(
"2018-01-01T10:30:00Z..2018-01-02T00:00:00.000000+00:00")
def test_parse_tid():
assert b"\x00\x00\x00\x00\x00\x00\xbb\xbb" == parse_tid("000000000000bbbb")
with pytest.raises(TidInvalid) as exc:
parse_tid("invalid")
assert exc.value.args == ("invalid", )
with pytest.raises(TidInvalid) as exc:
parse_tid('')
assert exc.value.args == ('', )
test_parameters = []
with open(
os.path.join(
os.path.dirname(__file__), "testdata",
"tid-time-format.txt")) as f:
for line in f:
line = line.strip()
if line and not line.startswith("#"):
test_parameters.append(line.split(" ", 2))
@pytest.mark.parametrize("reference_time,reference_tid,input_time",
test_parameters)
def test_parse_tid_time_format(fake_time, reference_time, reference_tid,
input_time):
assert reference_tid == ashex(parse_tid(input_time))
# check that the reference_tid matches the reference time, mainly
# to check that input is defined correctly.
assert reference_tid == ashex(parse_tid(reference_time))
# This is the supported time formats for zodbutils <tidrange>
# Format of this file is:
# <reference time> <tid in hex format> <time input format>
#
# These must be run with current time: 2009-08-30T19:20:00Z
# in Europe/Paris timezone.
# ( as a timestamp: 1251660000 )
# some absolute date formats:
# RFC3339
2018-01-01T10:30:00Z 03c4857600000000 2018-01-01T10:30:00Z
1985-04-12T23:20:50.520000Z 02b914f8d78d4fdf 1985-04-12T23:20:50.52Z
1996-12-20T00:39:57Z 03189927f3333333 1996-12-19T16:39:57-08:00
2018-01-01T05:30:00Z 03c4844a00000000 2018-01-01T10:30:00+05:00
# RFC822
1976-08-26T14:29:00Z 02728aa500000000 26 Aug 76 14:29 GMT
1976-08-26T12:29:00Z 02728a2d00000000 26 Aug 76 14:29 +02:00
# RFC850 -> not supported (by go implementation)
#2006-01-02T22:04:05Z 036277cc15555555 Monday, 02-Jan-06 15:04:05 MST
# RFC1123 -> not supported (by go implementation)
#2006-01-02T22:04:05Z 036277cc15555555 Mon, 02 Jan 2006 15:04:05 MST
#2006-01-02T22:04:05Z 036277cc15555555 Mon, 02 Jan 2006 23:04:05 GMT+1
# explicit UTC timezone
2018-01-01T10:30:00Z 03c4857600000000 2018-01-01 10:30:00 UTC
2018-01-02T00:00:00Z 03c488a000000000 2018-01-02 UTC
# Relative formats, based on git's test for approxidate
# (adapted for timezone Europe/Paris and extended a bit)
2009-08-30T19:20:00Z 03805ec800000000 now
2009-08-30T19:19:55Z 03805ec7eaaaaaaa 5 seconds ago
2009-08-30T19:19:55Z 03805ec7eaaaaaaa 5.seconds.ago
2009-08-30T19:10:00Z 03805ebe00000000 10.minutes.ago
2009-08-29T19:20:00Z 0380592800000000 yesterday
2009-08-27T19:20:00Z 03804de800000000 3.days.ago
2009-08-09T19:20:00Z 037fe8a800000000 3.weeks.ago
2009-05-30T19:20:00Z 037e53a800000000 3.months.ago
2009-08-30T19:19:00Z 03805ec700000000 1 minute ago
2009-08-29T19:20:00Z 0380592800000000 1 day ago
2009-07-30T19:20:00Z 037fb06800000000 1 month ago
# go's when does not support "chaining" like this
#2007-05-30T19:20:00Z 036dfaa800000000 2.years.3.months.ago
2009-08-29T04:00:00Z 0380559000000000 6am yesterday
2009-08-29T16:00:00Z 0380586000000000 6pm yesterday
2009-08-30T01:00:00Z 03805a7c00000000 3:00
2009-08-30T13:00:00Z 03805d4c00000000 15:00
2009-08-30T10:00:00Z 03805c9800000000 noon today
2009-08-29T10:00:00Z 038056f800000000 noon yesterday
# this input is a bit weird also, what does "noon pm" mean?
# it seems to trigger a bug in python's parser
# TypeError: can't compare offset-naive and offset-aware datetimes
#2009-01-05T12:00:00Z 037b0bd000000000 January 5th noon pm
# this input is "ambiguous"
#2009-08-29T12:00:00Z 0380577000000000 10am noon
# not supported by date parser
#2009-08-25T19:20:00Z 038042a800000000 last tuesday
# non consistent behavior ( go keep current hour:minutes - python use midnight )
# this also TypeError on python
#2009-07-05T00:00:00Z 037f1f4000000000 July 5th
# parsed as month/day (at least for me ... it might depend on some locale settings other than $TZ ?)
#2009-05-06T00:00:00Z 037dc82000000000 06.05.2009
# go parser is wrong on this one
#2009-06-06T05:00:00Z 037e77ac00000000 Jun 6, 5AM
# go parser is wrong on this one
#2009-06-06T05:00:00Z 037e77ac00000000 5AM Jun 6
2009-06-07T04:00:00Z 037e7d1000000000 6AM, June 7, 2009
# python and go disagree on these two, go see them as 00:00 UTC
#2008-11-30T23:00:00Z 037a3e4400000000 2008-12-01
#2009-11-30T23:00:00Z 03826ac400000000 2009-12-01
#2009-06-04T22:00:00Z 037e706800000000 06/05/2009
# ( end of tests from git )
# more tests
### works with python implementation, but not supported:
#2018-01-01T09:30:00Z 03c4853a00000000 le 1er janvier 2018 à 10h30
#2018-01-01T23:00:00Z 03c4886400000000 2018年1月2日
### some invalid formats that "looks OK"
# wrong format on timezone (should be 2009-06-01T22:00:00+09:00)
#2009-06-01T01:00:00Z 037e5a9c00000000 2009-06-01T10:00:00:+09:00
# day is 34
# ERROR XXX 2009-06-34T22:00:00Z
# one digits hour minutes
# ERROR XXX 2009-06-01T1:2:3
# month use a captital o instead of O
# ERROR XXX 2009-O6-01T22:00:00Z
......@@ -22,6 +22,8 @@ import hashlib, struct, codecs
import zodburi
from six.moves.urllib_parse import urlsplit, urlunsplit
from zlib import crc32, adler32
from ZODB.TimeStamp import TimeStamp
import dateparser
def ashex(s):
return s.encode('hex')
......@@ -61,9 +63,60 @@ def txnobjv(txn):
return objv
# "tidmin..tidmax" -> (tidmin, tidmax)
class TidRangeInvalid(Exception):
class TidInvalid(ValueError):
pass
class TidRangeInvalid(ValueError):
pass
def parse_tid(tid_string, raw_only=False):
"""Try to parse `tid_string` as a time and returns the
corresponding raw TID.
If `tid_string` cannot be parsed as a time, assume it was
already a TID.
This function also raise TidRangeInvalid when `tid_string`
is invalid.
"""
assert isinstance(tid_string, (str, bytes))
# If it "looks like a TID", don't try to parse it as time,
# because parsing is slow.
if len(tid_string) == 16:
try:
return fromhex(tid_string)
except ValueError:
pass
if raw_only:
# either it was not 16-char string or hex decoding failed
raise TidInvalid(tid_string)
# preprocess to support `1.day.ago` style formats like git log does.
if "ago" in tid_string:
tid_string = tid_string.replace(".", " ").replace("_", " ")
parsed_time = dateparser.parse(
tid_string,
settings={
'TO_TIMEZONE': 'UTC',
'RETURN_AS_TIMEZONE_AWARE': True
})
if not parsed_time:
# parsing as date failed
raise TidInvalid(tid_string)
# build a ZODB.TimeStamp to convert as a TID
return TimeStamp(
parsed_time.year,
parsed_time.month,
parsed_time.day,
parsed_time.hour,
parsed_time.minute,
parsed_time.second + parsed_time.microsecond / 1000000.).raw()
# parse_tidrange parses a string into (tidmin, tidmax).
#
# see `zodb help tidrange` for accepted tidrange syntax.
......@@ -73,11 +126,10 @@ def parse_tidrange(tidrange):
except ValueError: # not exactly 2 parts in between ".."
raise TidRangeInvalid(tidrange)
try:
tidmin = tidmin.decode("hex")
tidmax = tidmax.decode("hex")
except TypeError: # hex decoding error
raise TidRangeInvalid(tidrange)
if tidmin:
tidmin = parse_tid(tidmin)
if tidmax:
tidmax = parse_tid(tidmax)
# empty tid means -inf / +inf respectively
# ( which is None in IStorage.iterator() )
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment