Commit c71efd58 authored by Ophélie Gagnard, committed by Julien Muchembled

New filtering mechanism, rework command-line options

See merge request !9

Co-authored-by: Julien Muchembled <jm@nexedi.com>
parent aa384e83
Pipeline #34015 passed with stage
in 0 seconds
...@@ -12,11 +12,14 @@ ...@@ -12,11 +12,14 @@
# #
############################################################################## ##############################################################################
from __future__ import print_function
import argparse import argparse
import hashlib import hashlib
import json import json
import logging import logging
import operator
import os import os
import re
import ssl import ssl
import shutil import shutil
import sys import sys
...@@ -473,36 +476,98 @@ class NetworkcacheClient(object): ...@@ -473,36 +476,98 @@ class NetworkcacheClient(object):
pass pass
return False return False
class NetworkcacheFilter(object):
    """Select shadir entries (metadata dictionaries) matching given criteria.

    A criterion is a ``(key, operator, value)`` triple. Criteria are applied
    one after the other, in the given order, and every criterion first drops
    the entries that do not have ``key``.

    - ``==``, ``<``, ``<=``, ``>``, ``>=`` keep the entries for which
      ``entry[key] <operator> value`` is true.
    - ``<<`` (resp. ``>>``) keeps the entries holding the lowest (resp.
      highest) ``entry[key]`` among those that are strictly lower (resp.
      greater) than ``value``; entries tied on that extreme are all kept.

    A comparison that raises TypeError is logged and counts as "no match".
    """

    # Splits 'KEY<operator>VALUE'. The capturing group makes the operator part
    # of the result, and '<<' / '>>' are listed first so that they are not
    # matched as a single '<' / '>'.
    parse_criterion = re.compile("(<<|>>|[<>]=?|==)").split
    operator_mapping = {
        ">=": operator.ge,
        "<=": operator.le,
        ">": operator.gt,
        "<": operator.lt,
        "==": operator.eq,
        ">>": operator.gt,
        "<<": operator.lt,
    }

    def __init__(self, criterion_list=()):
        """Store the selection criteria, parsing them if needed.

        criterion_list is one of:
        - a list of 'KEY<operator>VALUE' strings, VALUE being JSON-encoded;
        - a tuple of already parsed (key, operator, value) triples, possibly
          empty (no filtering at all, which is the default);
        - a single already parsed (key, operator, value) tuple.

        Raise NetworkcacheException if the criteria have an unsupported shape
        or if a string has no known operator. Errors from json.loads on VALUE
        are propagated unchanged.
        """
        if isinstance(criterion_list, list):
            self.criterion_list = [self._parse(criterion)
                                   for criterion in criterion_list]
            return
        if isinstance(criterion_list, tuple):
            if all(isinstance(criterion, (tuple, list)) and len(criterion) == 3
                   for criterion in criterion_list):
                # Tuple of triples (also true for the empty default).
                self.criterion_list = criterion_list
                return
            if len(criterion_list) == 3:
                # A lone triple: wrap it so that __call__ iterates over
                # criteria rather than over the 3 items of the triple.
                self.criterion_list = [criterion_list]
                return
        # The 1-tuple prevents '%' from unpacking a tuple argument.
        raise NetworkcacheException(
            'Invalid criteria: %s' % (criterion_list,))

    @classmethod
    def _parse(cls, criterion):
        """Turn a 'KEY<operator>VALUE' string into [key, operator, value]."""
        parsed_criterion = cls.parse_criterion(criterion, maxsplit=1)
        if len(parsed_criterion) != 3:
            raise NetworkcacheException(
                'Could not parse criterion: missing or invalid separator (%s)'
                % criterion)
        parsed_criterion[2] = json.loads(parsed_criterion[2])
        return parsed_criterion

    @staticmethod
    def _compare(compare, left, op, right):
        """Return compare(left, right), or False if they are not comparable."""
        try:
            return compare(left, right)
        except TypeError as e:
            logger.warning('Comparison failed: %r %s %r (%s)',
                           left, op, right, type(e).__name__)
            return False

    def _select_extreme(self, data_dict_list, key, op, value):
        """Apply '<<' or '>>': keep the entries with the most extreme value."""
        compare = self.operator_mapping[op]
        selected = []
        for data_dict in data_dict_list:
            candidate = data_dict[key]
            if self._compare(compare, candidate, op, value):
                # Strictly better than anything seen so far: start over.
                selected = [data_dict]
                value = candidate
            elif selected and candidate == value:
                selected.append(data_dict)
        return selected

    def __call__(self, data_dict_list):
        """Return the shadir entries that match all the criteria.

        data_dict_list is an iterable of dictionaries. It is returned as is
        when there is no criterion; otherwise a new list is returned.
        """
        for key, op, value in self.criterion_list:
            data_dict_list = [data_dict for data_dict in data_dict_list
                              if key in data_dict]
            if not data_dict_list:
                break
            if op in ("<<", ">>"):
                data_dict_list = self._select_extreme(
                    data_dict_list, key, op, value)
            else:
                compare = self.operator_mapping[op]
                data_dict_list = [
                    data_dict for data_dict in data_dict_list
                    if self._compare(compare, data_dict[key], op, value)]
        return data_dict_list
class NetworkcacheException(Exception):
    """Error raised by the network cache code of this file."""
    pass


# BBB (presumably: backward compatibility): other names bound to the same
# exception class.
DirectoryNotFound = UploadError = NetworkcacheException # BBB

# Help text of the --key option, common to the command-line entry points.
key_help = (
    "The identifier under which the data is indexed."
    " Defaults to 'file-urlmd5:md5(URL)'"
)
def _newArgumentParser(url_help, key_help, key_required):
    """Return an ArgumentParser holding the options common to the commands.

    url_help     -- help text of the -u/--url option
    key_help     -- help text of the -k/--key option
    key_required -- whether one of --key / --url must be given

    --key and --url are mutually exclusive. -c/--config is always required
    and is opened for reading by argparse.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', type=argparse.FileType('r'),
                        required=True, help='SlapOS configuration file.')
    # The data is identified either directly by a key or through a URL.
    identifier_group = parser.add_mutually_exclusive_group(
        required=key_required)
    identifier_group.add_argument('-k', '--key', help=key_help)
    identifier_group.add_argument('-u', '--url', help=url_help)
    return parser
def cmd_upload(*args): def cmd_upload(*args):
parser = _newArgumentParser( parser = _newArgumentParser(
"Upload data pointed to by this argument, unless --file is specified." "Upload data pointed to by this argument, unless --file is specified."
" Non-local contents is first downloaded to a temporary file." " Non-local contents is first downloaded to a temporary file.",
"%s If not given, the uploaded data is not indexed." % key_help) key_help + " if --url is given, else with neither --url nor --key the"
parser.add_argument('--file', " uploaded data is not indexed. This should be a unique value that refers"
help="Upload the contents of this file, overriding --url") " to related entries, and that starts with 'SCHEME:' where SCHEME indicates"
parser.add_argument('--id', " how (or by what) the data is processed. For performance reasons,"
help="Identifier used for the shadir URL. Overriding --prefix-key, --suffix-key and --url") " avoid having too many entries by making your key more specific.",
parser.add_argument('--metadata', False)
parser.add_argument('-f', '--file',
help="Upload the contents of this file, overriding --url.")
parser.add_argument('-m', '--metadata',
help="Take a file containing a json-serializable dictionary with shadir metadata.") help="Take a file containing a json-serializable dictionary with shadir metadata.")
parser.add_argument('meta', nargs='*', metavar='KEY=VALUE', parser.add_argument('meta', nargs='*', metavar='KEY=VALUE',
help="Extra metadata. Warning: interpreted as string.") help="Extra metadata. Warning: interpreted as string.")
...@@ -512,7 +577,7 @@ def cmd_upload(*args): ...@@ -512,7 +577,7 @@ def cmd_upload(*args):
try: try:
if args.file: if args.file:
f = open(args.file, 'rb') f = open(args.file, 'rb')
if not args.url and not args.id: if not args.url and not args.key: # no shadir entry
nc.upload(f) nc.upload(f)
return return
elif args.url: elif args.url:
...@@ -522,39 +587,50 @@ def cmd_upload(*args): ...@@ -522,39 +587,50 @@ def cmd_upload(*args):
if args.metadata: if args.metadata:
with open(args.metadata) as g: with open(args.metadata) as g:
try: try:
metadata_dict = json.loads(g.read()) metadata_dict = json.load(g)
if type(metadata_dict) != dict: except json.decoder.JSONDecodeError as e:
raise NetworkcacheException("Not a json-serializable dictionary: %s" % args.metadata) sys.exit("%s: %s" % (args.metadata, e))
except json.decoder.JSONDecodeError: if type(metadata_dict) is not dict:
raise NetworkcacheException("Invalid json in %s" % args.metadata) sys.exit("Not a dictionary: %s" % args.metadata)
else: else:
metadata_dict = dict() metadata_dict = {}
metadata_dict.update(dict(x.split('=', 1) for x in args.meta)) metadata_dict.update(x.split('=', 1) for x in args.meta)
if args.id: if args.key:
metadata_dict.setdefault('id', args.id) identifier = args.key
key = args.id
else: else:
metadata_dict.setdefault('url', args.url) metadata_dict.setdefault('url', args.url)
urlmd5 = hashlib.md5(args.url.encode()).hexdigest() urlmd5 = hashlib.md5(args.url.encode()).hexdigest()
key = args.prefix_key + urlmd5 + args.suffix_key identifier = "file-urlmd5:" + urlmd5
nc.upload(f, key, **metadata_dict) nc.upload(f, identifier, **metadata_dict)
finally: finally:
f is None or f.close() f is None or f.close()
def cmd_download(*args):
    """Command-line entry point: download (or list) data from the cache.

    args are the command-line arguments; sys.argv[1:] is used when none is
    given. The shadir entries found for the identifier (--key, or the one
    derived from --url) are filtered with the positional criteria. With
    --list, the remaining entries are printed as JSON; otherwise the data of
    the first one is written to stdout. Exit with an error message if no
    entry is left.
    """
    parser = _newArgumentParser("URL of data to download.", key_help, True)
    parser.add_argument('-l', '--list', action='store_true',
        help="List found results instead of downloading the first one.")
    parser.add_argument('meta', nargs='*',
        metavar='KEY{==,<=,>=,<,>,<<,>>}VALUE',
        help="Filter metadata. Each argument represents a filter with a comparison"
             " condition. The filters will be applied one by one with the arguments"
             " processed in the order of appearance. VALUE is expected to be a json"
             " dump of a comparable object in Python (strings included). << & >>"
             " return the lowest & highest values respectively, in comparison to"
             " VALUE. For example, `>=[4,2] <<Infinity` selects the oldest version"
             " that is at least `[4,2]`.")
    args = parser.parse_args(args or sys.argv[1:])
    nc = NetworkcacheClient(args.config)
    if args.key:
        identifier = args.key
    else:
        # No explicit key: index by the MD5 of the URL.
        urlmd5 = hashlib.md5(args.url.encode()).hexdigest()
        identifier = "file-urlmd5:" + urlmd5
    data_list = NetworkcacheFilter(args.meta)(list(nc.select(identifier)))
    if not data_list:
        sys.exit("No result found with given criteria.")
    if args.list:
        json.dump(data_list, sys.stdout, indent=2, sort_keys=True)
        print()
    else:
        f = sys.stdout
        # On Python 3, binary data must go to the underlying buffer.
        shutil.copyfileobj(nc.download(data_list[0]['sha512']),
                           getattr(f, 'buffer', f)) # Py3
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment