Commit 500f097b, authored by Ophélie Gagnard, committed by Julien Muchembled

New filtering mechanism, rework command-line options

Co-authored-by: Julien Muchembled <jm@nexedi.com>
parent aa384e83
......@@ -12,11 +12,14 @@
#
##############################################################################
from __future__ import print_function
import argparse
import hashlib
import json
import logging
import operator
import os
import re
import ssl
import shutil
import sys
......@@ -473,36 +476,98 @@ class NetworkcacheClient(object):
pass
return False
class NetworkcacheFilter(object):
    """Callable that filters shadir entries with an ordered list of criteria.

    Each criterion is a ``(key, operator, value)`` triple. Criteria are
    applied one after the other, in order, each one narrowing the list of
    entries kept by the previous ones. Entries that do not have ``key`` are
    always dropped by a criterion on that key.

    Supported operators:

    - ``==``, ``<``, ``<=``, ``>``, ``>=``: keep entries whose ``entry[key]``
      compares accordingly against ``value``.
    - ``<<`` / ``>>``: keep only the entries holding the lowest / highest
      ``entry[key]`` among those strictly lower / greater than ``value``
      (all entries tied on that extreme are kept).
    """

    # Splits 'KEY<op>VALUE'; the capturing group keeps the operator in the
    # result. Two-character operators are listed first so that '<<' is not
    # read as '<' followed by '<VALUE'.
    parse_criterion = re.compile("(<<|>>|[<>]=?|==)").split
    operator_mapping = {
        ">=": operator.ge,
        "<=": operator.le,
        ">": operator.gt,
        "<": operator.lt,
        "==": operator.eq,
        ">>": operator.gt,
        "<<": operator.lt,
    }

    def __init__(self, criterion_list=()):
        """Parse and store the selection criteria.

        criterion_list may be:

        - an empty tuple (the default): no criterion, nothing is filtered out;
        - a list of strings of the form ``KEY<op>VALUE`` where VALUE is a
          JSON document (e.g. ``'version>=[4,2]'``);
        - a single already parsed ``(key, op, value)`` tuple;
        - a tuple of three already parsed ``(key, op, value)`` criteria.

        Raises NetworkcacheException if criterion_list has another shape or
        if a string criterion contains no operator. An invalid JSON VALUE
        makes json.loads raise its usual error.
        """
        if isinstance(criterion_list, tuple):
            if not criterion_list:
                # The default value used to end up in the error branch.
                self.criterion_list = ()
                return
            if len(criterion_list) == 3:
                if self._is_operator(criterion_list[1]):
                    # A single parsed criterion: wrap it, otherwise __call__
                    # would iterate over its 3 members instead of over
                    # criteria.
                    self.criterion_list = [criterion_list]
                else:
                    self.criterion_list = criterion_list
                return
        elif isinstance(criterion_list, list):
            parsed_criterion_list = []
            for criterion in criterion_list:
                # maxsplit=1: only the first operator separates KEY from
                # VALUE, so that VALUE may itself contain '<', '>' or '=='.
                parsed_criterion = self.parse_criterion(criterion, maxsplit=1)
                if len(parsed_criterion) != 3:
                    raise NetworkcacheException(
                        'Could not parse criterion: missing or invalid separator (%s)'
                        % criterion)
                parsed_criterion[2] = json.loads(parsed_criterion[2])
                parsed_criterion_list.append(parsed_criterion)
            self.criterion_list = parsed_criterion_list
            return
        # Wrap in a 1-tuple: '%s' % some_tuple would otherwise treat the
        # tuple as the list of format arguments and raise TypeError.
        raise NetworkcacheException('Invalid criteria: %s' % (criterion_list,))

    @classmethod
    def _is_operator(cls, candidate):
        """Tell whether candidate is one of the supported operator strings."""
        try:
            return candidate in cls.operator_mapping
        except TypeError:
            # Unhashable (e.g. a nested criterion given as a list).
            return False

    def __call__(self, data_dict_list):
        """Return the list of shadir entries that match all the criteria.

        data_dict_list is an iterable of dictionaries. The input is never
        modified; a new list is returned, in the original order.
        """
        def safe_op(data_dict):
            # key, op, value and _op are read from the enclosing loop at call
            # time, including the updates of value done for '<<' and '>>'.
            try:
                return _op(data_dict[key], value)
            except TypeError as e:
                # Values of incomparable types: ignore the entry.
                logger.warning('Comparison failed: %r %s %r (%s)',
                               data_dict[key], op, value, type(e).__name__)
                return False

        data_dict_list = list(data_dict_list)
        for key, op, value in self.criterion_list:
            data_dict_list = [data_dict for data_dict in data_dict_list
                              if key in data_dict]
            if not data_dict_list:
                break
            _op = self.operator_mapping[op]
            if op in ("<<", ">>"):
                # Single pass looking for the extreme value: each strictly
                # better entry restarts the selection and becomes the new
                # reference; entries equal to the current best are added.
                filtered_data_dict_list = []
                for data_dict in data_dict_list:
                    if safe_op(data_dict):
                        filtered_data_dict_list = [data_dict]
                        value = data_dict[key]
                    elif filtered_data_dict_list and data_dict[key] == value:
                        filtered_data_dict_list.append(data_dict)
                data_dict_list = filtered_data_dict_list
            else:
                data_dict_list = list(filter(safe_op, data_dict_list))
        return data_dict_list
class NetworkcacheException(Exception):
    """Error raised by this module."""


# BBB: former exception names, kept as aliases of NetworkcacheException.
DirectoryNotFound = NetworkcacheException
UploadError = NetworkcacheException
key_help = \
" The key will be concatenation of PREFIX_KEY, md5(URL) and SUFFIX_KEY."
key_help = (
"The identifier under which the data is indexed."
" Defaults to 'file-urlmd5:md5(URL)'"
)
def _newArgumentParser(url_help):
def _newArgumentParser(url_help, key_help, key_required):
parser = argparse.ArgumentParser()
parser.add_argument('--config', type=argparse.FileType('r'), required=True,
parser.add_argument('-c', '--config', type=argparse.FileType('r'), required=True,
help='SlapOS configuration file.')
parser.add_argument('--prefix-key', default='',
help="Prefix used for the shadir URL, not a cryptografic key.")
parser.add_argument('--suffix-key', default='',
help="Suffix used for the shadir URL, not a cryptografic key.")
parser.add_argument('--url', help=url_help)
_ = parser.add_mutually_exclusive_group(required=key_required).add_argument
_('-k', '--key', help=key_help)
_('-u', '--url', help=url_help)
return parser
def cmd_upload(*args):
parser = _newArgumentParser(
"Upload data pointed to by this argument, unless --file is specified."
" Non-local contents is first downloaded to a temporary file."
"%s If not given, the uploaded data is not indexed." % key_help)
parser.add_argument('--file',
help="Upload the contents of this file, overriding --url")
parser.add_argument('--id',
help="Identifier used for the shadir URL. Overriding --prefix-key, --suffix-key and --url")
parser.add_argument('--metadata',
" Non-local contents is first downloaded to a temporary file.",
key_help + " if --url is given, else with neither --url nor --key the"
" uploaded data is not indexed. This should be a unique value that refers"
" to related entries, and that starts with 'SCHEME:' where SCHEME indicates"
" how (or by what) the data is processed. For performance reasons,"
" avoid having too many entries by making your key more specific.",
False)
parser.add_argument('-f', '--file',
help="Upload the contents of this file, overriding --url.")
parser.add_argument('-m', '--metadata',
help="Take a file containing a json-serializable dictionary with shadir metadata.")
parser.add_argument('meta', nargs='*', metavar='KEY=VALUE',
help="Extra metadata. Warning: interpreted as string.")
......@@ -512,7 +577,7 @@ def cmd_upload(*args):
try:
if args.file:
f = open(args.file, 'rb')
if not args.url and not args.id:
if not args.url and not args.key: # no shadir entry
nc.upload(f)
return
elif args.url:
......@@ -522,39 +587,50 @@ def cmd_upload(*args):
if args.metadata:
with open(args.metadata) as g:
try:
metadata_dict = json.loads(g.read())
if type(metadata_dict) != dict:
raise NetworkcacheException("Not a json-serializable dictionary: %s" % args.metadata)
except json.decoder.JSONDecodeError:
raise NetworkcacheException("Invalid json in %s" % args.metadata)
metadata_dict = json.load(g)
except json.decoder.JSONDecodeError as e:
sys.exit("%s: %s" % (args.metadata, e))
if type(metadata_dict) is not dict:
sys.exit("Not a dictionary: %s" % args.metadata)
else:
metadata_dict = dict()
metadata_dict.update(dict(x.split('=', 1) for x in args.meta))
if args.id:
metadata_dict.setdefault('id', args.id)
key = args.id
metadata_dict = {}
metadata_dict.update(x.split('=', 1) for x in args.meta)
if args.key:
identifier = args.key
else:
metadata_dict.setdefault('url', args.url)
urlmd5 = hashlib.md5(args.url.encode()).hexdigest()
key = args.prefix_key + urlmd5 + args.suffix_key
nc.upload(f, key, **metadata_dict)
identifier = "file-urlmd5:" + urlmd5
nc.upload(f, identifier, **metadata_dict)
finally:
f is None or f.close()
def cmd_download(*args):
parser = _newArgumentParser("URL of data to download." + key_help)
parser.add_argument('--id',
help="Identifier of the shadir URL, overriding --prefix-key and --suffix-key.")
parser.add_argument('meta', nargs='*', metavar='KEY=VALUE',
help="Extra metadata.")
parser = _newArgumentParser("URL of data to download.", key_help, True)
parser.add_argument('-l', '--list', action='store_true',
help="List found results instead of downloading the first one.")
parser.add_argument('meta', nargs='*', metavar='KEY{==,<=,>=,<,>,<<,>>}VALUE',
help="Filter metadata. Each argument represents a filter with a comparison"
" condition. The filters will be applied one by one with the arguments"
" processed in the order of appearance. VALUE is expected to be a json"
" dump of a comparable object in Python (strings included). << & >>"
" return the lowest & highest values respectively, in comparison to"
" VALUE. For example, `>=[4,2] <<Infinity` selects the oldest version"
" that is at least `[4,2]`.")
args = parser.parse_args(args or sys.argv[1:])
nc = NetworkcacheClient(args.config)
kw = dict(x.split('=', 1) for x in args.meta)
if args.id:
key = args.id
if args.key:
identifier = args.key
else:
urlmd5 = hashlib.md5(args.url.encode()).hexdigest()
key = args.prefix_key + urlmd5 + args.suffix_key
f = sys.stdout
shutil.copyfileobj(nc.download(next(nc.select(key, kw))['sha512']),
getattr(f, 'buffer', f))
identifier = "file-urlmd5:" + urlmd5
data_list = NetworkcacheFilter(args.meta)(list(nc.select(identifier)))
if not data_list:
sys.exit("No result found with given criteria.")
if args.list:
json.dump(data_list, sys.stdout, indent=2, sort_keys=True)
print()
else:
f = sys.stdout
shutil.copyfileobj(nc.download(data_list[0]['sha512']),
getattr(f, 'buffer', f)) # Py3
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment