Commit 39d5a98c authored by Klaus Wölfel

get data stream from wendelin

parent 537d8ba0
import argparse
import glob
import os
import pprint
import requests
import hashlib
import argparse
import json
import subprocess
import urllib
import sys
embulk_command = "/root/.embulk/bin/embulk"
config_path = "/root/wwm_prod_onetime.yml.liquid"
......@@ -23,6 +24,8 @@ def check_files(base_path=None,
......@@ -31,6 +34,13 @@ def check_files(base_path=None,
not_uploaded_list = []
diff_md5sum_list = []
base_path = os.path.normpath(base_path)
if not url:
reference = urllib.quote(os.path.join(tag, base_path, file_expression).replace('/', '.'))
r = requests.get("%s?reference=%s" %(info_url, reference), auth=(user, password))
url = str(r.text)
print url
if start is not None:
start = os.path.join(base_path, start)
if end is not None:
......@@ -52,7 +62,8 @@ def check_files(base_path=None,
# replace e.g. 2017/10/17280145.BMR -> 2017.10.17280145.BMR and url quote
key = urllib.quote(path[len(base_path):].replace('/', '.'))
key = urllib.quote(path[len(base_path)+len(os.sep):].replace('/', '.'))
r = requests.get("%s/hasBucketKey?key=%s" %(url, key), auth=(user, password))
......@@ -85,6 +96,14 @@ def check_files(base_path=None,
return sorted(not_uploaded_list), sorted(diff_md5sum_list)
def main(**kw):
    """Run check_files with the given keyword arguments and print a report.

    All keyword arguments are forwarded unchanged to check_files. The two
    lists it returns (files not uploaded, and uploaded files whose md5sum
    differs) are each printed as a count followed by the list itself.
    """
    missing_files, mismatched_files = check_files(**kw)
    # (message template, value) pairs, printed in this exact order.
    report_lines = (
        ("number of not uploaded files : %s", len(missing_files)),
        ("list of not uploaded files: %s", missing_files),
        ("number of uploaded files with different md5sum : %s", len(mismatched_files)),
        ("list of uploaded files with different md5sum: %s", mismatched_files),
    )
    for template, value in report_lines:
        print(template % value)
if __name__ == "__main__":
# get the command line arguments
parser = argparse.ArgumentParser(description='Test if all files are uploaded')
......@@ -92,18 +111,15 @@ if __name__ == "__main__":
parser.add_argument("-f", "--file_expression", help = 'File expression of the files to be uploaded, e.g. "*/*/*.BMR"', required = True, default = "")
parser.add_argument("-up","--upload", help = 'Upload missing files and files with different md5sum', action='store_true')
parser.add_argument("-d", "--delete_uploaded", help = "Delete files on local file system which are uploaded an have same md5sum on server as locally", action='store_true')
parser.add_argument("-u", "--url", help = "Request url to data bucket stream, e.g.", required = True, default = "")
parser_mutex = parser.add_mutually_exclusive_group(required=True)
parser_mutex.add_argument("-u", "--url", help = "Request url to data bucket stream, e.g.", default = "")
parser_mutex.add_argument("-i", "--info_url", help = "Request url to retrieve url of data bucket stream ", default = "")
parser.add_argument("-t", "--tag", help = "Tag", required = True, default = "")
parser.add_argument("-U", "--user", help = "Username", required = True, default = "")
parser.add_argument("-P", "--password", help = "Password", required = True, default = "")
parser.add_argument("-s", "--start", help = "First file to check", required = False, default = None)
parser.add_argument("-e", "--end", help = "Last file to check", required = False, default = None)
argument_dict = vars(parser.parse_args())
not_uploaded_list, diff_md5sum_list = check_files(**argument_dict)
print("number of not uploaded files : %s" %len(not_uploaded_list))
print("list of not uploaded files: %s" %not_uploaded_list)
print("number of uploaded files with different md5sum : %s" %len(diff_md5sum_list))
print("list of uploaded files with different md5sum: %s" %diff_md5sum_list)
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment