Commit 39d5a98c authored by Klaus Wölfel

get data stream from wendelin

parent 537d8ba0
#!/usr/bin/python #!/usr/bin/python
import argparse
import glob import glob
import os import os
import pprint import pprint
import requests import requests
import hashlib import hashlib
import argparse
import json import json
import subprocess import subprocess
import urllib import urllib
import sys
embulk_command = "/root/.embulk/bin/embulk" embulk_command = "/root/.embulk/bin/embulk"
config_path = "/root/wwm_prod_onetime.yml.liquid" config_path = "/root/wwm_prod_onetime.yml.liquid"
...@@ -23,6 +24,8 @@ def check_files(base_path=None, ...@@ -23,6 +24,8 @@ def check_files(base_path=None,
upload=False, upload=False,
delete_uploaded=False, delete_uploaded=False,
url=None, url=None,
info_url=None,
tag="",
user=None, user=None,
password=None, password=None,
start=None, start=None,
...@@ -31,6 +34,13 @@ def check_files(base_path=None, ...@@ -31,6 +34,13 @@ def check_files(base_path=None,
not_uploaded_list = [] not_uploaded_list = []
diff_md5sum_list = [] diff_md5sum_list = []
base_path = os.path.normpath(base_path)
if not url:
reference = urllib.quote(os.path.join(tag, base_path, file_expression).replace('/', '.'))
r = requests.get("%s?reference=%s" %(info_url, reference), auth=(user, password))
url = str(r.text)
print url
if start is not None: if start is not None:
start = os.path.join(base_path, start) start = os.path.join(base_path, start)
if end is not None: if end is not None:
...@@ -52,7 +62,8 @@ def check_files(base_path=None, ...@@ -52,7 +62,8 @@ def check_files(base_path=None,
continue continue
# replace i.e. 2017/10/17280145.BMR -> 2017.10.17280145.BMRa and url quote # replace i.e. 2017/10/17280145.BMR -> 2017.10.17280145.BMRa and url quote
key = urllib.quote(path[len(base_path):].replace('/', '.'))
key = urllib.quote(path[len(base_path)+len(os.sep):].replace('/', '.'))
print(key) print(key)
r = requests.get("%s/hasBucketKey?key=%s" %(url, key), auth=(user, password)) r = requests.get("%s/hasBucketKey?key=%s" %(url, key), auth=(user, password))
...@@ -85,6 +96,14 @@ def check_files(base_path=None, ...@@ -85,6 +96,14 @@ def check_files(base_path=None,
return sorted(not_uploaded_list), sorted(diff_md5sum_list) return sorted(not_uploaded_list), sorted(diff_md5sum_list)
def main(**kw):
    """Run the upload check and print a summary of the result.

    Every keyword argument is forwarded unchanged to ``check_files``. The two
    sequences it returns (files not uploaded, and uploaded files whose md5sum
    differs) are each reported as a count followed by the full listing.
    """
    missing_files, mismatched_files = check_files(**kw)

    # Files that are not uploaded.
    print("number of not uploaded files : %s" % len(missing_files))
    print("list of not uploaded files: %s" % missing_files)

    # Files that are uploaded but whose md5sum differs.
    print("number of uploaded files with different md5sum : %s" % len(mismatched_files))
    print("list of uploaded files with different md5sum: %s" % mismatched_files)
if __name__ == "__main__": if __name__ == "__main__":
# get the command line arguments # get the command line arguments
parser = argparse.ArgumentParser(description='Test if all files are uploaded') parser = argparse.ArgumentParser(description='Test if all files are uploaded')
...@@ -92,18 +111,15 @@ if __name__ == "__main__": ...@@ -92,18 +111,15 @@ if __name__ == "__main__":
parser.add_argument("-f", "--file_expression", help = 'File expression of the files to be uploaded, e.g. "*/*/*.BMR"', required = True, default = "") parser.add_argument("-f", "--file_expression", help = 'File expression of the files to be uploaded, e.g. "*/*/*.BMR"', required = True, default = "")
parser.add_argument("-up","--upload", help = 'Upload missing files and files with different md5sum', action='store_true') parser.add_argument("-up","--upload", help = 'Upload missing files and files with different md5sum', action='store_true')
parser.add_argument("-d", "--delete_uploaded", help = "Delete files on local file system which are uploaded an have same md5sum on server as locally", action='store_true') parser.add_argument("-d", "--delete_uploaded", help = "Delete files on local file system which are uploaded an have same md5sum on server as locally", action='store_true')
parser.add_argument("-u", "--url", help = "Request url to data bucket stream, e.g. https://monitoring.woelfel.de/neo-erp5/data_stream_module/26", required = True, default = "") parser_mutex = parser.add_mutually_exclusive_group(required=True)
parser_mutex.add_argument("-u", "--url", help = "Request url to data bucket stream, e.g. https://monitoring.woelfel.de/neo-erp5/data_stream_module/26", default = "")
parser_mutex.add_argument("-i", "--info_url", help = "Request url to retrieve url of data bucket stream ", default = "")
parser.add_argument("-t", "--tag", help = "Tag", required = True, default = "")
parser.add_argument("-U", "--user", help = "Username", required = True, default = "") parser.add_argument("-U", "--user", help = "Username", required = True, default = "")
parser.add_argument("-P", "--password", help = "Password", required = True, default = "") parser.add_argument("-P", "--password", help = "Password", required = True, default = "")
parser.add_argument("-s", "--start", help = "First file to check", required = False, default = None) parser.add_argument("-s", "--start", help = "First file to check", required = False, default = None)
parser.add_argument("-e", "--end", help = "Last file to check", required = False, default = None) parser.add_argument("-e", "--end", help = "Last file to check", required = False, default = None)
argument_dict = vars(parser.parse_args()) argument_dict = vars(parser.parse_args())
pprint.pprint(argument_dict) main(**argument_dict)
not_uploaded_list, diff_md5sum_list = check_files(**argument_dict)
print("number of not uploaded files : %s" %len(not_uploaded_list))
print("list of not uploaded files: %s" %not_uploaded_list)
print("number of uploaded files with different md5sum : %s" %len(diff_md5sum_list))
print("list of uploaded files with different md5sum: %s" %diff_md5sum_list)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment