Commit 9fa8c08e authored by Klaus Wölfel

enable duo, enable xmr and bmr with same command, simplify arguments, refactor

parent e4164cf8
#!/usr/bin/python
import glob
import os
import pprint
import requests
import hashlib
import argparse
def _md5sum(path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is read in ``chunk_size``-byte pieces so large files do not have
    to fit in memory, and the handle is closed as soon as hashing is done.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def _bucket_key(base_path, path):
    """Return the bucket key of ``path``: its location below ``base_path``
    with the path separators replaced by dots.

    i.e. 2017/10/17280145.BMR -> 2017.10.17280145.BMR

    ``os.path.relpath`` is used instead of slicing off ``len(base_path)``
    characters so the key is the same whether or not ``base_path`` ends with
    a separator (slicing left a leading "." when it did not).
    """
    relative = os.path.relpath(path, base_path)
    return relative.replace(os.sep, '/').replace('/', '.')


def check_files(base_path=None,
                file_expression=None,
                url=None,
                user=None,
                password=None,
                start=None,
                end=None,
                file_size=None):
    """Compare local files with what the remote data stream reports.

    Every file matching ``file_expression`` below ``base_path`` is looked up
    by key through ``<url>/hasBucketKey``; if the answer is the text ``True``
    the local MD5 digest is compared with the text returned by
    ``<url>/getMd5sum``.

    Parameters:
      base_path       -- directory the glob expression and keys are relative to
      file_expression -- glob pattern below base_path, e.g. "*/*/*.BMR"
      url             -- base URL providing hasBucketKey and getMd5sum
      user, password  -- credentials passed as HTTP auth to both requests
      start, end      -- optional paths relative to base_path; files whose
                         path sorts before ``start`` or after ``end`` are
                         skipped (plain string comparison)
      file_size       -- optional size in bytes; files of any other size are
                         skipped

    Returns a tuple ``(not_uploaded, different_md5sum)`` of two sorted lists
    of local paths.
    """
    not_uploaded_list = []
    diff_md5sum_list = []
    if start is not None:
        start = os.path.join(base_path, start)
    if end is not None:
        end = os.path.join(base_path, end)
    auth = (user, password)

    for path in glob.iglob(os.path.join(base_path, file_expression)):
        # Both bounds are optional; comparing None with a str raises
        # TypeError on Python 3, so each bound is guarded explicitly.
        if start is not None and path < start:
            continue
        if end is not None and path > end:
            continue
        print(path)
        if file_size is not None and os.stat(path).st_size != file_size:
            print("Not loaded because of size")
            continue
        # Hidden means the file name starts with a dot; testing the joined
        # path would depend on how base_path itself was spelled.
        if os.path.basename(path).startswith("."):
            print("Not loaded because hidden")
            continue

        key = _bucket_key(base_path, path)
        print(key)
        # The key goes through ``params`` so that it is URL-encoded.
        # NOTE(review): only the response text is inspected, so an HTTP error
        # (e.g. failed authentication) is reported as "NOT UPLOADED".
        r = requests.get("%s/hasBucketKey" % url, params={"key": key},
                         auth=auth)
        if str(r.text) != 'True':
            print("NOT UPLOADED")
            not_uploaded_list.append(path)
            continue

        # Digests are only needed once the key is known to exist.
        md5sum_local_file = _md5sum(path)
        r = requests.get("%s/getMd5sum" % url, params={"key": key},
                         auth=auth)
        md5sum_uploaded_file = r.text
        if md5sum_local_file == md5sum_uploaded_file:
            print("UPLOADED")
        else:
            print("File is uploaded BUT md5sum is Different")
            diff_md5sum_list.append(path)
        print("local file ", md5sum_local_file)
        print("uploaded file= ", md5sum_uploaded_file)

    return sorted(not_uploaded_list), sorted(diff_md5sum_list)
def build_argument_parser():
    """Return the command line parser of this script.

    The destination names of the options are exactly the keyword arguments
    of ``check_files``, so the parsed namespace can be passed on unchanged.
    """
    parser = argparse.ArgumentParser(description='Test if all files are uploaded')
    parser.add_argument("-b", "--base_path", required=True,
                        help="Base path of the files to be uploaded, e.g. /mic/syscomtestuser/syscom/SYSCOM02-12400555/background")
    parser.add_argument("-f", "--file_expression", required=True,
                        help='File expression of the files to be uploaded, e.g. "*/*/*.BMR"')
    parser.add_argument("-u", "--url", required=True,
                        help="Request url to data bucket stream, e.g. https://monitoring.woelfel.de/neo-erp5/data_stream_module/26")
    parser.add_argument("-U", "--user", required=True, help="Username")
    parser.add_argument("-P", "--password", required=True, help="Password")
    parser.add_argument("-s", "--start", required=False, default=None,
                        help="Path relative to the base path at which checking starts; files sorting before it are skipped")
    parser.add_argument("-e", "--end", required=False, default=None,
                        help="Path relative to the base path at which checking ends; files sorting after it are skipped")
    parser.add_argument("-S", "--file_size", required=False, default=None, type=int,
                        help="Only check files of exactly this size in bytes")
    return parser


def main(argv=None):
    """Parse ``argv`` (``sys.argv[1:]`` when None), run the check and print
    the two result lists together with their lengths."""
    argument_dict = vars(build_argument_parser().parse_args(argv))
    # Echo the effective arguments, but never the password in clear text.
    pprint.pprint(dict(argument_dict, password="********"))
    not_uploaded_list, diff_md5sum_list = check_files(**argument_dict)
    print("number of not uploaded files : %s" % len(not_uploaded_list))
    print("list of not uploaded files: %s" % not_uploaded_list)
    print("number of uploaded files with different md5sum : %s" % len(diff_md5sum_list))
    print("list of uploaded files with different md5sum: %s" % diff_md5sum_list)


if __name__ == "__main__":
    main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment