Commit 9fa8c08e authored by Klaus Wölfel

enable duo, enable xmr and bmr with same command, simplify arguments, refactor

parent e4164cf8
import glob
import os
import pprint
import requests
import hashlib
import argparse
def get_not_uploaded_files_list(path, path_with_dots):
r = requests.get(request_string_has_bucket_key + path_with_dots, auth=(username, password))
def check_files(base_path=None,
not_uploaded_list = []
diff_md5sum_list = []
if start is not None:
start = os.path.join(base_path, start)
if end is not None:
end = os.path.join(base_path, end)
for path in glob.iglob(os.path.join(base_path, file_expression)):
if start > path:
if end is not None:
if end < path:
if file_size is not None:
if os.stat(path).st_size != file_size:
print("Not loaded because of size")
if path.startswith(".") :
print("Not loaded because hidden")
# replace i.e. 2017/10/17280145.BMR -> 2017.10.17280145.BMR
key = path[len(base_path):].replace('/', '.')
r = requests.get("%s/hasBucketKey?key=%s" %(url, key), auth=(user, password))
has_bucket_key_res = str(r.text)
md5sum_local_file = hashlib.md5(open(path, 'rb').read()).hexdigest() # get md5 sum of the file_to_be_uploaded
r = requests.get(request_string_md5sum + path_with_dots, auth=(username, password))
r = requests.get("%s/getMd5sum?key=%s" %(url, key), auth=(user, password))
md5sum_uploaded_file = r.text # get md5 sum of the uploaded file
if has_bucket_key_res == 'True' :
if md5sum_local_file == md5sum_uploaded_file :
print (path)
print ("local file ", md5sum_local_file)
print ("uploaded file= ", md5sum_uploaded_file)
print ("UPLOADED")
else :
print (path)
print ("File is uploaded BUT md5sum is Different")
print ("local file ", md5sum_local_file)
print ("uploaded file= ", md5sum_uploaded_file)
if md5sum_local_file == md5sum_uploaded_file :
else :
print("File is uploaded BUT md5sum is Different")
print("local file ", md5sum_local_file)
print("uploaded file= ", md5sum_uploaded_file)
else :
print (path)
print ("NOT UPLOADED")
return files_not_uploaded, uploaded_with_diff_md5sum
def check_files(files, *args, **kwargs):
    """Check a list of local files against the server and report the problems.

    Every file whose size is exactly 1792 bytes and whose path does not start
    with "." is handed to ``get_not_uploaded_files_list`` together with its
    dotted key (the part of the path after ``background/`` with "/" replaced
    by ".", e.g. 2017/10/17280145.BMR -> 2017.10.17280145.BMR).

    Parameters:
        files: list of file paths to check.
        start, end (keyword only, optional): slice bounds into ``files``.
            The slice ``files[start:end]`` is used only when BOTH are given;
            otherwise every file is checked.

    Returns:
        A tuple ``(not_uploaded, different_md5sum)`` of sorted lists, taken
        from the last ``get_not_uploaded_files_list`` call (presumably that
        helper accumulates over calls - confirm). Two empty lists are
        returned when no file qualified for a check.

    Raises:
        OSError: if ``os.stat`` fails on one of the files.
        IndexError: if a checked path does not contain ``background/``.
    """
    start = kwargs.get('start')
    end = kwargs.get('end')

    # A chunk is honoured only when both bounds are present.
    chunked = start is not None and end is not None
    candidates = files[start:end] if chunked else files

    # The two modes have always reported skipped files with different wording;
    # keep both texts so existing log parsing keeps working.
    if chunked:
        skip_message = "Not loaded because of size or hidden"
    else:
        skip_message = "Not uploaded because of size or hidden"

    # Bind the results up front: without this, an empty input (or one where
    # every file is skipped) hit an UnboundLocalError at the return statement.
    files_not_uploaded = []
    uploaded_with_diff_md5sum = []

    for file_path in candidates:
        # check only files with the proper size
        file_size = os.stat(file_path).st_size
        if file_size != 1792 or file_path.startswith("."):
            print(file_path)
            print(skip_message)
            continue

        path_after_background = file_path.split('background/')[1]
        # replace i.e. 2017/10/17280145.BMR -> 2017.10.17280145.BMR
        path_with_dots = path_after_background.replace('/', '.')
        if chunked:
            # Debug output that only the chunked mode emits.
            print("path_after_background", path_after_background)
            print("path_with_dots", path_with_dots)
        files_not_uploaded, uploaded_with_diff_md5sum = get_not_uploaded_files_list(file_path, path_with_dots)

    return sorted(files_not_uploaded), sorted(uploaded_with_diff_md5sum)
# start
# Command line driver: collect every non-hidden file below --path, optionally
# restrict the list to a chunk, and report which files are missing on the
# server or were uploaded with a different md5sum.

# get the command line arguments
parser = argparse.ArgumentParser(description='Test if all files are uploaded')
parser.add_argument("-p", "--path", required=True,
                    help="Path of the files to be uploaded, e.g. /mic/syscomtestuser/syscom/SYSCOM02-12400555/background/")
parser.add_argument("-string_md5sum", "--request_string_md5sum", required=True,
                    help="Request string to get md5sum, e.g.")
parser.add_argument("-string_has_bucket_key", "--request_string_has_bucket_key", required=True,
                    help="Request string to get hasBucketKey value, e.g.")
parser.add_argument("-user", "--username", required=True, help="Username")
parser.add_argument("-pswd", "--password", required=True, help="Password")
parser.add_argument("-c", "--chunk", required=False, action='store_true',
                    help="Check only chunk of files")
parser.add_argument("-s", "--start", required=False, default="",
                    help="Start of the chunk. Used only when -c")
parser.add_argument("-e", "--end", required=False, default="",
                    help="End of the chunk. Used only when -c and -s")
args = parser.parse_args()

chunk_start = ""
chunk_end = ""
if args.chunk:
    print("chunk is set")
    if not args.start or not args.end:
        # Stop here with a usage error; carrying on would only fail later
        # while looking up an empty chunk bound in the file list.
        parser.error("Start and/or End of the chunk is not given")
    chunk_start = args.start
    chunk_end = args.end

# These module-level names are read by the request helper functions.
dir_path = args.path
request_string_md5sum = args.request_string_md5sum
request_string_has_bucket_key = args.request_string_has_bucket_key
username = args.username
password = args.password

print("Directory to be uploaded")
print(dir_path)
print("Request string to get md5sum")
print(request_string_md5sum)
print("Request string to get the value of hasBucketKey")
print(request_string_has_bucket_key)
print("Username")
print(username)
print("Password")
# Never echo the secret itself: console output ends up in logs and scrollback.
print("********")

# some initializations
files_not_uploaded = []
path_with_dots = ""
files_full_path = []
uploaded_with_diff_md5sum = []

# walk through directories, get the files and compare md5sums with uploaded ones
for root, dirs, files in os.walk(dir_path):
    # skip hidden files
    files[:] = [f for f in files if not f.startswith('.')]
    for file in files:
        files_full_path.append(os.path.join(root, file))
files_full_path = sorted(files_full_path)

if args.chunk and files_full_path:
    # The chunk bounds are given as full paths and must match entries of the
    # collected list exactly.
    try:
        index_start = files_full_path.index(chunk_start)
        index_end = files_full_path.index(chunk_end)
    except ValueError:
        parser.error("Start and/or End of the chunk is not among the files found under %s" % dir_path)
    files_not_uploaded, uploaded_with_diff_md5sum = check_files(files_full_path, start=index_start, end=index_end)
else:
    files_not_uploaded, uploaded_with_diff_md5sum = check_files(files_full_path)

print("number of not uploaded files : %s" % len(files_not_uploaded))
print("list of not uploaded files: %s" % (files_not_uploaded,))
print("number of uploaded files with different md5sum : %s" % len(uploaded_with_diff_md5sum))
print("list of uploaded files with different md5sum: %s" % (uploaded_with_diff_md5sum,))
return sorted(not_uploaded_list), sorted(diff_md5sum_list)
if __name__ == "__main__":
    # Command line entry point: parse the options, forward them unchanged as
    # keyword arguments to check_files() and print the two result lists.

    # get the command line arguments
    parser = argparse.ArgumentParser(description='Test if all files are uploaded')
    # NOTE: required options never fall back to a default, so none is declared.
    parser.add_argument("-b", "--base_path", required=True,
                        help="Base path of the files to be uploaded, e.g. /mic/syscomtestuser/syscom/SYSCOM02-12400555/background")
    parser.add_argument("-f", "--file_expression", required=True,
                        help='File expression of the files to be uploaded, e.g. "*/*/*.BMR"')
    parser.add_argument("-u", "--url", required=True,
                        help="Request url to data bucket stream, e.g.")
    parser.add_argument("-U", "--user", required=True, help="Username")
    parser.add_argument("-P", "--password", required=True, help="Password")
    # The chunk bounds are optional and independent of each other; this
    # parser has no -c switch, so the help text must not refer to one.
    parser.add_argument("-s", "--start", required=False, default=None,
                        help="Start of the chunk, as a path relative to the base path. Optional.")
    parser.add_argument("-e", "--end", required=False, default=None,
                        help="End of the chunk, as a path relative to the base path. Optional.")

    # The option names double as check_files() keyword arguments.
    argument_dict = vars(parser.parse_args())
    not_uploaded_list, diff_md5sum_list = check_files(**argument_dict)

    print("number of not uploaded files : %s" % len(not_uploaded_list))
    print("list of not uploaded files: %s" % (not_uploaded_list,))
    print("number of uploaded files with different md5sum : %s" % len(diff_md5sum_list))
    print("list of uploaded files with different md5sum: %s" % (diff_md5sum_list,))
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment