From 372c7949f42ec508253e5d1dd412a1dbbf455e2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Nowak?= <luke@nexedi.com> Date: Tue, 3 Mar 2009 15:46:30 +0000 Subject: [PATCH] - refactor usage of TID backup and restore by cutting restored file instead of doing backup until found TID - spellcheking corrections - described new behaviour in documentation - create methods to do typical tasks in repozo related scripts - forced usage of python2.4 when using ZODB - cleaned up files docstrings - cleaned up options analysis git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@25836 20353a03-c40f-0410-a6d1-a30d3c3de9de --- product/TIDStorage/README | 56 +++++-- .../TIDStorage/repozo/from_z2.8.8_repozo.diff | 112 ------------- .../TIDStorage/repozo/repozo_tidstorage.py | 154 +++++++----------- .../TIDStorage/repozo/restore_tidstorage.py | 25 +-- .../TIDStorage/repozo/sample_configuration.py | 6 +- 5 files changed, 121 insertions(+), 232 deletions(-) delete mode 100644 product/TIDStorage/repozo/from_z2.8.8_repozo.diff diff --git a/product/TIDStorage/README b/product/TIDStorage/README index 764ef1805f..346a26bf1f 100644 --- a/product/TIDStorage/README +++ b/product/TIDStorage/README @@ -28,6 +28,13 @@ because transactions are stored in ZODB in the order they are committed. So is T2 is in the backup, a part of T1 will also be, and backup will be inconsistent (T1 commit on B never happened). +TIDStorage log and server log +----------------------------- + +TIDStorage uses two logfiles - one which is used to inform administrator +about server state (logfile_name in configuration) and TIDStorage log to which +TIDs are appended (status_file in configuration). 
+ USAGE ===== @@ -49,6 +56,31 @@ Example: PYTHONPATH=/usr/lib/erp5/lib/python:/usr/lib/erp5/lib/python/Products/TIDStorage +Typical scenario with failure, restoring from backup +---------------------------------------------------- + + * Zopes and Zeos running + * TIDStorage running + * backups done using repozo/repozo_tidstorage.py (they might contain + incoherency), for every backup tidstorage.tid is saved + * system failure + * restore using repozo/repozo_tidstorage.py with -t tidstorage.tid from last + backup + +In this scenario the destination file is cut at the point of the last +known TID position only during restoration. This step is optional, as in some +cases the administrator might not want to cut this file. + +Typical scenario with failure, no restoring needed +-------------------------------------------------- + + * Zopes and Zeos running + * TIDStorage running + * system failure + * no need to restore from backup, but there might be some laying transactions + in different ZODB files, system is incoherent + * administrator uses repozo/restore_tidstorage.py to cut incorrectly + committed transactions, system is coherent again TECHNICAL DETAILS ================= @@ -70,11 +102,13 @@ TIDStorage is composed of 3 parts: - A daemon This is TIDStorage itself, receiving TIDs from Zopes and delivering coherency points to backup scripts. - - Backup scripts + - Backup scripts and other utilities Those scripts are (mostly) wrappers for repozo backup script, fetching coherency points from TIDStorage daemon and invoking repozo. - This requires a patch to be applied to regular repozo, so that it can - backup ZODBs only up to a given TID. + No changes to repozo.py are needed, as it is used only as a subsystem + to do reliable backups and restore. + Using the utilities provided in the utils/ directory, it is possible to query + for the last known TID from the server and operate on the TIDStorage log. 
Constraints under which TIDStorage was designed: - Zope performance @@ -100,13 +134,15 @@ Constraints under which TIDStorage was designed: from crashed ones - as long as they are not corrupted. Limits: - - Backup "lag" - As TIDStorage can only offer a coherency point when interdependent - transactions are all finished (committed or aborted), a backup started at - time T might actually contain data from moments before. There are pathologic - cases where no coherency point can be found, so no backup can happen. - Also, bootstrap can prevent backups from happening if daemon is - misconfigured. + - Restore "lag" + As TIDStorage can only offer a coherency point when interdependent + transactions are all finished (committed or aborted), TIDStorage log file + backup from time T might actually contain data from moments before. + So when restoring with the -t option, data will be cut to its state at + time T minus an undefined, small lag. + + There are even pathologic cases where no coherency point can be found, + so TIDStorage log file won't have any information. PROTOCOL SPECIFICATION ====================== diff --git a/product/TIDStorage/repozo/from_z2.8.8_repozo.diff b/product/TIDStorage/repozo/from_z2.8.8_repozo.diff deleted file mode 100644 index 2c786f2bcf..0000000000 --- a/product/TIDStorage/repozo/from_z2.8.8_repozo.diff +++ /dev/null @@ -1,112 +0,0 @@ ---- /home/vincent/bin/zope2.8/bin/repozo.py 2007-02-09 13:52:35.000000000 +0100 -+++ repozo.py 2007-10-26 15:30:43.311046075 +0200 -@@ -50,6 +50,12 @@ - Compress with gzip the backup files. Uses the default zlib - compression level. By default, gzip compression is not used. - -+ -m / --max-tid -+ Stop at given TID when saving the Data.fs. -+ -+ -M / --print-max-tid -+ Print the last saved transaction's tid. 
-+ - Options for -R/--recover: - -D str - --date=str -@@ -70,6 +76,7 @@ - import time - import errno - import getopt -+import base64 - - from ZODB.FileStorage import FileStorage - -@@ -104,10 +111,11 @@ - def parseargs(): - global VERBOSE - try: -- opts, args = getopt.getopt(sys.argv[1:], 'BRvhf:r:FD:o:Qz', -+ opts, args = getopt.getopt(sys.argv[1:], 'BRvhf:r:FD:o:Qzm:M', - ['backup', 'recover', 'verbose', 'help', - 'file=', 'repository=', 'full', 'date=', -- 'output=', 'quick', 'gzip']) -+ 'output=', 'quick', 'gzip', 'max-tid=', -+ 'print-max-tid']) - except getopt.error, msg: - usage(1, msg) - -@@ -120,6 +128,8 @@ - output = None # where to write recovered data; None = stdout - quick = False # -Q flag state - gzip = False # -z flag state -+ print_tid = False # -M flag state -+ max_tid = None # -m argument, if any - - options = Options() - -@@ -150,6 +160,10 @@ - options.output = arg - elif opt in ('-z', '--gzip'): - options.gzip = True -+ elif opt in ('-M', '--print-max-tid'): -+ options.print_tid = True -+ elif opt in ('-m', '--max-tid'): -+ options.max_tid = base64.decodestring(arg) - else: - assert False, (opt, arg) - -@@ -174,6 +188,12 @@ - if options.file is not None: - log('--file option is ignored in recover mode') - options.file = None -+ if options.print_tid: -+ log('--print-max-tid is ignored in recover mode') -+ options.print_tid = False -+ if options.max_tid is not None: -+ log('--max-tid is ignored in recover mode') -+ options.max_tid = None - return options - - -@@ -349,13 +369,19 @@ - - def do_full_backup(options): - # Find the file position of the last completed transaction. -- fs = FileStorage(options.file, read_only=True) -+ fs = FileStorage(options.file, read_only=True, stop=options.max_tid) - # Note that the FileStorage ctor calls read_index() which scans the file - # and returns "the position just after the last valid transaction record". 
- # getSize() then returns this position, which is exactly what we want, - # because we only want to copy stuff from the beginning of the file to the - # last valid transaction record. - pos = fs.getSize() -+ if options.print_tid: -+ undo_log = fs.undoLog(last=-1) -+ if len(undo_log): -+ print >> sys.stdout, 'Last TID: %s' % (undo_log[0]['id'], ) -+ else: -+ print >> sys.stderr, 'Cannot get latest TID' - fs.close() - options.full = True - dest = os.path.join(options.repository, gen_filename(options)) -@@ -375,13 +401,19 @@ - - def do_incremental_backup(options, reposz, repofiles): - # Find the file position of the last completed transaction. -- fs = FileStorage(options.file, read_only=True) -+ fs = FileStorage(options.file, read_only=True, stop=options.max_tid) - # Note that the FileStorage ctor calls read_index() which scans the file - # and returns "the position just after the last valid transaction record". - # getSize() then returns this position, which is exactly what we want, - # because we only want to copy stuff from the beginning of the file to the - # last valid transaction record. - pos = fs.getSize() -+ if options.print_tid: -+ undo_log = fs.undoLog(last=-1) -+ if len(undo_log): -+ print >> sys.stdout, 'Last TID: %s' % (undo_log[0]['id'], ) -+ else: -+ print >> sys.stderr, 'Cannot get latest TID' - fs.close() - options.full = False - dest = os.path.join(options.repository, gen_filename(options)) diff --git a/product/TIDStorage/repozo/repozo_tidstorage.py b/product/TIDStorage/repozo/repozo_tidstorage.py index 77f970e30d..7c34de35c1 100755 --- a/product/TIDStorage/repozo/repozo_tidstorage.py +++ b/product/TIDStorage/repozo/repozo_tidstorage.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python2.4 ############################################################################## # @@ -16,11 +16,11 @@ # Essentialy "usage" and "parseargs" methods. # So it's released under the ZPL v2.0, as is Zope 2.8.8 . 
-""" repozo wrapper to backup for multiple Data.fs files in a consistent way. +""" repozo wrapper to backup for multiple Data.fs files and restore them in a consistent way. Usage: %(program)s [-h|--help] [-c|--config configuration_file] [--repozo repozo_command] [-R|--recover|--recover_check] - [-H|--host address] [-p|--port port_number] [-u|--url formated_url] + [--tid_log tid_log_file] [...] -h @@ -40,91 +40,43 @@ Usage: %(program)s [-h|--help] [-c|--config configuration_file] -R --recover Instead of saving existing Data.fs, perform an automated recovery from - backups + timestamp file. + backups + timestamp file with optionally cutting file at found transaction. --recover_check Similar to above, except that it restores file to temp folder and compares with existing file. Files restored this way are automaticaly deleted after check. - -H address - --host address - TIDStorage server host address. - Overrides setting found in configuration_file. - Not required if recovering (see above). - - -p port_number - --port port_number - TIDStorage port nuber. - Overrides setting found in configuration_file. - Not required if recovering (see above). - - -u formated_url - --url formated_url - Zope base url, optionnaly with credentials. - Overrides setting found in configuration_file. - Not required if recovering (see above). - - All others parameters are transmitted to repozo but are partly processed by - getopt. To transmit unprocessed parameters to repozo, pass them as an - argument. + -t tid_log_file + --tid_log tid_log_file + TID log file, which will be used to find TID, which then will be used to + cut restored file at found TID """ -from TIDClient import TIDClient -from ExchangeProtocol import ExchangeProtocol +from ZODB.FileStorage import FileStorage -import socket -import base64 import imp import getopt import sys import os -# urllib2 does not support (?) urls containing credentials -# (http://login:password@...) but it's fine with urllib. 
-from urllib import urlopen -import traceback import md5 import time import tempfile -from struct import pack +from shutil import copy + +from repozo.restore_tidstorage import parse, get_tid_position program = sys.argv[0] def log(message): print message -def backup(address, known_tid_storage_identifier_dict, repozo_formated_command, zope_formated_url=None): - connection = TIDClient(address) - to_load = known_tid_storage_identifier_dict.keys() - load_count = 2 - while len(to_load): - if load_count < 1: - raise ValueError('It was impossible to retrieve all required TIDs. Missing: %s' % to_load) - to_load = [] - load_count -= 1 - stored_tid_dict = connection.dump_all() - #log(stored_tid_dict) - for key, (file_path, storage_path, object_path) in known_tid_storage_identifier_dict.iteritems(): - if key not in stored_tid_dict and zope_formated_url is not None: - to_load.append(key) - if object_path is not None: - serialize_url = zope_formated_url % (object_path, ) - log(serialize_url) - try: - response = urlopen(serialize_url) - except Exception, message: - # Prevent exceptions from interrupting the backup. - # We don't care about how well the web server is working, the only - # important thing is to get all TIDs in TIDStorage, and it's checked - # later. - log(''.join(traceback.format_exception(*sys.exc_info()))) - +def backup(known_tid_storage_identifier_dict, repozo_formated_command): + """Backups all ZODB files""" backup_count = 0 total_count = len(known_tid_storage_identifier_dict) for key, (file_path, storage_path, object_path) in known_tid_storage_identifier_dict.iteritems(): - tid_as_int = stored_tid_dict[key] + 1 - tid = base64.encodestring(pack('>Q', tid_as_int)).rstrip() - repozo_command = repozo_formated_command % (storage_path, file_path, tid) + repozo_command = repozo_formated_command % (storage_path, file_path) if not os.access(storage_path, os.R_OK): os.makedirs(storage_path) log('Runing %r...' 
% (repozo_command, )) @@ -133,7 +85,7 @@ def backup(address, known_tid_storage_identifier_dict, repozo_formated_command, if status == 0: backup_count += 1 else: - log('Error occured while saving %s: exit status=%i' % (file_path, status)) + log('Error occurred while saving %s: exit status=%i' % (file_path, status)) log('Saved %i FileStorages out of %i.' % (backup_count, total_count)) return total_count - backup_count @@ -147,14 +99,15 @@ def get_md5_diggest(file_instance, length): to_read = min(BLOCK_SIZE, length) buffer = read(to_read) if len(buffer) != to_read: - log('Warning: read %i instead of requiested %i, stopping read' % (len(buffer), to_read)) + log('Warning: read %i instead of requested %i, stopping read' % (len(buffer), to_read)) length = 0 else: length -= to_read update(buffer) return md5sum.hexdigest() -def recover(known_tid_storage_identifier_dict, repozo_formated_command, check=False): +def recover(known_tid_storage_identifier_dict, repozo_formated_command, check=False, last_tid_dict=None): + """Recovers all ZODB files, when last_tid_dict is passed cut them at proper byte""" recovered_count = 0 total_count = len(known_tid_storage_identifier_dict) for key, (file_path, storage_path, object_path) in known_tid_storage_identifier_dict.iteritems(): @@ -169,8 +122,18 @@ def recover(known_tid_storage_identifier_dict, repozo_formated_command, check=Fa status = os.WEXITSTATUS(status) if status == 0: recovered_count += 1 + if last_tid_dict is not None: + pos = get_tid_position(file_path, last_tid_dict[key]) + print 'Cutting restored file %s at %s byte' % (file_path, pos), + f = open(file_path,'a') + if not check: + f.truncate(pos) + print + else: + print 'only check, file untouched' + f.close() else: - log('Error occured while recovering %s: exit status=%i' % (file_path, status)) + log('Error occurred while recovering %s: exit status=%i' % (file_path, status)) if check: log('Info: Comparing restored %s with original %s' % (file_path, original_file_path)) 
recovered_file = open(file_path, 'r') @@ -217,11 +180,11 @@ def usage(code, msg=''): def parseargs(): try: - opts, args = getopt.getopt(sys.argv[1:], 'vQr:FhzMRc:H:p:u:', + opts, args = getopt.getopt(sys.argv[1:], 'vQrt:FhzRc:', ['help', 'verbose', 'quick', 'full', - 'gzip', 'print-max-tid', 'repository', - 'repozo=', 'config=', 'host=', 'port=', - 'url=', 'recover', 'recover_check']) + 'gzip', 'repository', 'repozo=', + 'config=','recover', 'recover_check', + 'tid_log=']) except getopt.error, msg: usage(1, msg) @@ -230,12 +193,12 @@ def parseargs(): repozo_file_name = 'repozo.py' configuration_file_name = None repozo_opts = ['-B'] - host = None - port = None - base_url = None known_tid_storage_identifier_dict = {} recover = False dry_run = False + status_file = None + status_file_backup_dir = None + recover_status_file = None options = Options() @@ -254,17 +217,10 @@ def parseargs(): options.recover = True if opt == '--recover_check': options.dry_run = True - elif opt in ('-H', '--host'): - options.host = arg - elif opt in ('-p', '--port'): - try: - options.port = int(port) - except ValueError, msg: - usage(1, msg) - elif opt in ('-u', '--url'): - options.url = arg elif opt in ('-r', '--repository'): options.repozo_opts.append('%s %s' % (opt, arg)) + elif opt in ('-t', '--tid_log'): + options.recover_status_file = arg else: options.repozo_opts.append(opt) @@ -285,25 +241,28 @@ def parseargs(): options.timestamp_file_path = module.timestamp_file_path except AttributeError, msg: usage(1, msg) - for option_id in ('port', 'host', 'base_url'): + for option_id in ('status_file', 'status_file_backup_dir' ): if getattr(options, option_id) is None: setattr(options, option_id, getattr(module, option_id, None)) # XXX: we do not check any option this way, it's too dangerous. 
#options.repozo_opts.extend(getattr(module, 'repozo_opts', [])) - if options.port is None: - options.port = 9001 - - if options.host is None: - usage(1, 'Either -H or --host is required (or host value should be set in configuration file).') return options +def backupStatusFile(status_file,destination_directory): + file_name = os.path.basename(status_file) + '-' + '%04d-%02d-%02d-%02d-%02d-%02d' % time.gmtime()[:6] + copy(status_file, os.path.sep.join((destination_directory,file_name))) + log("Written status file backup as %s" % os.path.sep.join((destination_directory,file_name))) + def main(): options = parseargs() - address = (options.host, options.port) - zope_formated_url = options.base_url - if options.base_url is not None and '%s' not in zope_formated_url: - raise ValueError, 'Given base url (%r) is not properly formated, it must contain one \'%%s\'.' % (zope_formated_url, ) + if not options.recover and options.recover_status_file: + raise ValueError("Status file path only for recovering") + + last_tid_dict = None + if options.recover_status_file: + last_tid_dict = parse(options.recover_status_file) + repozo_formated_command = '%s %s -r "%%s"' % (options.repozo_file_name, ' '.join(options.repozo_opts)) if options.recover: timestamp_file = open(options.timestamp_file_path, 'r') @@ -318,13 +277,14 @@ def main(): result = recover( known_tid_storage_identifier_dict=options.known_tid_storage_identifier_dict, repozo_formated_command=repozo_formated_command, - check=options.dry_run) + check=options.dry_run, + last_tid_dict=last_tid_dict) else: - repozo_formated_command += ' -f "%s" -m "%s"' + repozo_formated_command += ' -f "%s"' + if options.status_file is not None and options.status_file_backup_dir is not None: + backupStatusFile(options.status_file, options.status_file_backup_dir) result = backup( - address=address, known_tid_storage_identifier_dict=options.known_tid_storage_identifier_dict, - zope_formated_url=zope_formated_url, 
repozo_formated_command=repozo_formated_command) if result == 0: # Paranoid mode: diff --git a/product/TIDStorage/repozo/restore_tidstorage.py b/product/TIDStorage/repozo/restore_tidstorage.py index af39c1db36..ec09232630 100755 --- a/product/TIDStorage/repozo/restore_tidstorage.py +++ b/product/TIDStorage/repozo/restore_tidstorage.py @@ -79,6 +79,19 @@ def parse(status_file): READCHUNK = 10 * 1024 * 1024 +def get_tid_position(filepath,last_tid): + tid = pack('>Q', last_tid + 1) + # Find the file position of the last completed transaction. + fs = FileStorage(filepath, read_only=True, stop=tid) + # Note that the FileStorage ctor calls read_index() which scans the file + # and returns "the position just after the last valid transaction record". + # getSize() then returns this position, which is exactly what we want, + # because we only want to copy stuff from the beginning of the file to the + # last valid transaction record. + pos = fs.getSize() + fs.close() + return pos + def recover(data_fs_backup_path_dict, status_file): last_tid_dict = parse(status_file) for storage_id, (file_path, backup_path) in data_fs_backup_path_dict.iteritems(): @@ -105,17 +118,7 @@ def recover(data_fs_backup_path_dict, status_file): else: print 'Cannot find any file for %r: %r and %r do not exist.' % (storage_id, file_path, backup_path) if can_restore: - last_tid = last_tid_dict[storage_id] + 1 - tid = pack('>Q', last_tid) - # Find the file position of the last completed transaction. - fs = FileStorage(backup_path, read_only=True, stop=tid) - # Note that the FileStorage ctor calls read_index() which scans the file - # and returns "the position just after the last valid transaction record". - # getSize() then returns this position, which is exactly what we want, - # because we only want to copy stuff from the beginning of the file to the - # last valid transaction record. 
- pos = fs.getSize() - fs.close() + pos = get_tid_position(backup_path,last_tid_dict[storage_id]) print 'Restoring backup: %s bytes (transaction %r) from %s to %s' % (pos, tid, backup_path, file_path) source_file = open(backup_path, 'rb') destination_file = open(file_path, 'wb') diff --git a/product/TIDStorage/repozo/sample_configuration.py b/product/TIDStorage/repozo/sample_configuration.py index 56ea8cc01d..7fdfb281ee 100644 --- a/product/TIDStorage/repozo/sample_configuration.py +++ b/product/TIDStorage/repozo/sample_configuration.py @@ -1,5 +1,5 @@ # COMMON -# This part is used both by server_v2.py and repozo_tidstorage_v2.py +# This part is used both by tidstorage.py and repozo_tidstorage.py known_tid_storage_identifier_dict = { "((('localhost', 8200),), '2')": ('/home/vincent/zeo2/var2/Data.fs', @@ -30,6 +30,8 @@ burst_period = 30 full_dump_period = 300 # REPOZO_TIDSTORAGE -# This part is only used by repozo_tidstorage_v2.py +# This part is only used by repozo_tidstorage.py timestamp_file_path = 'repozo_tidstorage_timestamp.log' +# place to put backuped TIDStorage status_file logs +status_file_backup_dir = '/home/vincent/tmp/repozo' -- 2.30.9