#!/usr/bin/env python
# rdiff-backup -- Mirror files while keeping incremental changes
# Version released May 25, 2002
# Version released May 25, 2002
# Copyright (C) 2001, 2002 Ben Escoto <>
# This program is licensed under the GNU General Public License (GPL).
......@@ -240,6 +240,7 @@ class HLDestinationStruct:
"""Apply diffs and finalize, with checkpointing and statistics"""
collated = RORPIter.CollateIterators(diffs, cls.initial_dsiter2)
finalizer, ITR = cls.get_finalizer(), cls.get_MirrorITR(inc_rpath)
dsrp = None
def error_checked():
......@@ -262,13 +263,15 @@ class HLDestinationStruct:
cls.check_skip_error(finalizer.Finish, dsrp)
except: cls.handle_last_error(dsrp, finalizer, ITR)
if Globals.preserve_hardlinks: Hardlink.final_writedata()
def patch_increment_and_finalize(cls, dest_rpath, diffs, inc_rpath):
"""Apply diffs, write increment if necessary, and finalize"""
collated = RORPIter.CollateIterators(diffs, cls.initial_dsiter2)
finalizer, ITR = cls.get_finalizer(), cls.get_ITR(inc_rpath)
dsrp = None
def error_checked():
......@@ -292,7 +295,8 @@ class HLDestinationStruct:
cls.check_skip_error(finalizer.Finish, dsrp)
except: cls.handle_last_error(dsrp, finalizer, ITR)
if Globals.preserve_hardlinks: Hardlink.final_writedata()
def check_skip_error(cls, thunk, dsrp):
......@@ -323,19 +327,4 @@ class HLDestinationStruct:
def write_statistics(cls, ITR):
"""Write session statistics to file, log"""
stat_inc = Inc.get_inc(Globals.rbdir.append("session_statistics"),
Time.curtime, "data")
ITR.StartTime = Time.curtime
ITR.EndTime = time.time()
if Globals.preserve_hardlinks and Hardlink.final_inc:
# include hardlink data in size of increments
ITR.IncrementFileSize += Hardlink.final_inc.getsize()
if Globals.print_statistics:
message = ITR.get_stats_logstring("Session statistics")
......@@ -22,7 +22,11 @@ class Inc:
mirror is the mirrored file from the last backup,
incpref is the prefix of the increment file.
This function basically moves mirror -> incpref.
This function basically moves the information about the mirror
file to incpref.
The returned RobustAction when executed should return the name
of the incfile, or None if none was created.
if not (new and new.lstat() or mirror.lstat()):
......@@ -39,13 +43,15 @@ class Inc:
else: return Inc.makesnapshot_action(mirror, incpref)
def Increment(new, mirror, incpref):
Inc.Increment_action(new, mirror, incpref).execute()
return Inc.Increment_action(new, mirror, incpref).execute()
def makemissing_action(incpref):
"""Signify that mirror file was missing"""
return RobustAction(lambda: None,
Inc.get_inc_ext(incpref, "missing").touch,
lambda exp: None)
def final(init_val):
incrp = Inc.get_inc_ext(incpref, "missing")
return incrp
return RobustAction(None, final, None)
def makesnapshot_action(mirror, incpref):
"""Copy mirror to incfile, since new is quite different"""
......@@ -62,22 +68,29 @@ class Inc:
if (Globals.compression and
not Globals.no_compression_regexp.match(mirror.path)):
diff = Inc.get_inc_ext(incpref, "diff.gz")
return Robust.chain([Rdiff.write_delta_action(new, mirror,
diff, 1),
Robust.copy_attribs_action(mirror, diff)])
compress = 1
diff = Inc.get_inc_ext(incpref, "diff")
return Robust.chain([Rdiff.write_delta_action(new, mirror,
diff, None),
Robust.copy_attribs_action(mirror, diff)])
compress = None
diff_tf =
sig_tf =, None)
def init():
Rdiff.write_delta(new, mirror, diff_tf, compress, sig_tf)
RPath.copy_attribs(mirror, diff_tf)
return diff
return Robust.make_tf_robustaction(init, (diff_tf, sig_tf),
(diff, None))
def makedir_action(mirrordir, incpref):
"""Make file indicating directory mirrordir has changed"""
dirsign = Inc.get_inc_ext(incpref, "dir")
def final():
RPath.copy_attribs(mirrordir, dirsign)
return RobustAction(lambda: None, final, dirsign.delete)
tf =
def init():
RPath.copy_attribs(mirrordir, tf)
return dirsign
return Robust.make_tf_robustaction(init, tf, dirsign)
def get_inc(rp, time, typestr):
"""Return increment like rp but with time and typestr suffixes"""
......@@ -127,8 +140,15 @@ class IncrementITR(StatsITR):
Remember this object needs to be picklable.
mirror_isdirectory, directory_replacement = None, None
# Iff true, mirror file was a directory
mirror_isdirectory = None
# If set, what the directory on the mirror side will be replaced with
directory_replacement = None
# True iff there has been some change at this level or lower (used
# for marking directories to be flagged)
changed = None
# Holds the RPath of the created increment file, if any
incrp = None
def __init__(self, inc_rpath):
"""Set inc_rpath, an rpath of the base of the tree"""
......@@ -184,26 +204,34 @@ class IncrementITR(StatsITR):
if not (incpref.lstat() and incpref.isdir()): incpref.mkdir()
if diff_rorp and diff_rorp.isreg() and diff_rorp.file:
tf =
def init():
RPathStatic.copy_with_attribs(diff_rorp, tf)
def error(exc, ran_init, init_val): tf.delete()
RobustAction(init, None, error).execute()
self.directory_replacement = tf
def init_non_dir(self, dsrp, diff_rorp, incpref):
"""Process a non directory file (initial pass)"""
if not diff_rorp: return # no diff, so no change necessary
if diff_rorp.isreg() and (dsrp.isreg() or diff_rorp.isflaglinked()):
tf =
# Write updated mirror to temp file so we can compute
# reverse diff locally
mirror_tf =
def init_thunk():
if diff_rorp.isflaglinked():
Hardlink.link_rp(diff_rorp, tf, dsrp)
Hardlink.link_rp(diff_rorp, mirror_tf, dsrp)
else: Rdiff.patch_with_attribs_action(dsrp, diff_rorp,
Inc.Increment_action(tf, dsrp, incpref).execute()
Robust.make_tf_robustaction(init_thunk, (tf,), (dsrp,)).execute()
Robust.chain([Inc.Increment_action(diff_rorp, dsrp, incpref),
RORPIter.patchonce_action(None, dsrp, diff_rorp)]
self.incrp = Inc.Increment_action(mirror_tf, dsrp,
def final(init_val): mirror_tf.rename(dsrp)
def error(exc, ran_init, init_val): mirror_tf.delete()
RobustAction(init_thunk, final, error).execute()
else: self.incrp = Robust.chain(
Inc.Increment_action(diff_rorp, dsrp, incpref),
RORPIter.patchonce_action(None, dsrp, diff_rorp)).execute()[0]
self.changed = 1
def end_process(self):
......@@ -217,19 +245,18 @@ class IncrementITR(StatsITR):
if self.directory_replacement:
tf = self.directory_replacement
Inc.Increment(tf, dsrp, incpref)
RORPIter.patchonce_action(None, dsrp, tf).execute()
self.incrp = Robust.chain(
Inc.Increment_action(tf, dsrp, incpref),
RORPIter.patchonce_action(None, dsrp, tf)).execute()[0]
Inc.Increment(diff_rorp, dsrp, incpref)
self.incrp = Inc.Increment(diff_rorp, dsrp, incpref)
if diff_rorp:
RORPIter.patchonce_action(None, dsrp, diff_rorp).execute()
self.end_stats(diff_rorp, dsrp, Inc._inc_file)
if self.incpref.isdir() and (self.mirror_isdirectory or dsrp.isdir()):
Time.curtime, "data"))
self.end_stats(diff_rorp, dsrp, self.incrp)
if self.mirror_isdirectory or dsrp.isdir():
Stats.write_dir_stats_line(self, dsrp.index)
def branch_process(self, subinstance):
"""Update statistics, and the has_changed flag if change in branch"""
......@@ -239,6 +266,8 @@ class IncrementITR(StatsITR):
class MirrorITR(StatsITR):
"""Like IncrementITR, but only patch mirror directory, don't increment"""
# This is always None since no increments will be created
incrp = None
def __init__(self, inc_rpath):
"""Set inc_rpath, an rpath of the base of the inc tree"""
self.inc_rpath = inc_rpath
......@@ -251,9 +280,6 @@ class MirrorITR(StatsITR):
RORPIter.patchonce_action(None, mirror_dsrp, diff_rorp).execute()
self.incpref = self.inc_rpath.new_index(index)
if mirror_dsrp.isdir() and not self.incpref.lstat():
self.incpref.mkdir() # holds the statistics files
self.diff_rorp, self.mirror_dsrp = diff_rorp, mirror_dsrp
def end_process(self):
......@@ -262,11 +288,9 @@ class MirrorITR(StatsITR):
except AttributeError: # Some error above prevented these being set
self.end_stats(self.diff_rorp, self.mirror_dsrp)
if self.incpref.isdir():
Time.curtime, "data"))
self.end_stats(diff_rorp, mirror_dsrp)
if mirror_dsrp.isdir():
Stats.write_dir_stats_line(self, mirror_dsrp.index)
def branch_process(self, subinstance):
"""Update statistics with subdirectory results"""
......@@ -140,5 +140,4 @@ class Logger:
(exc_info[1], exc_info[0]), verbosity)
logging_func("".join(traceback.format_tb(exc_info[2])), verbosity+1)
Log = Logger()
......@@ -212,13 +212,13 @@ class RORPIter:
"""Return action patching basisrp using diff_rorp"""
assert diff_rorp, "Missing diff index %s" % basisrp.index
if not diff_rorp.lstat():
return RobustAction(lambda: None, basisrp.delete, lambda e: None)
return RobustAction(None, lambda init_val: basisrp.delete(), None)
if Globals.preserve_hardlinks and diff_rorp.isflaglinked():
if not basisrp: basisrp = base_rp.new_index(diff_rorp.index)
return RobustAction(lambda: None,
lambda: Hardlink.link_rp(diff_rorp, basisrp),
lambda e: None)
tf =
def init(): Hardlink.link_rp(diff_rorp, tf, basisrp)
return Robust.make_tf_robustaction(init, tf, basisrp)
elif basisrp and basisrp.isreg() and diff_rorp.isreg():
assert diff_rorp.get_attached_filetype() == 'diff'
return Rdiff.patch_with_attribs_action(basisrp, diff_rorp)
......@@ -16,13 +16,29 @@ class StatsObj:
'DeletedFiles', 'DeletedFileSize',
'ChangedSourceSize', 'ChangedMirrorSize',
'IncrementFiles', 'IncrementFileSize')
stat_time_attrs = ('StartTime', 'EndTime', 'ElapsedTime')
stat_attrs = stat_time_attrs + stat_file_attrs
stat_attrs = ('Filename',) + stat_time_attrs + stat_file_attrs
# Below, the second value in each pair is true iff the value
# indicates a number of bytes
stat_file_pairs = (('SourceFiles', None), ('SourceFileSize', 1),
('MirrorFiles', None), ('MirrorFileSize', 1),
('NewFiles', None), ('NewFileSize', 1),
('DeletedFiles', None), ('DeletedFileSize', 1),
('ChangedFiles', None),
('ChangedSourceSize', 1), ('ChangedMirrorSize', 1),
('IncrementFiles', None), ('IncrementFileSize', 1))
# Set all stats to None, indicating info not available
for attr in stat_attrs: locals()[attr] = None
# This is used in get_byte_summary_string below
byte_abbrev_list = ((1024*1024*1024*1024, "TB"),
(1024*1024*1024, "GB"),
(1024*1024, "MB"),
(1024, "KB"))
def get_stat(self, attribute):
"""Get a statistic"""
try: return self.__dict__[attribute]
......@@ -34,33 +50,89 @@ class StatsObj:
"""Set attribute to given value"""
self.__dict__[attr] = value
def get_stats_line(self, index):
"""Return one line abbreviated version of full stats string"""
file_attrs = map(lambda attr: str(self.get_stat(attr)),
if not index: filename = "."
# use repr to quote newlines in relative filename, then
# take off the leading and trailing quotes.
filename = repr(apply(os.path.join, index))[1:-1]
return " ".join([filename,] + file_attrs)
def set_stats_from_line(self, line):
"""Set statistics from given line"""
def error(): raise StatsException("Bad line '%s'" % line)
if line[-1] == "\n": line = line[:-1]
lineparts = line.split(" ")
if len(lineparts) < len(stat_file_attrs): error()
for attr, val_string in zip(stat_file_attrs,
try: val = long(val_string)
except ValueError:
try: val = float(val_string)
except ValueError: error()
self.set_stat(attr, val)
return self
def get_stats_string(self):
"""Return string printing out statistics"""
"""Return extended string printing out statistics"""
return self.get_timestats_string() + self.get_filestats_string()
def get_timestats_string(self):
"""Return portion of statistics string dealing with time"""
timelist = []
if self.StartTime is not None:
timelist.append("StartTime %s (%s)\n" %
timelist.append("StartTime %.2f (%s)\n" %
(self.StartTime, Time.timetopretty(self.StartTime)))
if self.EndTime is not None:
timelist.append("EndTime %s (%s)\n" %
timelist.append("EndTime %.2f (%s)\n" %
(self.EndTime, Time.timetopretty(self.EndTime)))
if self.StartTime is not None and self.EndTime is not None:
if self.ElapsedTime or (self.StartTime is not None and
self.EndTime is not None):
if self.ElapsedTime is None:
self.ElapsedTime = self.EndTime - self.StartTime
timelist.append("ElapsedTime %s (%s)\n" %
timelist.append("ElapsedTime %.2f (%s)\n" %
(self.ElapsedTime, Time.inttopretty(self.ElapsedTime)))
return "".join(timelist)
def get_filestats_string(self):
"""Return portion of statistics string about files and bytes"""
def fileline(stat_file_pair):
"""Return zero or one line of the string"""
attr, in_bytes = stat_file_pair
val = self.get_stat(attr)
if val is None: return ""
if in_bytes:
return "%s %s (%s)\n" % (attr, val,
else: return "%s %s\n" % (attr, val)
return "".join(map(fileline, self.stat_file_pairs))
filelist = ["%s %s\n" % (attr, self.get_stat(attr))
for attr in self.stat_file_attrs
if self.get_stat(attr) is not None]
return "".join(timelist + filelist)
def get_byte_summary_string(self, byte_count):
    """Turn a byte count into a human readable string like "7.23 GB".

    The units in self.byte_abbrev_list are tried in order; the first
    one that byte_count reaches is used, and the scaled value is shown
    with roughly three significant figures.  If no unit is reached,
    the count is rounded and reported in plain bytes ("1 byte",
    "12 bytes").
    """
    for unit_size, unit_name in self.byte_abbrev_list:
        if byte_count < unit_size:
            continue
        scaled = float(byte_count) / unit_size
        # Drop decimals as the integer part grows, to keep about
        # three significant figures
        if scaled >= 100:
            decimals = 0
        elif scaled >= 10:
            decimals = 1
        else:
            decimals = 2
        # First build a template such as "%.2f GB", then fill it in
        template = "%%.%df %s" % (decimals, unit_name)
        return template % (scaled,)
    whole_bytes = round(byte_count)
    if whole_bytes == 1:
        return "1 byte"
    return "%d bytes" % (whole_bytes,)
def get_stats_logstring(self, title):
"""Like get_stats_string, but add header and footer"""
header = "-------------[ %s ]-------------" % title
header = "--------------[ %s ]--------------" % title
footer = "-" * len(header)
return "%s\n%s%s\n" % (header, self.get_stats_string(), footer)
def init_stats_from_string(self, s):
def set_stats_from_string(self, s):
"""Initialize attributes from string, return self for convenience"""
def error(line): raise StatsException("Bad line '%s'" % line)
......@@ -91,7 +163,7 @@ class StatsObj:
def read_stats_from_rp(self, rp):
"""Set statistics from rpath, return self for convenience"""
fp ="r")
return self
......@@ -162,22 +234,96 @@ class StatsITR(IterTreeReducer, StatsObj):
self.ChangedFiles += 1
self.ChangedSourceSize += mirror_dsrp.getsize()
self.ChangedMirrorSize += self.mirror_base_size
self.IncrementFileSize += inc_rp and inc_rp.getsize() or 0
if inc_rp:
self.IncrementFiles += 1
self.IncrementFileSize += inc_rp.getsize()
else: # new file was created
self.NewFiles += 1
self.NewFileSize += mirror_dsrp.getsize()
self.IncrementFileSize += inc_rp and inc_rp.getsize() or 0
if inc_rp:
self.IncrementFiles += 1
self.IncrementFileSize += inc_rp.getsize()
if self.mirror_base_exists: # file was deleted from mirror
self.MirrorFiles += 1
self.MirrorFileSize += self.mirror_base_size
self.DeletedFiles += 1
self.DeletedFileSize += self.mirror_base_size
self.IncrementFileSize += inc_rp and inc_rp.getsize() or 0
if inc_rp:
self.IncrementFiles += 1
self.IncrementFileSize += inc_rp.getsize()
def add_file_stats(self, subinstance):
"""Add all file statistics from subinstance to current totals"""
for attr in self.stat_file_attrs:
self.get_stat(attr) + subinstance.get_stat(attr))
class Stats:
"""Misc statistics methods, pertaining to dir and session stat files"""
# This is the RPath of the directory statistics file, and the
# associated open file. It will hold a line of statistics for
# each directory that is backed up.
_dir_stats_rp = None
_dir_stats_fp = None
# This goes at the beginning of the directory statistics file and
# explains the format.
_dir_stats_header = """# rdiff-backup directory statistics file
# Each line is in the following format:
# RelativeDirName %s
""" % " ".join(StatsObj.stat_file_attrs)
def open_dir_stats_file(cls):
"""Open directory statistics file, write header"""
assert not cls._dir_stats_fp, "Directory file already open"
if Globals.compression: suffix = "data.gz"
else: suffix = "data"
cls._dir_stats_rp = Inc.get_inc(Globals.rbdir.append(
"directory_statistics"), Time.curtime, suffix)
if cls._dir_stats_rp.lstat():
Log("Warning, statistics file %s already exists, appending", 2)
cls._dir_stats_fp ="ab",
else: cls._dir_stats_fp = \"wb", Globals.compression)
def write_dir_stats_line(cls, statobj, index):
    """Write statobj's one-line statistics for index to the stats file.

    The line is produced by statobj.get_stats_line(index) and a
    trailing newline is added here.  cls._dir_stats_fp must already
    hold an open, writable file object.
    """
    stats_line = statobj.get_stats_line(index)
    cls._dir_stats_fp.write(stats_line + "\n")
def close_dir_stats_file(cls):
    """Close the directory statistics file if it is open.

    Does nothing when no file is open.  After closing, the handle is
    reset to None so the file can be opened again later.
    """
    if cls._dir_stats_fp:
        # Actually close the handle before dropping the reference;
        # merely clearing it would leave the file open and could lose
        # buffered output.
        cls._dir_stats_fp.close()
        cls._dir_stats_fp = None
def write_session_statistics(cls, statobj):
"""Write session statistics into file, log"""
stat_inc = Inc.get_inc(Globals.rbdir.append("session_statistics"),
Time.curtime, "data")
statobj.StartTime = Time.curtime
statobj.EndTime = time.time()
# include hardlink data and dir stats in size of increments
if Globals.preserve_hardlinks and Hardlink.final_inc:
# include hardlink data in size of increments
statobj.IncrementFiles += 1
statobj.IncrementFileSize += Hardlink.final_inc.getsize()
if cls._dir_stats_rp and cls._dir_stats_rp.lstat():
statobj.IncrementFiles += 1
statobj.IncrementFileSize += cls._dir_stats_rp.getsize()
if Globals.print_statistics:
message = statobj.get_stats_logstring("Session statistics")
......@@ -8,7 +8,7 @@ import re, os
class Globals:
# The current version of rdiff-backup
version = ""
version = ""
# If this is set, use this value in seconds as the current time
# instead of reading it from the clock.
#!/usr/bin/env python
# rdiff-backup -- Mirror files while keeping incremental changes
# Version released May 25, 2002
# Version released May 25, 2002
# Copyright (C) 2001, 2002 Ben Escoto <>
# This program is licensed under the GNU General Public License (GPL).
......@@ -240,6 +240,7 @@ class HLDestinationStruct:
"""Apply diffs and finalize, with checkpointing and statistics"""
collated = RORPIter.CollateIterators(diffs, cls.initial_dsiter2)
finalizer, ITR = cls.get_finalizer(), cls.get_MirrorITR(inc_rpath)
dsrp = None
def error_checked():
......@@ -262,13 +263,15 @@ class HLDestinationStruct:
cls.check_skip_error(finalizer.Finish, dsrp)
except: cls.handle_last_error(dsrp, finalizer, ITR)
if Globals.preserve_hardlinks: Hardlink.final_writedata()
def patch_increment_and_finalize(cls, dest_rpath, diffs, inc_rpath):
"""Apply diffs, write increment if necessary, and finalize"""
collated = RORPIter.CollateIterators(diffs, cls.initial_dsiter2)
finalizer, ITR = cls.get_finalizer(), cls.get_ITR(inc_rpath)
dsrp = None
def error_checked():
......@@ -292,7 +295,8 @@ class HLDestinationStruct:
cls.check_skip_error(finalizer.Finish, dsrp)
except: cls.handle_last_error(dsrp, finalizer, ITR)
if Globals.preserve_hardlinks: Hardlink.final_writedata()
def check_skip_error(cls, thunk, dsrp):
......@@ -323,19 +327,4 @@ class HLDestinationStruct:
def write_statistics(cls, ITR):
"""Write session statistics to file, log"""
stat_inc = Inc.get_inc(Globals.rbdir.append("session_statistics"),
Time.curtime, "data")
ITR.StartTime = Time.curtime
ITR.EndTime = time.time()
if Globals.preserve_hardlinks and Hardlink.final_inc:
# include hardlink data in size of increments
ITR.IncrementFileSize += Hardlink.final_inc.getsize()
if Globals.print_statistics:
message = ITR.get_stats_logstring("Session statistics")
......@@ -22,7 +22,11 @@ class Inc:
mirror is the mirrored file from the last backup,
incpref is the prefix of the increment file.
This function basically moves mirror -> incpref.
This function basically moves the information about the mirror
file to incpref.
The returned RobustAction when executed should return the name
of the incfile, or None if none was created.
if not (new and new.lstat() or mirror.lstat()):
......@@ -39,13 +43,15 @@ class Inc:
else: return Inc.makesnapshot_action(mirror, incpref)
def Increment(new, mirror, incpref):
Inc.Increment_action(new, mirror, incpref).execute()
return Inc.Increment_action(new, mirror, incpref).execute()
def makemissing_action(incpref):
"""Signify that mirror file was missing"""
return RobustAction(lambda: None,
Inc.get_inc_ext(incpref, "missing").touch,
lambda exp: None)
def final(init_val):
incrp = Inc.get_inc_ext(incpref, "missing")
return incrp
return RobustAction(None, final, None)
def makesnapshot_action(mirror, incpref):
"""Copy mirror to incfile, since new is quite different"""
......@@ -62,22 +68,29 @@ class Inc:
if (Globals.compression and
not Globals.no_compression_regexp.match(mirror.path)):
diff = Inc.get_inc_ext(incpref, "diff.gz")
return Robust.chain([Rdiff.write_delta_action(new, mirror,
diff, 1),
Robust.copy_attribs_action(mirror, diff)])
compress = 1
diff = Inc.get_inc_ext(incpref, "diff")
return Robust.chain([Rdiff.write_delta_action(new, mirror,
diff, None),
Robust.copy_attribs_action(mirror, diff)])
compress = None
diff_tf =
sig_tf =, None)
def init():
Rdiff.write_delta(new, mirror, diff_tf, compress, sig_tf)
RPath.copy_attribs(mirror, diff_tf)
return diff
return Robust.make_tf_robustaction(init, (diff_tf, sig_tf),
(diff, None))
def makedir_action(mirrordir, incpref):
"""Make file indicating directory mirrordir has changed"""
dirsign = Inc.get_inc_ext(incpref, "dir")
def final():
RPath.copy_attribs(mirrordir, dirsign)
return RobustAction(lambda: None, final, dirsign.delete)
tf =
def init():
RPath.copy_attribs(mirrordir, tf)
return dirsign
return Robust.make_tf_robustaction(init, tf, dirsign)
def get_inc(rp, time, typestr):
"""Return increment like rp but with time and typestr suffixes"""
......@@ -127,8 +140,15 @@ class IncrementITR(StatsITR):
Remember this object needs to be picklable.
mirror_isdirectory, directory_replacement = None, None
# Iff true, mirror file was a directory
mirror_isdirectory = None
# If set, what the directory on the mirror side will be replaced with
directory_replacement = None
# True iff there has been some change at this level or lower (used
# for marking directories to be flagged)
changed = None
# Holds the RPath of the created increment file, if any
incrp = None
def __init__(self, inc_rpath):
"""Set inc_rpath, an rpath of the base of the tree"""
......@@ -184,26 +204,34 @@ class IncrementITR(StatsITR):
if not (incpref.lstat() and incpref.isdir()): incpref.mkdir()
if diff_rorp and diff_rorp.isreg() and diff_rorp.file:
tf =
def init():
RPathStatic.copy_with_attribs(diff_rorp, tf)
def error(exc, ran_init, init_val): tf.delete()
RobustAction(init, None, error).execute()
self.directory_replacement = tf
def init_non_dir(self, dsrp, diff_rorp, incpref):
"""Process a non directory file (initial pass)"""
if not diff_rorp: return # no diff, so no change necessary
if diff_rorp.isreg() and (dsrp.isreg() or diff_rorp.isflaglinked()):
tf =
# Write updated mirror to temp file so we can compute
# reverse diff locally
mirror_tf =
def init_thunk():
if diff_rorp.isflaglinked():
Hardlink.link_rp(diff_rorp, tf, dsrp)
Hardlink.link_rp(diff_rorp, mirror_tf, dsrp)
else: Rdiff.patch_with_attribs_action(dsrp, diff_rorp,
Inc.Increment_action(tf, dsrp, incpref).execute()
Robust.make_tf_robustaction(init_thunk, (tf,), (dsrp,)).execute()
Robust.chain([Inc.Increment_action(diff_rorp, dsrp, incpref),
RORPIter.patchonce_action(None, dsrp, diff_rorp)]
self.incrp = Inc.Increment_action(mirror_tf, dsrp,
def final(init_val): mirror_tf.rename(dsrp)
def error(exc, ran_init, init_val): mirror_tf.delete()
RobustAction(init_thunk, final, error).execute()
else: self.incrp = Robust.chain(
Inc.Increment_action(diff_rorp, dsrp, incpref),
RORPIter.patchonce_action(None, dsrp, diff_rorp)).execute()[0]
self.changed = 1
def end_process(self):
......@@ -217,19 +245,18 @@ class IncrementITR(StatsITR):
if self.directory_replacement:
tf = self.directory_replacement
Inc.Increment(tf, dsrp, incpref)
RORPIter.patchonce_action(None, dsrp, tf).execute()
self.incrp = Robust.chain(
Inc.Increment_action(tf, dsrp, incpref),
RORPIter.patchonce_action(None, dsrp, tf)).execute()[0]
Inc.Increment(diff_rorp, dsrp, incpref)
self.incrp = Inc.Increment(diff_rorp, dsrp, incpref)
if diff_rorp:
RORPIter.patchonce_action(None, dsrp, diff_rorp).execute()
self.end_stats(diff_rorp, dsrp, Inc._inc_file)
if self.incpref.isdir() and (self.mirror_isdirectory or dsrp.isdir()):
Time.curtime, "data"))
self.end_stats(diff_rorp, dsrp, self.incrp)
if self.mirror_isdirectory or dsrp.isdir():
Stats.write_dir_stats_line(self, dsrp.index)
def branch_process(self, subinstance):
"""Update statistics, and the has_changed flag if change in branch"""
......@@ -239,6 +266,8 @@ class IncrementITR(StatsITR):
class MirrorITR(StatsITR):
"""Like IncrementITR, but only patch mirror directory, don't increment"""
# This is always None since no increments will be created
incrp = None
def __init__(self, inc_rpath):
"""Set inc_rpath, an rpath of the base of the inc tree"""
self.inc_rpath = inc_rpath
......@@ -251,9 +280,6 @@ class MirrorITR(StatsITR):
RORPIter.patchonce_action(None, mirror_dsrp, diff_rorp).execute()
self.incpref = self.inc_rpath.new_index(index)
if mirror_dsrp.isdir() and not self.incpref.lstat():
self.incpref.mkdir() # holds the statistics files
self.diff_rorp, self.mirror_dsrp = diff_rorp, mirror_dsrp
def end_process(self):
......@@ -262,11 +288,9 @@ class MirrorITR(StatsITR):
except AttributeError: # Some error above prevented these being set
self.end_stats(self.diff_rorp, self.mirror_dsrp)
if self.incpref.isdir():
Time.curtime, "data"))
self.end_stats(diff_rorp, mirror_dsrp)
if mirror_dsrp.isdir():
Stats.write_dir_stats_line(self, mirror_dsrp.index)
def branch_process(self, subinstance):
"""Update statistics with subdirectory results"""
......@@ -140,5 +140,4 @@ class Logger:
(exc_info[1], exc_info[0]), verbosity)
logging_func("".join(traceback.format_tb(exc_info[2])), verbosity+1)
Log = Logger()
......@@ -146,7 +146,8 @@ class Main:
self.action == "remove-older-than"):
self.commandline_error("Only use one argument, "
"the root of the backup directory")
if l > 2: self.commandline_error("Too many arguments given")
if l > 2 and self.action != "calculate-average":
self.commandline_error("Too many arguments given")
def commandline_error(self, message):
sys.stderr.write("Error: %s\n" % message)
......@@ -24,11 +24,11 @@ class Rdiff:
"""Like get_delta but signature is in a file object"""
sig_tf =, None)
rdiff_popen_obj = Rdiff.get_delta(sig_tf, rp_new)
rdiff_popen_obj = Rdiff.get_delta_sigrp(sig_tf, rp_new)
return rdiff_popen_obj
def get_delta(rp_signature, rp_new):
def get_delta_sigrp(rp_signature, rp_new):
"""Take signature rp and new rp, return delta file object"""
assert rp_signature.conn is rp_new.conn
Log("Getting delta of %s with signature %s" %
......@@ -45,18 +45,18 @@ class Rdiff:
sig_tf =, None)
delta_tf =
def init():
Log("Writing delta %s from %s -> %s" %
(basis.path, new.path, delta.path), 7)
delta_tf.write_from_fileobj(Rdiff.get_delta(sig_tf, new), compress)
def init(): Rdiff.write_delta(basis, new, delta_tf, compress, sig_tf)
return Robust.make_tf_robustaction(init, (sig_tf, delta_tf),
(None, delta))
def write_delta(basis, new, delta, compress = None):
def write_delta(basis, new, delta, compress = None, sig_tf = None):
"""Write rdiff delta which brings basis to new"""
Rdiff.write_delta_action(basis, new, delta, compress).execute()
Log("Writing delta %s from %s -> %s" %
(basis.path, new.path, delta.path), 7)
if not sig_tf: sig_tf =, None)
delta.write_from_fileobj(Rdiff.get_delta_sigrp(sig_tf, new), compress)
def patch_action(rp_basis, rp_delta, rp_out = None,
out_tf = None, delta_compressed = None):
......@@ -106,18 +106,20 @@ class Rdiff:
if not rp_out: rp_out = rp_basis
delta_tf =, None)
def init(): delta_tf.write_from_fileobj(delta_fileobj)
return Robust.chain_nested([RobustAction(init, delta_tf.delete,
lambda exc: delta_tf.delete),
def final(init_val): delta_tf.delete()
def error(exc, ran_init, init_val): delta_tf.delete()
write_delta_action = RobustAction(init, final, error)
return Robust.chain(write_delta_action,
Rdiff.patch_action(rp_basis, delta_tf,
rp_out, out_tf)])
rp_out, out_tf))
def patch_with_attribs_action(rp_basis, rp_delta, rp_out = None):
"""Like patch_action, but also transfers attributes from rp_delta"""
if not rp_out: rp_out = rp_basis
tf =
return Robust.chain_nested(
[Rdiff.patch_action(rp_basis, rp_delta, rp_out, tf),
Robust.copy_attribs_action(rp_delta, tf)])
Rdiff.patch_action(rp_basis, rp_delta, rp_out, tf),
Robust.copy_attribs_action(rp_delta, tf))
def copy_action(rpin, rpout):
"""Use rdiff to copy rpin to rpout, conserving bandwidth"""
......@@ -212,13 +212,13 @@ class RORPIter:
"""Return action patching basisrp using diff_rorp"""
assert diff_rorp, "Missing diff index %s" % basisrp.index
if not diff_rorp.lstat():
return RobustAction(lambda: None, basisrp.delete, lambda e: None)
return RobustAction(None, lambda init_val: basisrp.delete(), None)
if Globals.preserve_hardlinks and diff_rorp.isflaglinked():
if not basisrp: basisrp = base_rp.new_index(diff_rorp.index)
return RobustAction(lambda: None,
lambda: Hardlink.link_rp(diff_rorp, basisrp),
lambda e: None)
tf =
def init(): Hardlink.link_rp(diff_rorp, tf, basisrp)
return Robust.make_tf_robustaction(init, tf, basisrp)
elif basisrp and basisrp.isreg() and diff_rorp.isreg():
assert diff_rorp.get_attached_filetype() == 'diff'
return Rdiff.patch_with_attribs_action(basisrp, diff_rorp)
......@@ -16,13 +16,29 @@ class StatsObj:
'DeletedFiles', 'DeletedFileSize',
'ChangedSourceSize', 'ChangedMirrorSize',
'IncrementFiles', 'IncrementFileSize')
stat_time_attrs = ('StartTime', 'EndTime', 'ElapsedTime')
stat_attrs = stat_time_attrs + stat_file_attrs
stat_attrs = ('Filename',) + stat_time_attrs + stat_file_attrs
# Below, the second value in each pair is true iff the value
# indicates a number of bytes
stat_file_pairs = (('SourceFiles', None), ('SourceFileSize', 1),
('MirrorFiles', None), ('MirrorFileSize', 1),
('NewFiles', None), ('NewFileSize', 1),
('DeletedFiles', None), ('DeletedFileSize', 1),
('ChangedFiles', None),
('ChangedSourceSize', 1), ('ChangedMirrorSize', 1),
('IncrementFiles', None), ('IncrementFileSize', 1))
# Set all stats to None, indicating info not available
for attr in stat_attrs: locals()[attr] = None
# This is used in get_byte_summary_string below
byte_abbrev_list = ((1024*1024*1024*1024, "TB"),
(1024*1024*1024, "GB"),
(1024*1024, "MB"),
(1024, "KB"))
def get_stat(self, attribute):
"""Get a statistic"""
try: return self.__dict__[attribute]
......@@ -34,33 +50,89 @@ class StatsObj:
"""Set attribute to given value"""
self.__dict__[attr] = value
def get_stats_line(self, index):
"""Return one line abbreviated version of full stats string"""
file_attrs = map(lambda attr: str(self.get_stat(attr)),
if not index: filename = "."
# use repr to quote newlines in relative filename, then
# take off the leading and trailing quotes.
filename = repr(apply(os.path.join, index))[1:-1]
return " ".join([filename,] + file_attrs)
def set_stats_from_line(self, line):
"""Set statistics from given line"""
def error(): raise StatsException("Bad line '%s'" % line)
if line[-1] == "\n": line = line[:-1]
lineparts = line.split(" ")
if len(lineparts) < len(stat_file_attrs): error()
for attr, val_string in zip(stat_file_attrs,
try: val = long(val_string)
except ValueError:
try: val = float(val_string)
except ValueError: error()
self.set_stat(attr, val)
return self
def get_stats_string(self):
"""Return string printing out statistics"""
"""Return extended string printing out statistics"""
return self.get_timestats_string() + self.get_filestats_string()
def get_timestats_string(self):
"""Return portion of statistics string dealing with time"""
timelist = []
if self.StartTime is not None:
timelist.append("StartTime %s (%s)\n" %
timelist.append("StartTime %.2f (%s)\n" %
(self.StartTime, Time.timetopretty(self.StartTime)))
if self.EndTime is not None:
timelist.append("EndTime %s (%s)\n" %
timelist.append("EndTime %.2f (%s)\n" %
(self.EndTime, Time.timetopretty(self.EndTime)))
if self.StartTime is not None and self.EndTime is not None:
if self.ElapsedTime or (self.StartTime is not None and
self.EndTime is not None):
if self.ElapsedTime is None:
self.ElapsedTime = self.EndTime - self.StartTime
timelist.append("ElapsedTime %s (%s)\n" %
timelist.append("ElapsedTime %.2f (%s)\n" %
(self.ElapsedTime, Time.inttopretty(self.ElapsedTime)))
return "".join(timelist)
def get_filestats_string(self):
    """Return portion of statistics string about files and bytes

    self.stat_file_pairs is walked in order; each entry is a pair
    (attribute name, in_bytes flag).  Statistics whose value is None
    are left out.  When the flag is true the raw value is followed by
    its get_byte_summary_string form in parentheses.
    """
    def fileline(stat_file_pair):
        """Return zero or one line of the string"""
        attr, in_bytes = stat_file_pair
        val = self.get_stat(attr)
        if val is None: return ""
        if in_bytes:
            return "%s %s (%s)\n" % (attr, val,
                                     self.get_byte_summary_string(val))
        else: return "%s %s\n" % (attr, val)
    return "".join(map(fileline, self.stat_file_pairs))
def get_byte_summary_string(self, byte_count):
    """Turn byte count into a short human readable string

    self.byte_abbrev_list supplies (size in bytes, suffix) pairs; the
    first pair whose size does not exceed byte_count is used, and the
    scaled count is printed with about three significant figures
    followed by a space and the suffix.  If no pair applies the count
    is rounded and reported as "1 byte" or "N bytes".
    (Presumably the list is ordered from largest unit to smallest --
    TODO confirm where byte_abbrev_list is defined.)
    """
    for unit_size, unit_name in self.byte_abbrev_list:
        if byte_count < unit_size: continue
        scaled = float(byte_count)/unit_size
        # fewer decimals as the integer part grows
        if scaled >= 100: digits = 0
        elif scaled >= 10: digits = 1
        else: digits = 2
        template = "%%.%df %s" % (digits, unit_name)
        return template % (scaled,)
    whole = round(byte_count)
    if whole == 1: return "1 byte"
    return "%d bytes" % (whole,)
def get_stats_logstring(self, title):
    """Like get_stats_string, but add header and footer

    The header is title framed by dashes; the footer is a row of
    dashes of the same length as the header.
    """
    header = "--------------[ %s ]--------------" % title
    footer = "-" * len(header)
    return "%s\n%s%s\n" % (header, self.get_stats_string(), footer)
def init_stats_from_string(self, s):
def set_stats_from_string(self, s):
"""Initialize attributes from string, return self for convenience"""
def error(line): raise StatsException("Bad line '%s'" % line)
......@@ -91,7 +163,7 @@ class StatsObj:
def read_stats_from_rp(self, rp):
"""Set statistics from rpath, return self for convenience"""
fp ="r")
return self
......@@ -162,22 +234,96 @@ class StatsITR(IterTreeReducer, StatsObj):
self.ChangedFiles += 1
self.ChangedSourceSize += mirror_dsrp.getsize()
self.ChangedMirrorSize += self.mirror_base_size
self.IncrementFileSize += inc_rp and inc_rp.getsize() or 0
if inc_rp:
self.IncrementFiles += 1
self.IncrementFileSize += inc_rp.getsize()
else: # new file was created
self.NewFiles += 1
self.NewFileSize += mirror_dsrp.getsize()
self.IncrementFileSize += inc_rp and inc_rp.getsize() or 0
if inc_rp:
self.IncrementFiles += 1
self.IncrementFileSize += inc_rp.getsize()
if self.mirror_base_exists: # file was deleted from mirror
self.MirrorFiles += 1
self.MirrorFileSize += self.mirror_base_size
self.DeletedFiles += 1
self.DeletedFileSize += self.mirror_base_size
self.IncrementFileSize += inc_rp and inc_rp.getsize() or 0
if inc_rp:
self.IncrementFiles += 1
self.IncrementFileSize += inc_rp.getsize()
def add_file_stats(self, subinstance):
    """Add all file statistics from subinstance to current totals

    For every name in self.stat_file_attrs the value held by
    subinstance is added to the value held by self and the sum is
    stored back on self.  subinstance is not modified.
    """
    for attr in self.stat_file_attrs:
        self.set_stat(attr,
                      self.get_stat(attr) + subinstance.get_stat(attr))
class Stats:
"""Misc statistics methods, pertaining to dir and session stat files"""
# This is the RPath of the directory statistics file, and the
# associated open file. It will hold a line of statistics for
# each directory that is backed up.
# Both start out as None; open_dir_stats_file assigns them and
# close_dir_stats_file sets the file object back to None.
_dir_stats_rp = None
_dir_stats_fp = None
# This goes at the beginning of the directory statistics file and
# explains the format.
# The %s is filled in once, when the class body is executed, with the
# space separated names from StatsObj.stat_file_attrs.
_dir_stats_header = """# rdiff-backup directory statistics file
# Each line is in the following format:
# RelativeDirName %s
""" % " ".join(StatsObj.stat_file_attrs)
def open_dir_stats_file(cls):
"""Open directory statistics file, write header"""
# Opening twice is treated as a programming error.
assert not cls._dir_stats_fp, "Directory file already open"
# The increment suffix depends on whether compression is enabled.
if Globals.compression: suffix = "data.gz"
else: suffix = "data"
cls._dir_stats_rp = Inc.get_inc(Globals.rbdir.append(
"directory_statistics"), Time.curtime, suffix)
if cls._dir_stats_rp.lstat():
# NOTE(review): the "%s" in this message is never interpolated, so
# the log shows a literal "%s" instead of the file name.
Log("Warning, statistics file %s already exists, appending", 2)
# NOTE(review): the two assignments below look truncated -- the call
# that produces the file object is missing.  Presumably they open
# cls._dir_stats_rp in "ab" / "wb" mode; confirm against the repository.
cls._dir_stats_fp ="ab",
else: cls._dir_stats_fp = \"wb", Globals.compression)
# NOTE(review): the docstring mentions writing a header, but no write of
# _dir_stats_header is visible in this method -- confirm.
def write_dir_stats_line(cls, statobj, index):
    """Write statobj's one-line summary for index to the statistics file

    The line comes from statobj.get_stats_line(index); a newline is
    appended before it is written to cls._dir_stats_fp.
    """
    stats_line = statobj.get_stats_line(index)
    cls._dir_stats_fp.write(stats_line + "\n")
def close_dir_stats_file(cls):
    """Close directory statistics file if it's open

    Does nothing when no file is open.  Otherwise the handle stored in
    cls._dir_stats_fp is cleared and the file is closed.
    """
    fp = cls._dir_stats_fp
    if fp:
        # Clear the reference first so the class never keeps pointing
        # at a closed (or failed-to-close) file object.
        cls._dir_stats_fp = None
        fp.close()
def write_session_statistics(cls, statobj):
"""Write session statistics into file, log"""
stat_inc = Inc.get_inc(Globals.rbdir.append("session_statistics"),
Time.curtime, "data")
# The session is recorded as starting at Time.curtime and ending now.
statobj.StartTime = Time.curtime
statobj.EndTime = time.time()
# include hardlink data and dir stats in size of increments
if Globals.preserve_hardlinks and Hardlink.final_inc:
statobj.IncrementFiles += 1
statobj.IncrementFileSize += Hardlink.final_inc.getsize()
# Count the directory statistics file only if it was created and exists.
if cls._dir_stats_rp and cls._dir_stats_rp.lstat():
statobj.IncrementFiles += 1
statobj.IncrementFileSize += cls._dir_stats_rp.getsize()
if Globals.print_statistics:
message = statobj.get_stats_logstring("Session statistics")
# NOTE(review): stat_inc and message are not used in the lines visible
# here; the statements that write them out appear to be missing --
# confirm against the repository.
......@@ -95,7 +95,9 @@ class Time:
if seconds == 1: partlist.append("1 second")
elif not partlist or seconds > 1:
if isinstance(seconds, int) or isinstance(seconds, long):
partlist.append("%s seconds" % seconds)
else: partlist.append("%.2f seconds" % seconds)
return " ".join(partlist)
def intstringtoseconds(cls, interval_string):
