Commit 6ce884c2 authored by Nicolas Delaby's avatar Nicolas Delaby

- Performance improvement: use an iterator to compare files;
  this avoids reading the files entirely when they differ partway through.
- Use parser with remove_blank_text flag to normalise output and c14n.
- Do not append tail to xupdate elements if not needed.
parent 7a94aff6
......@@ -22,17 +22,32 @@
##############################################################################
from lxml import etree
parser = etree.XMLParser(remove_blank_text=True)
import sys
import getopt
import os
from StringIO import StringIO
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
import re
import codecs
from copy import deepcopy
from interfaces.erp5diff import IERP5Diff
import zope.interface
class FileContentNotEqual(Exception):
    """Raised by fileComparisonIterator when the two inputs differ."""
    pass


def fileComparisonIterator(file1, file2):
    """
    Compare two file-like objects line by line, stopping at the first
    difference.

    file1, file2 -- iterables yielding lines (file objects, StringIO
                    instances, ...). They are consumed from their current
                    position.

    Yield a (value1, value2) tuple for every pair of equal lines.
    Raise FileContentNotEqual as soon as two lines differ, or when one
    input is exhausted before the other.
    Two empty inputs are considered equal: nothing is yielded and nothing
    is raised.
    """
    # A private sentinel marks exhaustion, so that an empty input never
    # leaks a StopIteration out of the generator body (which would either
    # end the comparison silently or be turned into a RuntimeError,
    # depending on the Python version).
    end_marker = object()
    iterator1 = iter(file1)
    iterator2 = iter(file2)
    while True:
        value1 = next(iterator1, end_marker)
        value2 = next(iterator2, end_marker)
        if value1 is end_marker and value2 is end_marker:
            # Both inputs fully read without any difference.
            return
        if value1 is end_marker or value2 is end_marker:
            # One input is a strict prefix of the other.
            raise FileContentNotEqual
        if value1 != value2:
            raise FileContentNotEqual
        yield value1, value2
class ERP5Diff:
"""
Make a difference between two XML documents using XUpdate.
......@@ -52,7 +67,7 @@ class ERP5Diff:
# Declarative interfaces
zope.interface.implements(IERP5Diff,)
__version__ = 0.4
__version__ = 0.5
def __init__(self):
"""
......@@ -82,9 +97,9 @@ class ERP5Diff:
doc_list = []
for a in args:
if isinstance(a, str):
doc_list.append(etree.fromstring(a))
doc_list.append(etree.fromstring(a, parser))
else:
element_tree = etree.parse(a)
element_tree = etree.parse(a, parser)
doc_list.append(element_tree.getroot())
return doc_list
......@@ -252,7 +267,7 @@ class ERP5Diff:
for child in element:
clone_node = deepcopy(child)
child_element.append(clone_node)
if self._hasChildren(child_element):
if self._hasChildren(child_element) and element.text is not None:
child_element[-1].tail = element.text
else:
child_element.text = element.text
......@@ -393,17 +408,25 @@ class ERP5Diff:
new_candidate_list = new_list[:]
for old_element in old_list:
old_tree = etree.fromstring(etree.tostring(old_element)).getroottree()
f = StringIO()
old_tree.write_c14n(f)
old_C14n = f.getvalue()
old_c14n = StringIO()
old_tree.write_c14n(old_c14n)
old_c14n.seek(0)
for new_element in new_list:
if new_element not in new_candidate_list:
continue
new_tree = etree.fromstring(etree.tostring(new_element)).getroottree()
f = StringIO()
new_tree.write_c14n(f)
new_C14n = f.getvalue()
if old_C14n == new_C14n:
new_c14n = StringIO()
new_tree.write_c14n(new_c14n)
new_c14n.seek(0)
file_equality = True
try:
#Use generator to avoid reading file entirely
#Stop iteration at first difference
list(fileComparisonIterator(old_c14n, new_c14n))
except FileContentNotEqual:
file_equality = False
old_c14n.seek(0)
if file_equality:
if new_element in new_candidate_list:
new_candidate_list.remove(new_element)
if old_element in old_candidate_list:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment