pax_global_header 0000666 0000000 0000000 00000000064 14310341301 0014500 g ustar 00root root 0000000 0000000 52 comment=6561c395cf31f759380974607be1ca73089a8955
erp5diff-0.8.1.7/ 0000775 0000000 0000000 00000000000 14310341301 0013357 5 ustar 00root root 0000000 0000000 erp5diff-0.8.1.7/CHANGES.txt 0000664 0000000 0000000 00000004760 14310341301 0015177 0 ustar 00root root 0000000 0000000 0.8.1.7 (2015/04/23)
--------------------
* Fix a regression that was introduced in 0.8.1.6.
0.8.1.6 (2015/04/23)
--------------------
* Disable _removeStrictEqualsSubNodeList that can make a wrong result
0.8.1.5 (2011/08/08)
--------------------
* Fix rst syntax
* Improve Handling of mixed content
0.8.1.4 (2011/08/05)
--------------------
* Tail text nodes was not detected (...blablabla...)
* fix import issue of main() function
0.8.1.3 (2011/01/25)
--------------------
* add long_description, improve README
* add missing namespace declaration of egg
0.8.1.2 (2011/01/25)
--------------------
* [fix] installation of egg
0.8.1.1 (2011/01/25)
--------------------
* [Fix] position starts to 1 [Nicolas Delaby]
version 0.8.1 Nicolas Delaby
============================
Bug Fix
--------
* Some nodes stay orphans if they are replaced by another one and followed
by a modification (test 30)
* Exclude comments or processing instruction as sibling node
version 0.8 Nicolas Delaby
==========================
Features
--------
* Include 'gid' in attributes allowed to build an xpath expression
with a unique identifier.
* Use better algorithm to compare two xml nodes (faster).
Bug Fix
-------
* In node comparison, discard text nodes containing only white-space.
* Fix relative calculation of position for xupdate:insert-before nodes
* Add namespace declaration on xupdate nodes which are using
prefix in the built xpath expression.
version 0.7 Nicolas Delaby
==========================
Bug fix
-------
* Nodes whose position change were discarded.
* Declare namespaces used in xpath expression on xupdate output.
version 0.6 Nicolas Delaby
==========================
Bug Fix
-------
* Fix generated xpath expression, the root element was missing.
version 0.5 Nicolas Delaby
==========================
Features
--------
* Add support of namespaces
* Support xupdate:insert-after
version 0.4 Nicolas Delaby
==========================
Features
--------
* Change output of xupdate:append by adding Implied attribute child
version 0.3 Nicolas Delaby
==========================
Bug Fix
-------
* Append position in xpath expression when the value of the id attribute is not unique among its siblings
version 0.2 Nicolas Delaby
==========================
Bug Fix
-------
* Position in xpath starts from 1
version 0.1 Tatuya Kamada
=========================
Features
--------
* ERP5diff implemented with ElementTree (lxml)
version 0 Yoshinori Okuji
=========================
* initial ERP5diff with DOM API (minidom)
erp5diff-0.8.1.7/MAINTAINERS.txt 0000664 0000000 0000000 00000000007 14310341301 0015667 0 ustar 00root root 0000000 0000000 yo
seb
erp5diff-0.8.1.7/README 0000664 0000000 0000000 00000002473 14310341301 0014245 0 ustar 00root root 0000000 0000000 Introduction
============
This is an XUpdate generator that compares any two XML documents.
See http://www.xmldb.org/xupdate/index.html for information on
XUpdate.
Installation
============
python setup.py install
Test
====
python setup.py test
Usage
=====
Once you have installed erp5diff, you can use "erp5diff" in a shell::
erp5diff old.xml new.xml
Or in a python console::
from ERP5Diff import ERP5Diff
erp5diff = ERP5Diff()
old_xml = """
1
2
3
4
5
6
7
8
9
"""
new_xml = """
5
6
1
2
3
4
7
8
9
"""
erp5diff.compare(old_xml, new_xml)
erp5diff.output()
56
- 2003-12-04, Yoshinori OKUJI
- 2009-09-15, Tatuya Kamada
- 2009-2011, Nicolas Delaby
erp5diff-0.8.1.7/bootstrap.py 0000664 0000000 0000000 00000003477 14310341301 0015761 0 ustar 00root root 0000000 0000000 ##############################################################################
#
# Copyright (c) 2006 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Bootstrap a buildout-based project
Simply run this script in a directory containing a buildout.cfg.
The script accepts buildout command-line options, so you can
use the -c option to specify an alternate configuration file.
$Id: bootstrap.py 77225 2007-06-29 09:20:13Z dobe $
"""
import os, shutil, sys, tempfile, urllib2
# Scratch directory receiving the eggs that are only needed to bootstrap.
tmpeggs = tempfile.mkdtemp()
# Make sure setuptools (pkg_resources) can be imported.
try:
import pkg_resources
except ImportError:
# SECURITY NOTE(review): ez_setup.py is downloaded over plain HTTP and
# executed as-is with the Python 2 ``exec ... in`` statement. Nothing here
# verifies the downloaded code; confirm this is acceptable before running.
ez = {}
exec urllib2.urlopen('http://peak.telecommunity.com/dist/ez_setup.py'
).read() in ez
ez['use_setuptools'](to_dir=tmpeggs, download_delay=0)
import pkg_resources
# Command line executed by the child interpreter: it runs easy_install.
cmd = 'from setuptools.command.easy_install import main; main()'
if sys.platform == 'win32':
cmd = '"%s"' % cmd # work around spawn lamosity on windows
ws = pkg_resources.working_set
# Install zc.buildout into tmpeggs with a child Python process whose
# PYTHONPATH points at the setuptools distribution found above.
# NOTE(review): the exit status is checked with ``assert``, which is skipped
# when Python runs with -O.
assert os.spawnle(
os.P_WAIT, sys.executable, sys.executable,
'-c', cmd, '-mqNxd', tmpeggs, 'zc.buildout',
dict(os.environ,
PYTHONPATH=
ws.find(pkg_resources.Requirement.parse('setuptools')).location
),
) == 0
# Make the freshly installed zc.buildout importable, then run its
# 'bootstrap' command with the options given on the command line.
ws.add_entry(tmpeggs)
ws.require('zc.buildout')
import zc.buildout.buildout
zc.buildout.buildout.main(sys.argv[1:] + ['bootstrap'])
# NOTE(review): this cleanup is not in a try/finally, so tmpeggs is left
# behind whenever one of the steps above raises.
shutil.rmtree(tmpeggs)
erp5diff-0.8.1.7/buildout.cfg 0000664 0000000 0000000 00000000133 14310341301 0015664 0 ustar 00root root 0000000 0000000 [buildout]
develop = .
parts = test
[test]
recipe = zc.recipe.testrunner
eggs = erp5diff
erp5diff-0.8.1.7/setup.cfg 0000664 0000000 0000000 00000000073 14310341301 0015200 0 ustar 00root root 0000000 0000000 [egg_info]
tag_build =
tag_date = 0
tag_svn_revision = 0
erp5diff-0.8.1.7/setup.py 0000775 0000000 0000000 00000002367 14310341301 0015104 0 ustar 00root root 0000000 0000000 #! /usr/bin/env python
# -*- coding: utf-8 -*-
from setuptools import setup, find_packages
import re
def read(name):
    """Return the whole content of the file *name*.

    The file is closed before returning, so no handle is left open
    while setup() runs.
    """
    with open(name) as stream:
        return stream.read()


# The API version is declared only once, as ``__version__`` in ERP5Diff.py;
# it is extracted with a regular expression so that the module (and its
# dependencies) does not have to be imported at install time.
version_match = re.search(r'\s*__version__\s*=\s*(\S+)',
                          read('src/ERP5Diff/ERP5Diff.py'))
if version_match is None:
    raise RuntimeError(
        'Unable to find __version__ in src/ERP5Diff/ERP5Diff.py')
api_version = version_match.group(1).strip()

# Packaging revision, appended to the API version (e.g. 0.8.1 -> 0.8.1.7).
revision = 7
# The captured value still carries its quotes, so strip them.
version = '%s.%s' % (api_version.replace("'", ''), revision)

long_description = (
    read('README')
    + '\n' +
    read('CHANGES.txt')
)

setup(name="erp5diff",
      version=version,
      description="XUpdate Generator for ERP5",
      long_description=long_description,
      author="Yoshinori OKUJI",
      author_email="yo@nexedi.com",
      url="http://www.erp5.org/",
      license="GPL",
      packages=find_packages('src'),
      package_dir={'': 'src'},
      entry_points={'console_scripts': ["erp5diff = ERP5Diff:main"]},
      data_files=[('share/man/man1', ['src/erp5diff.1'])],
      install_requires=['lxml'],
      classifiers=['License :: OSI Approved :: GNU General Public License (GPL)',
                   'Operating System :: OS Independent',
                   'Topic :: Text Processing :: Markup :: XML',
                   'Topic :: Utilities'],
      include_package_data=True,
      zip_safe=False,
      test_suite='tests',
      )
erp5diff-0.8.1.7/src/ 0000775 0000000 0000000 00000000000 14310341301 0014146 5 ustar 00root root 0000000 0000000 erp5diff-0.8.1.7/src/ERP5Diff/ 0000775 0000000 0000000 00000000000 14310341301 0015452 5 ustar 00root root 0000000 0000000 erp5diff-0.8.1.7/src/ERP5Diff/ERP5Diff.py 0000664 0000000 0000000 00000071456 14310341301 0017345 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
##############################################################################
#
# Yoshinori OKUJI
#
# Copyright (C) 2003 Nexedi SARL
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ?See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ?02111-1307, USA.
#
##############################################################################
from lxml import etree
# Module-wide lxml parser, used by ERP5Diff._makeDocList for every input
# document. It is created with remove_blank_text=True (per the lxml
# documentation this discards ignorable whitespace-only text between tags).
parser = etree.XMLParser(remove_blank_text=True)
import sys
import getopt
import os
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
import re
import codecs
from copy import deepcopy
from interfaces.erp5diff import IERP5Diff
import zope.interface
def isNodeEquals(old, new):
    """Tell whether two element trees are strictly identical.

    Two nodes are equal when their tag, attributes, text and tail are
    equal, they have the same number of children, and their children are
    pairwise equal (checked recursively, in document order).
    """
    same_node = (old.tag == new.tag and old.attrib == new.attrib
                 and old.text == new.text and old.tail == new.tail
                 and len(old) == len(new))
    if not same_node:
        return False
    # Both nodes have the same number of children, so zip() drops nothing.
    return all(isNodeEquals(old_child, new_child)
               for old_child, new_child in zip(old, new))
class ERP5Diff:
"""
Make a difference between two XML documents using XUpdate.
Use some assumptions in ERP5's data representation.
The strategy is:
1. Find a matching element among elements of the other XML document at the same depth.
2. Use the first matching element, even if there can be other better elements.
3. Assume that two elements are matching, if the tag names are identical. If either of
them has an attribute 'id', the values of the attributes 'id' also must be identical.
4. Don't use xupdate:rename for elements. It should be quite rare to rename tag names
in ERP5, and it is too complicated to support this renaming.
5. Ignore some types of nodes, such as EntityReference and Comment, because they are not
used in ERP5 XML documents.
"""
# Declarative interfaces
zope.interface.implements(IERP5Diff,)
# API version. setup.py extracts this value from this very line with a
# regular expression and appends its own revision number, so keep the
# ``__version__ = '...'`` form on a single line.
__version__ = '0.8.1'
def __init__(self):
"""
Initialize itself.
"""
self._verbose = 0
self._result = None
self._ns = 'http://www.xmldb.org/xupdate'
def setVerbosity(self, verbose):
"""
Set the verbosity.
"""
self._verbose = verbose
def _p(self, msg):
"""
Print a message only if being verbose.
"""
if self._verbose:
sys.stderr.write(str(msg) + os.linesep)
def _makeDocList(self, *args):
"""
Make a list of Document objects.
"""
doc_list = []
for a in args:
if isinstance(a, str):
doc_list.append(etree.fromstring(a, parser))
else:
element_tree = etree.parse(a, parser)
doc_list.append(element_tree.getroot())
return doc_list
def _concatPath(self, p1, p2, separator='/'):
"""
Concatenate 'p1' and 'p2'. Add a separator between them,
only if 'p1' does not end with a separator.
"""
if p1.endswith(separator):
return p1 + p2
return p1 + separator + p2
def _getResultRoot(self):
"""
Return the root element of the result document.
"""
return self._result
#return self._result.getroottree()
def _hasChildren(self, element):
"""
Check whether the element has any children
"""
return bool(len(element))
def _getQName(self, element, attr_name):
"""Return qualified name compatible with xpath
"""
if '{' == attr_name[0]:
#This is a Qualified attribute
index = attr_name.index('}')
local_name = attr_name[index+1:]
namespace_uri = attr_name[1:index]
if namespace_uri == 'http://www.w3.org/XML/1998/namespace':
prefix = 'xml'
else:
prefix = [t[0] for t in element.nsmap.iteritems() if t[1] == namespace_uri][0]
return '%s:%s' % (prefix, local_name,), namespace_uri
else:
return attr_name, None
def _xupdateAppendAttributes(self, attr_dict, path, nsmap=None):
    """
    Add an xupdate:append node selecting 'path' to the result. It holds
    one xupdate:attribute child per entry of 'attr_dict'.

    Keys of 'attr_dict' are (qualified name, namespace URI) tuples and
    are processed in sorted order; values are the attribute values.
    """
    result_root = self._getResultRoot()
    append_element = etree.Element('{%s}append' % self._ns, nsmap=result_root.nsmap)
    append_element.set('select', path)
    for key in sorted(attr_dict):
        qname, namespace_uri = key
        attr_element = etree.Element('{%s}attribute' % self._ns, nsmap=nsmap)
        attr_element.set('name', qname)
        if namespace_uri:
            attr_element.set('namespace', namespace_uri)
        attr_element.text = attr_dict[key]
        append_element.append(attr_element)
    result_root.append(append_element)
def _xupdateRemoveAttribute(self, name, path, nsmap=None):
    """
    Add an xupdate:remove node to the result, targeting the attribute
    'name' of the element at 'path'.

    'name' is a (qualified name, namespace URI) tuple; only the
    qualified name is used to build the select expression.
    """
    attribute_path = self._concatPath(path, 'attribute::' + name[0])
    remove_element = etree.Element('{%s}remove' % self._ns, nsmap=nsmap)
    remove_element.set('select', attribute_path)
    self._getResultRoot().append(remove_element)
def _xupdateUpdateAttribute(self, name, val, path, nsmap=None):
    """
    Add an xupdate:update node to the result, setting the attribute
    'name' of the element at 'path' to 'val'.

    'name' is a (qualified name, namespace URI) tuple; only the
    qualified name is used to build the select expression.
    """
    attribute_path = self._concatPath(path, 'attribute::' + name[0])
    update_element = etree.Element('{%s}update' % self._ns, nsmap=nsmap)
    update_element.set('select', attribute_path)
    update_element.text = val
    self._getResultRoot().append(update_element)
def _xupdateRenameElement(self, name, path, nsmap=None):
    """
    Add an xupdate:rename node to the result: the element selected by
    'path' gets 'name' as its new name.
    """
    rename_element = etree.Element('{%s}rename' % self._ns, nsmap=nsmap)
    rename_element.set('select', path)
    rename_element.text = name
    self._getResultRoot().append(rename_element)
def _xupdateUpdateElement(self, element, path, nsmap=None):
    """
    Add an xupdate:update node selecting 'path' to the result, filled
    with the content of 'element'.

    When 'element' has children, deep copies of them are used as the
    new content; otherwise only its text is used.
    """
    update_element = etree.Element('{%s}update' % self._ns, nsmap=nsmap)
    update_element.set('select', path)
    if not self._hasChildren(element):
        update_element.text = element.text
    else:
        # Copy the children so that the compared document is left intact.
        for child in element:
            update_element.append(deepcopy(child))
    self._getResultRoot().append(update_element)
def _xupdateUpdateTextNode(self, element, text, path, nsmap=None):
    """Add an xupdate:update node replacing the text node at 'path'

    The new content is 'text'. The 'element' argument is accepted but
    not used.
    """
    update_element = etree.Element('{%s}update' % self._ns, nsmap=nsmap)
    update_element.set('select', path)
    update_element.text = text
    self._getResultRoot().append(update_element)
def _xupdateRemoveElement(self, path, nsmap=None):
    """
    Add an xupdate:remove node to the result, targeting the element
    selected by 'path'.
    """
    remove_element = etree.Element('{%s}remove' % self._ns, nsmap=nsmap)
    remove_element.set('select', path)
    self._getResultRoot().append(remove_element)
def _xupdateAppendElements(self, element_list, path):
"""
Append elements to the element at 'path'.
Depending on the siblings of each new element, one of these
containers is generated in the result:
xupdate:append
xupdate:insert-before
xupdate:insert-after
Each container receives one xupdate:element child per new element.
Nothing is generated when 'element_list' is empty.
"""
root = self._getResultRoot()
if not element_list:
return
# All elements are looked up in the parent of the first one.
parent_element = element_list[0].getparent()
len_total_child_list = len(parent_element)
# Container used for the previous element, so that consecutive new
# elements can share it.
last_append_element = None
for element in element_list:
# get only elements not something else (PI and comments are ignored)
# XXX May be support of PI and Comments should be added
# in this case fallback to previous code
# relative_next = element.getnext()
relative_next_list = element.xpath('following-sibling::*[1]')
if relative_next_list:
relative_next = relative_next_list[0]
else:
relative_next = None
relative_previous_list = element.xpath('preceding-sibling::*[1]')
if relative_previous_list:
relative_previous = relative_previous_list[0]
else:
relative_previous = None
# Choose the container, by order of preference:
# 1. the container of the previous new element, when the preceding
#    sibling is itself one of the new elements;
# 2. insert-before the next sibling, when that sibling is not new;
# 3. insert-after the preceding sibling, when that sibling is not new;
# 4. a plain append carrying a 'child' position.
if last_append_element is not None and relative_previous in element_list:
#reuse same container as preceding
append_element = last_append_element
elif relative_next is not None and relative_next not in element_list:
append_element = etree.SubElement(root, '{%s}insert-before' % self._ns, nsmap=element.nsmap)
path_list = self._makeRelativePathList([relative_next], before=1)
next_sibling_path = self._concatPath(path, path_list[0])
append_element.attrib['select'] = next_sibling_path
elif relative_previous is not None and relative_previous not in element_list:
append_element = etree.SubElement(root, '{%s}insert-after' % self._ns, nsmap=element.nsmap)
path_list = self._makeRelativePathList([relative_previous])
preceding_sibling_path = self._concatPath(path, path_list[0])
append_element.attrib['select'] = preceding_sibling_path
else:
#xupdate:append by default
append_element = etree.SubElement(root, '{%s}append' % self._ns, nsmap=element.nsmap)
# 'child' is 'first()' or 'last()' for the two ends of the parent,
# otherwise a number computed from the child count and the index.
# NOTE(review): that number decreases as the index grows; confirm this
# is the intended position.
if parent_element.index(element) == 0:
child = 'first()'
elif parent_element.index(element) == (len_total_child_list -1):
child = 'last()'
else:
child = '%d' % (len_total_child_list - parent_element.index(element) + 1)
append_element.attrib.update({'select': path,
'child': child})
# Describe the new element itself: name, optional namespace, then one
# xupdate:attribute per attribute.
child_element = etree.SubElement(append_element, '{%s}element' % self._ns, nsmap=root.nsmap)
child_element.attrib['name'] = element.xpath('name()')
namespace_uri = element.xpath('namespace-uri()')
if namespace_uri:
child_element.attrib['namespace'] = namespace_uri
attr_map = element.attrib
for name, value in attr_map.items():
attr_element = etree.SubElement(child_element, '{%s}attribute' % self._ns, nsmap=child_element.nsmap)
name, namespace_uri = self._getQName(element, name)
attr_element.attrib['name'] = name
if namespace_uri:
attr_element.attrib['namespace'] = namespace_uri
attr_element.text = value
# Deep copies keep the compared document untouched.
for child in element:
clone_node = deepcopy(child)
child_element.append(clone_node)
# The text of the new element is stored after the last node already put
# in xupdate:element (attribute nodes and copied children), or as plain
# text when there is none.
# NOTE(review): when children were copied, the text ends up after them
# although element.text precedes them in the source - confirm.
if self._hasChildren(child_element) and element.text is not None:
child_element[-1].tail = element.text
else:
child_element.text = element.text
last_append_element = append_element
def _xupdateMoveElements(self, misplaced_node_dict, path, nsmap=None):
"""
Move elements below the element at 'path': each moved element is
removed from its old position and re-created at its new one.
'misplaced_node_dict' maps an anchor (None or an element) to a list of
(old_element, new_element) tuples:
- every old_element is removed with xupdate:remove;
- every new_element is re-created in an xupdate:insert-after placed
after the anchor, or in an xupdate:append with child="first()" when the
anchor is None.
"""
root = self._getResultRoot()
# First pass: remove all the old elements.
to_remove_node_list = []
for element_list in misplaced_node_dict.values():
for element_tuple in element_list:
to_remove_node_list.append(element_tuple[0])
child_path_list = self._makeRelativePathList(to_remove_node_list)
for child_path in child_path_list:
to_remove_path = self._concatPath(path, child_path)
self._xupdateRemoveElement(to_remove_path)
# Second pass: one container per anchor, holding the re-created elements.
for previous, element_tuple_list in misplaced_node_dict.items():
if previous is None:
# NOTE(review): no 'select' attribute is set on this xupdate:append,
# unlike in _xupdateAppendElements - confirm whether 'path' is missing.
append_element = etree.SubElement(root, '{%s}append' % self._ns, nsmap=nsmap)
append_element.attrib['child'] = 'first()'
else:
append_element = etree.SubElement(root, '{%s}insert-after' % self._ns, nsmap=nsmap)
path_list = self._makeRelativePathList([previous])
preceding_sibling_path = self._concatPath(path, path_list[0])
append_element.attrib['select'] = preceding_sibling_path
for element_tuple in element_tuple_list:
# Only the new version of the element is described in the output.
element = element_tuple[1]
child_element = etree.SubElement(append_element, '{%s}element' % self._ns, nsmap=root.nsmap)
child_element.attrib['name'] = element.xpath('name()')
namespace_uri = element.xpath('namespace-uri()')
if namespace_uri:
child_element.attrib['namespace'] = namespace_uri
attr_map = element.attrib
for name, value in attr_map.items():
attr_element = etree.SubElement(child_element, '{%s}attribute' % self._ns, nsmap=child_element.nsmap)
name, namespace_uri = self._getQName(element, name)
attr_element.attrib['name'] = name
if namespace_uri:
attr_element.attrib['namespace'] = namespace_uri
attr_element.text = value
# Deep copies keep the compared document untouched.
for child in element:
clone_node = deepcopy(child)
child_element.append(clone_node)
# Same text placement rule as in _xupdateAppendElements: after the last
# node already in xupdate:element, or as plain text when it is empty.
if self._hasChildren(child_element) and element.text is not None:
child_element[-1].tail = element.text
else:
child_element.text = element.text
def _testElements(self, element1, element2):
"""
Test if two given elements are matching. Matching does not mean that they are identical.
"""
# Make sure that they are elements.
if type(element1) != type(element2) or type(element1) != etree._Element:
return False
if element1.tag != element2.tag:
return False
id_list = []
for attr_map in (element1.attrib, element2.attrib):
if 'id' in attr_map:
id_list.append(attr_map['id'])
if len(id_list) == 0:
return True
if len(id_list) == 1:
return False
return (id_list[0] == id_list[1])
def _testAttributes(self, element1, element2, path):
"""
Test attrib of two given elements. Add differences, if any.
"""
# Make a list of dictionaries of the attributes.
dict_list = []
for element in (element1, element2):
d = {}
for name, value in element.attrib.items():
name, namespace_uri = self._getQName(element, name)
d[(name, namespace_uri)] = value
dict_list.append(d)
dict1, dict2 = dict_list
# Find all added or removed or changed attrib.
#sort key list to stick expected output
key_list1 = dict1.keys()
key_list1.sort()
for name1 in key_list1:
val1 = dict1[name1]
if name1 in dict2:
if val1 != dict2[name1]:
# The value is different.
self._xupdateUpdateAttribute(name1, dict2[name1], path, nsmap=element.nsmap)
# Mark this attribute.
dict2[name1] = None
else:
# This attribute is removed.
self._xupdateRemoveAttribute(name1, path, nsmap=element.nsmap)
d = {}
for name2, val2 in dict2.iteritems():
if val2 is not None:
# This attribute is added.
d[name2] = val2
if d != {}:
self._xupdateAppendAttributes(d, path, nsmap=element.nsmap)
def _checkEmptiness(self, element):
"""
Check if an element has Element or Text nodes
"""
for child in element:
if type(child) == etree._Element:
return False
if element.text is not None:
return False
return True
def _checkIgnoreText(self, element):
"""
Determine if text should be ignored by heuristics,
because ERP5 does not define any schema at the moment.
We ignore white-space text nodes between elements.
pseudo code:
tree = parse("
")
tree.node.text == '\n '
"""
return not [text for text in element.xpath('text()') if text.strip()]
def _makeRelativePathList(self, element_list, before=0):
"""
Make a list of relative paths from a list of elements.
One xpath step is returned per element, in the same order:
- name[@id='value'] (or @gid) when the element has an 'id' or 'gid'
attribute, followed by a [position] predicate when several siblings
share that attribute value;
- name[position] when siblings share the same tag;
- name otherwise.
'before' is subtracted from the position in the second form only.
"""
path_list = []
for element in element_list:
# Check if this element has an attribute 'id' or 'gid'; the first one
# found while iterating the attributes is used.
id_val = None
attr_map = element.attrib
for name, value in attr_map.items():
if name in ('id', 'gid',):
id_val = value
id_of_id = name
break
if id_val is not None:
# If an attribute 'id' or 'gid' is present, uses the attribute for convenience.
# NOTE(review): the value is inserted between quotes without escaping,
# so a value containing a quote presumably yields an invalid expression.
position_predicate = ''
# Number of siblings (whatever their tag) carrying the same value.
len_all_similar_sibling = len(element.xpath('../*[@%s = "%s"]' %\
(id_of_id, id_val)))
if len_all_similar_sibling > 1:
# Position = total minus the following siblings with the same name and
# the same value.
# NOTE(review): the total counts any tag while the subtraction only
# counts same-name siblings - confirm the result when tags are mixed.
position = len_all_similar_sibling - \
element.xpath('count(following-sibling::%s[@%s = "%s"])' %\
(element.xpath('name()'), id_of_id, id_val),
namespaces=element.nsmap)
position_predicate = '[%i]' % position
path_list.append("%s[@%s='%s']%s" % (element.xpath('name()'), id_of_id,
id_val, position_predicate,))
# NOTE(review): the historical comment below does not match any code
# here (no counter is increased in this branch).
# Increase the count, for a case where other elements with the same tag name do not have
# 'id' attrib.
else:
# No identifier: rely on the position among siblings with the same tag.
len_all_similar_sibling = len(element.findall('../%s' % element.tag))
if len_all_similar_sibling > 1:
position = len_all_similar_sibling - len(list(element.itersiblings(tag=element.tag)))
# 'position-before or 1' falls back to 1 when the subtraction gives 0.
path_list.append('%s[%d]' % (element.xpath('name()'), position-before or 1))
else:
path_list.append(element.xpath('name()'))
return path_list
def _aggregateElements(self, element):
"""
Aggregate child elements of an element into a list.
"""
return [child for child in element if type(child) == etree._Element]
def _aggregateText(self, element):
"""
Aggregate child text nodes of an element into a single string.
"""
return '%s' % element.xpath('string(.)')
def _removeStrictEqualsSubNodeList(self, old_list, new_list):
"""Remove inside list all elements which are similar
by using c14n serialisation
This script returns the same list of nodes whithout twins from other list
and a dictionary with nodes whose position has changed.
misplaced_node_dict :
key = anchor_node (node from which the moving node_list will be append)
value = list of tuple:
-old_element (to remove)
-new_element (to insert)
"""
# XXX we do nothing here for now
return old_list, new_list, {}
# XXX because the implementation below can return a wrong result
old_candidate_list = old_list[:]
new_candidate_list = new_list[:]
misplaced_node_dict = {}
misplaced_node_dict_after = {}
misplaced_node_dict_before = {}
old_new_index_mapping = {}
for old_index, old_element in enumerate(old_list):
if old_element not in old_candidate_list:
continue
for new_element in new_list:
new_index = new_list.index(new_element)
if new_element not in new_candidate_list:
continue
node_equality = isNodeEquals(old_element, new_element)
if node_equality:
index_key_on_new_tree = new_element.getparent().index(new_element)
old_new_index_mapping[index_key_on_new_tree] = old_element
new_start = new_index + 1
if new_element in new_candidate_list:
new_candidate_list.remove(new_element)
if old_element in old_candidate_list:
old_candidate_list.remove(old_element)
if old_index == new_index:
break
elif old_index < new_index:
misplaced_node_dict = misplaced_node_dict_after
else:
misplaced_node_dict = misplaced_node_dict_before
previous_new_element = new_element.getprevious()
for key, preceding_value_list in misplaced_node_dict.items():
for element_tuple in preceding_value_list:
if previous_new_element == element_tuple[1]:
#reuse the same previous as much as possible
if key is not None:
previous_new_element = previous_new_element.getparent()[key]
else:
previous_new_element = None
break
if previous_new_element is not None:
index_key_on_new_tree = previous_new_element.getparent().index(previous_new_element)
else:
index_key_on_new_tree = None
misplaced_node_dict.setdefault(index_key_on_new_tree, []).append((old_element, new_element))
break
# Chosse the lighter one to minimise diff
after_dict_weight = sum(len(i) for i in misplaced_node_dict_after.values())
before_dict_weight = sum(len(i) for i in misplaced_node_dict_before.values())
if after_dict_weight > before_dict_weight and before_dict_weight:
misplaced_node_dict = misplaced_node_dict_before
elif after_dict_weight <= before_dict_weight and after_dict_weight:
misplaced_node_dict = misplaced_node_dict_after
else:
misplaced_node_dict = {}
for k, v in misplaced_node_dict.items():
if k in old_new_index_mapping:
value = misplaced_node_dict[k]
misplaced_node_dict[old_new_index_mapping[k]] = value
if k is not None:
#if the element which suppose to support insert-after does not exist in old_tree,
#its just an added node not an moving
#None means that the node will become first child, so keep it
del misplaced_node_dict[k]
return old_candidate_list, new_candidate_list, misplaced_node_dict
def _compareChildNodes(self, old_element, new_element, path):
"""
Compare children of two elements, and add differences into the result, if any.
Call itself recursively, if these elements have grandchildren.
'path' is the xpath expression selecting 'old_element'; it is used as a
prefix for every generated select expression.
"""
self._p("Comparing %s with %s at %s..." % (repr(old_element), repr(new_element), path))
# First, determine if they are empty.
old_is_empty = self._checkEmptiness(old_element)
new_is_empty = self._checkEmptiness(new_element)
if old_is_empty and new_is_empty:
# Nothing to do.
self._p("Both are empty.")
pass
else:
# Second, determine if text should be ignored.
old_ignore_text = self._checkIgnoreText(old_element)
new_ignore_text = self._checkIgnoreText(new_element)
if old_ignore_text != new_ignore_text:
# This means that the semantics of this element is quite different.
self._p("One of them has only text and the other does not, so just update all the contents.")
self._xupdateUpdateElement(new_element, path, nsmap=new_element.nsmap)
elif not old_ignore_text and not len(old_element):
# The contents are only text.
self._p("Both have only text.")
old_text = self._aggregateText(old_element)
new_text = self._aggregateText(new_element)
if old_text != new_text:
self._p("They differ, so update the elements.")
self._xupdateUpdateElement(new_element, path, nsmap=new_element.nsmap)
else:
# The contents are elements.
self._p("Both have elements.")
old_list = self._aggregateElements(old_element)
new_list = self._aggregateElements(new_element)
# The three values come from _removeStrictEqualsSubNodeList, which is
# called before the relative paths are computed.
old_list, new_list, misplaced_node_dict = self._removeStrictEqualsSubNodeList(old_list, new_list)
path_list = self._makeRelativePathList(old_list)
# Index in new_list from which the search of a match starts; it only
# moves forward, so matches keep the document order.
new_start = 0
new_len = len(new_list)
# Useful set to detect orphans in new_list: indexes of new nodes which
# were skipped while looking for a match.
new_object_left_index_set = set()
for old_node, node_path in zip(old_list, path_list):
child_path = self._concatPath(path, node_path)
for new_current in range(new_start, new_len):
new_node = new_list[new_current]
if self._testElements(old_node, new_node):
self._testAttributes(old_node, new_node, child_path)
if not old_ignore_text and len(old_element):
# Mixed Content
# Text and tail are only updated when both sides hold significant
# (non white-space) text and the values differ.
if old_node.text and old_node.text.strip() and new_node.text\
and new_node.text.strip() and old_node.text != new_node.text:
# NOTE(review): the text() position is the index of new_node in its
# parent, which is 0 for a first child - confirm this is intended.
text_path = child_path + '/text()[%i]' % (new_node.getparent().index(new_node))
self._xupdateUpdateTextNode(new_node, new_node.text,
text_path, nsmap=new_element.nsmap)
if old_node.tail and old_node.tail.strip() and new_node.tail\
and new_node.tail.strip() and old_node.tail != new_node.tail:
# Position of the tail among the text nodes of the parent: 1, plus 1
# when the parent has a text, plus one per preceding sibling of the
# old node having a tail.
position = 1
if new_node.getparent().text:
position += 1
position += len([sibling for sibling in old_node.itersiblings(preceding=True) if sibling.tail])
text_path = path + '/text()[%i]' % (position)
self._xupdateUpdateTextNode(new_node, new_node.tail,
text_path, nsmap=new_element.nsmap)
self._compareChildNodes(old_node, new_node, child_path)
# The match is consumed: next searches start after it.
new_start = new_current + 1
if new_current in new_object_left_index_set:
new_object_left_index_set.remove(new_current)
break
else:
# Not a match for this old node: remember it as a potential orphan.
new_object_left_index_set.add(new_current)
else:
# This 'else' belongs to the inner 'for': no 'break' happened.
# There is no matching node. So this element must be removed.
self._xupdateRemoveElement(child_path, old_node.nsmap)
if new_len > new_start:
# There are remaining nodes in the new children.
self._xupdateAppendElements(new_list[new_start:new_len], path)
# If new children are already added, clean up new_object_left_index_set
# (the list comprehension is only used for its side effect).
[new_object_left_index_set.remove(index)\
for index in range(new_start, new_len) if\
index in new_object_left_index_set]
# Skipped new nodes located before the last match are added as well.
if new_object_left_index_set:
self._xupdateAppendElements([new_list[index] for index \
in new_object_left_index_set], path)
if misplaced_node_dict:
self._xupdateMoveElements(misplaced_node_dict, path)
def compare(self, old_xml, new_xml):
    """
    Compute the XUpdate document turning 'old_xml' into 'new_xml' and
    keep it as the current result, replacing any previous one.

    Each argument is either a string holding an XML document, or a
    file object from which an XML document can be read.
    """
    old_doc, new_doc = self._makeDocList(old_xml, new_xml)
    old_root = old_doc.getroottree().getroot()
    new_root = new_doc.getroottree().getroot()
    try:
        # Forget the previous result before building the new one.
        self._result = None
        self._result = etree.Element('{%s}modifications' % self._ns, nsmap={'xupdate': self._ns})
        self._result.set('version', '1.0')
        if self._testElements(old_root, new_root):
            root_path = '/%s' % old_root.xpath('name()')
        else:
            # These XML documents seem to be completely different...
            if old_root.tag != new_root.tag:
                merged_nsmap = old_root.nsmap
                merged_nsmap.update(new_root.nsmap)
                new_name = new_root.xpath('name()')
                self._xupdateRenameElement(new_name, '/%s' % old_root.xpath('name()'), merged_nsmap)
            # Once renamed, the root is addressed by its new name.
            root_path = '/%s' % new_root.xpath('name()')
        self._testAttributes(old_root, new_root, root_path)
        self._compareChildNodes(old_root, new_root, root_path)
    finally:
        del old_doc, new_doc
def output(self, output_file=None):
    """
    Serialize the current result (UTF-8, pretty printed) and write it
    to 'output_file', or to stdout when no file is given.
    """
    stream = sys.stdout if output_file is None else output_file
    stream.write(etree.tostring(self._result, encoding='utf-8', pretty_print=True))
def outputString(self):
"""
Return the result as a string object.
"""
io = StringIO()
self.output(io)
ret = io.getvalue()
io.close()
return ret
def main():
    """
    The main routine of ERP5Diff.

    Parse the command line (sys.argv), compare the two XML files given as
    arguments and write the resulting XUpdate document to stdout, or to
    the file given with -o/--output.

    Always ends with sys.exit(): status 2 on a usage error, the default
    (success) status otherwise. Errors raised while reading, comparing or
    writing are propagated; in that case a partially written output file
    is removed.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ho:v", ["help", "output=", "verbose"])
    except getopt.GetoptError as msg:
        print(msg)
        print("Try ``erp5diff --help'' for more information.")
        sys.exit(2)
    output = None
    verbose = 0
    for o, a in opts:
        # The long option is declared to getopt above, so it has to be
        # handled as well (it used to be silently ignored).
        if o in ("-v", "--verbose"):
            verbose = 1
        elif o in ("-h", "--help"):
            print('''Usage: erp5diff [OPTION]... OLD_XML NEW_XML
Make a difference between two XML documents in XUpdate format.
-h, --help display this message and exit
-o, --output=FILE output the result to the file FILE
-v, --verbose print verbose messages
Report bugs to .''')
            sys.exit()
        elif o in ("-o", "--output"):
            output = a
    if len(args) != 2:
        if len(args) > 2:
            print("Too many arguments.")
        else:
            print("Too few arguments.")
        print("Try ``erp5diff --help'' for more information.")
        sys.exit(2)
    d = ERP5Diff()
    d.setVerbosity(verbose)
    # Both input files are closed even when the comparison fails.
    with open(args[0]) as old_xml:
        with open(args[1]) as new_xml:
            d.compare(old_xml, new_xml)
    if output is None:
        d.output(None)
    else:
        # Opened outside of the cleanup below: when the file cannot be
        # opened there is nothing to close, and a file that may already
        # exist must not be removed.
        out_file = open(output, 'w')
        try:
            try:
                d.output(out_file)
            finally:
                out_file.close()
        except BaseException:
            # Do not leave a truncated result behind, whatever interrupted
            # the writing; the error itself is propagated.
            os.remove(output)
            raise
    sys.exit()
if __name__ == '__main__':
main()
erp5diff-0.8.1.7/src/ERP5Diff/__init__.py 0000664 0000000 0000000 00000000171 14310341301 0017562 0 ustar 00root root 0000000 0000000 # for backward compatibility with old import path
from ERP5Diff import main
from ERP5Diff import ERP5Diff as ERP5Diff
erp5diff-0.8.1.7/src/__init__.py 0000664 0000000 0000000 00000000416 14310341301 0016260 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# See http://peak.telecommunity.com/DevCenter/setuptools#namespace-packages
# Declare this package as a namespace package. Prefer pkg_resources and
# fall back to the standard-library pkgutil mechanism when pkg_resources
# cannot be imported.
try:
    __import__('pkg_resources').declare_namespace(__name__)
except ImportError:
    from pkgutil import extend_path
    __path__ = extend_path(__path__, __name__)
erp5diff-0.8.1.7/src/erp5diff.1 0000664 0000000 0000000 00000001723 14310341301 0015737 0 ustar 00root root 0000000 0000000 .TH ERP5DIFF 1 "4 Dec 2003" "ERP5DIFF version 0.1" Nexedi
.SH NAME
erp5diff \- find differences between two XML documents for ERP5
.SH SYNOPSIS
.B erp5diff
[\fIoptions\fR]... \fIOLD_XML\fR \fINEW_XML\fR
.LP
.SH DESCRIPTION
ERP5Diff is an XUpdate generator for ERP5. It takes two XML files
as input data, and generates the differences between these two XML
documents in the XUpdate language.
.LP
ERP5Diff depends, more or less, on ERP5's XML data format. This tool
is therefore not intended for general-purpose use, but it might work if
your XML files are similar to ERP5's.
.SH OPTIONS
.TP
\fB\-o\fR, \fB\-\-output\fR=\fIFILE\fR
Specify the output file. The standard output is used by default.
.TP
\fB\-h\fR, \fB\-\-help\fR
Display the usage and exit.
.TP
\fB\-v\fR, \fB\-\-verbose\fR
Print verbose messages. Only useful for debugging.
.SH AUTHOR
Yoshinori OKUJI
.SH "SEE ALSO"
\fIhttp://www.xmldb.org/xupdate/index.html\fR,
\fIhttp://www.w3.org/TR/xpath\fR,
\fIhttp://www.w3.org/TR/REC-xml\fR,
\fIhttp://erp5.org\fR
erp5diff-0.8.1.7/src/interfaces/ 0000775 0000000 0000000 00000000000 14310341301 0016271 5 ustar 00root root 0000000 0000000 erp5diff-0.8.1.7/src/interfaces/__init__.py 0000664 0000000 0000000 00000000000 14310341301 0020370 0 ustar 00root root 0000000 0000000 erp5diff-0.8.1.7/src/interfaces/erp5diff.py 0000664 0000000 0000000 00000002656 14310341301 0020360 0 ustar 00root root 0000000 0000000 from zope.interface import Interface
class IERP5Diff(Interface):
    """
    Make a difference between two XML documents using XUpdate.
    Use some assumptions in ERP5's data representation.

    The strategy is:
    1. Find a matching element among the elements of the other XML document at the same depth.
    2. Use the first matching element, even if there may be other, better candidates.
    3. Assume that two elements match if their tag names are identical. If either of
       them has an attribute 'id', the values of the attribute 'id' must also be identical.
    4. Don't use xupdate:rename for elements. It should be quite rare to rename tag names
       in ERP5, and it is too complicated to support this renaming.
    5. Ignore some types of nodes, such as EntityReference and Comment, because they are not
       used in ERP5 XML documents.
    """

    def compare(self, old_xml, new_xml):
        """
        Compare two given XML documents.
        If an argument is a string, it is assumed to be an XML document itself.
        Otherwise, it is assumed to be a file object which contains an XML document.
        """

    def output(self, output_file=None):
        """
        Output the result of comparing the XML documents to 'output_file'.
        If it is not specified, stdout is assumed.
        """

    def outputString(self):
        """
        Return the result as a string object.
        """
def main():
"""
The main routine of ERP5Diff.
""" erp5diff-0.8.1.7/src/tests/ 0000775 0000000 0000000 00000000000 14310341301 0015310 5 ustar 00root root 0000000 0000000 erp5diff-0.8.1.7/src/tests/__init__.py 0000664 0000000 0000000 00000000000 14310341301 0017407 0 ustar 00root root 0000000 0000000 erp5diff-0.8.1.7/src/tests/erp5diff_test_suite.py 0000664 0000000 0000000 00000122175 14310341301 0021646 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
import unittest
import pkg_resources
from ERP5Diff import ERP5Diff
from lxml import etree
from cStringIO import StringIO
# Single ERP5Diff instance shared by every test; each test calls compare()
# on it before reading the result.
erp5diff = ERP5Diff()
class TestERP5Diff(unittest.TestCase):
    """
    Tests for ERP5Diff: each test compares an old and a new XML document
    and checks the serialised result against an expected string.
    """

    def _assertERP5DiffWorks(self, old_xml, new_xml, expected_result_string):
        """
        Compare 'old_xml' with 'new_xml' using the module-level erp5diff
        instance and assert that the pretty-printed result equals
        'expected_result_string'. Both strings are shown on failure.
        """
        erp5diff.compare(old_xml, new_xml)
        result_tree = erp5diff._result
        result_string = etree.tostring(result_tree, pretty_print=True)
        # assertEqual instead of the deprecated assertEquals alias.
        self.assertEqual(result_string, expected_result_string,
                         '\n%s\n\n%s' % (result_string, expected_result_string))

    def test_textNodes(self):
        """1. Update the texts of the three elements
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """description3 çsdf__sdfççç_df___&&é]]]°°°°°°Tatuya2009/08/28 19:12:24.703 GMT+9
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_one_element(self):
        """2. Update one element
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """description3éà@ $*< < -----
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_one_element_same(self):
        """3. Same documents
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_update_text_of_element_and_remove_another_element(self):
        """4. Update the texts of the elements and remove an element
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """description1 --- $sdfrç_sdfsçdf_oisfsopfNone
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_update_2_elements_inlcude_symbols(self):
        """5. Update two elements that include some symbols
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """description4 sdflkmooo^^^^]]]]]{{{{{{{ca
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_update_two_element_with_same_id(self):
        """6. Update two date elements which have the same id
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """2009/08/28 19:12:40.905 GMT+92009/08/28 19:12:40.910 GMT+9
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_insert_and_remove_elemts(self):
        """7. Insert and remove elements
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """tokenstatuya<?xml version="1.0"?><marshal><tuple><string>Owner</string></tuple></marshal>Go to the beachtokensManage portal content<?xml version="1.0"?>
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_update_nested_xml(self):
        """8. Update xml in xml
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """<?xml version="1.0"?><marshal><tuple><string>Assignee</string><string>Assignor</string><string>Associate</string><string>Auditor</string><string>Author</string><string>Manager</string><string>Owner</string></tuple></marshal>
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_rename_element(self):
        """9. Rename element
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """stringTatuyastringKamada
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_rename_root_element(self):
        """10. Rename root element
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """erp6
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_update_one_attribute(self):
        """11. Update one attribute
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """ccc
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_update_two_attributes(self):
        """12. Update two attributes
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """cccccc
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_update_three_attributes(self):
        """13. Update three attributes
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """nnnnnnnnn
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_remove_one_attribute(self):
        """14. Remove one attribute
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_remove_two_attibutes(self):
        """15. Remove two attributes
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_remove_three_attributes(self):
        """16. Remove three attributes
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_append_one_attribute(self):
        """17. Append one attribute
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """aaa
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_append_two_attributes(self):
        """18. Append two attributes
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """aaabbb
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_append_three_attibutes(self):
        """19. Append three attributes
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """aaabbbccc
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_remove_element_with_same_id(self):
        """20. Remove some elements that have the same id
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """2009/08/28 19:12:34.430 GMT+92009/08/28 19:12:34.428 GMT+92009/08/28 19:12:34.426 GMT+9
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_test_remove_element_with_same_id_bis(self):
        """21. Modify elements that have the same id
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """2009/08/29 19:12:34.432 GMT+92009/08/30 19:12:34.434 GMT+92009/08/31 19:12:34.436 GMT+9
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_modify_attributes_of_sequential_objects(self):
        """22. Modify attributes of sequential objects
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """BC
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_nodes_with_qnames(self):
        """23. Modify nodes with Qualified Names
        ERP5Diff should create valid xpath expressions with the correct prefix
        """
        old_xml = """
"""
        new_xml = """
B
"""
        expected_result_string = """anyvalueBATestBTestC
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_attibutes_with_qnames(self):
        """24. Modify nodes with Qualified Names
        Works on attributes specifically
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """B
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_attibutes_with_qnames_at_root_level(self):
        """25. Modify nodes with Qualified Names at root level
        Works on attributes specifically
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """aaa:erp5B
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_reorder_nodes_to_the_end(self):
        """26. Reorder some nodes to the end of list
        """
        old_xml = """
1
2
3
4
5
6
7
8
9
"""
        new_xml = """
1
2
5
6
7
3
4
8
9
"""
        expected_result_string = """56734
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_reorder_nodes_form_the_end(self):
        """26Bis. Reorder some nodes from the end of list
        """
        old_xml = """
1
2
3
4
5
6
7
8
9
"""
        new_xml = """
1
2
7
8
3
4
5
6
9
"""
        expected_result_string = """783456
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_test_reorder_nodes_at_the_beginning(self):
        """27. Reorder some nodes at the beginning
        """
        old_xml = """
1
2
3
4
5
6
7
8
9
"""
        new_xml = """
5
6
1
2
3
4
7
8
9
"""
        expected_result_string = """561234
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_reorder_nodes_at_the_end(self):
        """28. Reorder some nodes at the end
        """
        old_xml = """
1
2
3
4
5
6
7
8
9
"""
        new_xml = """
1
4
5
6
7
8
9
2
3
"""
        expected_result_string = """45678923
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_delete_children_with_withe_space_nodes(self):
        """29. Delete children with white-space as text nodes
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_delete_children_with_auto_closing_nodes(self):
        """29Bis. Delete children with auto-closing nodes
        """
        old_xml = """
"""
        new_xml = """
"""
        expected_result_string = """
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_repalce_a_node_by_another_followed_by_modification(self):
        """30. Replace a node by another one followed by a modification
        """
        old_xml = """
Ballon de Plagea5962z200.250000100.250000ball_size/s4ball_size/s5colour/blackcolour/whitetype/product
"""
        new_xml = """
Ballon de Plagea5962z120.0000001357913579130ball_size/s4ball_size/s6colour/redcolour/whitetype/product
"""
        expected_result_string = """120.000000ball_size/s6colour/red1357913579130
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)

    def test_text_node_in_tails(self):
        """31. Check tail of elements ...blablabla...
        """
        old_xml = """
blablabla
AAABBBAAABBBAAABBBBBBCCC
"""
        new_xml = """
yayaya
CBBBAAADAAABBBEF
"""
        expected_result_string = """yayaya
CDF
E
"""
        self._assertERP5DiffWorks(old_xml, new_xml, expected_result_string)
# Run the whole suite with unittest's command-line runner when this module
# is executed as a script.
if __name__ == '__main__':
    unittest.main()