Commit 89af0938 authored by Hugo H. Maia Vieira

Refactor eliminating _relevantParagraphList


git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk/utils@41267 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 7b951d3f
...@@ -42,17 +42,6 @@ class OOGranulate(object): ...@@ -42,17 +42,6 @@ class OOGranulate(object):
def __init__(self, file, source_format): def __init__(self, file, source_format):
self.document = OdfDocument(file, source_format) self.document = OdfDocument(file, source_format)
def _relevantParagraphList(self):
"""Returns a list with the relevants lxml.etree._Element 'p' tags of
self.document.parsed_content. It exclude the 'p' inside 'draw:frame'."""
# XXX: this algorithm could be improved to not iterate with the file twice
# and probably get all relevant paragraph list by a single xpath call
all_p_list = self.document.parsed_content.xpath('//text:p',
namespaces=self.document.parsed_content.nsmap)
draw_p_list = self.document.parsed_content.xpath('//draw:frame//text:p',
namespaces=self.document.parsed_content.nsmap)
return [x for x in all_p_list if x not in draw_p_list]
def getTableItemList(self, file): def getTableItemList(self, file):
"""Returns the list of table IDs in the form of (id, title).""" """Returns the list of table IDs in the form of (id, title)."""
raise NotImplementedError raise NotImplementedError
...@@ -88,9 +77,12 @@ class OOGranulate(object): ...@@ -88,9 +77,12 @@ class OOGranulate(object):
"""Returns the list of paragraphs in the form of (id, class) where class """Returns the list of paragraphs in the form of (id, class) where class
may have special meaning to define TOC/TOI.""" may have special meaning to define TOC/TOI."""
key = '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name' key = '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name'
relevant_paragraph_list = self.document.parsed_content.xpath(
'//text:p[not(ancestor::draw:frame)]',
namespaces=self.document.parsed_content.nsmap)
id = 0 id = 0
paragraph_list = [] paragraph_list = []
for p in self._relevantParagraphList(): for p in relevant_paragraph_list:
paragraph_list.append((id, p.attrib[key])) paragraph_list.append((id, p.attrib[key]))
id += 1 id += 1
return paragraph_list return paragraph_list
...@@ -98,7 +90,10 @@ class OOGranulate(object): ...@@ -98,7 +90,10 @@ class OOGranulate(object):
def getParagraphItem(self, paragraph_id): def getParagraphItem(self, paragraph_id):
"""Returns the paragraph in the form of (text, class).""" """Returns the paragraph in the form of (text, class)."""
try: try:
paragraph = self._relevantParagraphList()[paragraph_id] relevant_paragraph_list = self.document.parsed_content.xpath(
'//text:p[not(ancestor::draw:frame)]',
namespaces=self.document.parsed_content.nsmap)
paragraph = relevant_paragraph_list[paragraph_id]
text = ''.join(paragraph.xpath('.//text()', namespaces=paragraph.nsmap)) text = ''.join(paragraph.xpath('.//text()', namespaces=paragraph.nsmap))
key = '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name' key = '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name'
p_class = paragraph.attrib[key] p_class = paragraph.attrib[key]
......
...@@ -84,14 +84,6 @@ class TestOOGranulate(cloudoooTestCase): ...@@ -84,14 +84,6 @@ class TestOOGranulate(cloudoooTestCase):
obtained_image = self.oogranulate.getImage('anything.png') obtained_image = self.oogranulate.getImage('anything.png')
self.assertEquals('', obtained_image) self.assertEquals('', obtained_image)
def testRelevantParagraphList(self):
"""Test if _relevantParagraphList returns a list with 'p' excluding the 'p'
inside 'draw:frame'"""
draw_p_list = self.oogranulate.document.parsed_content.xpath(
'//draw:frame//text:p',
namespaces=self.oogranulate.document.parsed_content.nsmap)
self.assertTrue(draw_p_list not in self.oogranulate._relevantParagraphList())
def testGetParagraphItemList(self): def testGetParagraphItemList(self):
"""Test if getParagraphItemList() returns the right paragraphs list, with """Test if getParagraphItemList() returns the right paragraphs list, with
the ids always in the same order""" the ids always in the same order"""
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment