Commit 89af0938 authored by Hugo H. Maia Vieira

Refactor eliminating _relevantParagraphList


git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk/utils@41267 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 7b951d3f
......@@ -42,17 +42,6 @@ class OOGranulate(object):
def __init__(self, file, source_format):
  """Build an OdfDocument from the given arguments and keep it on
  self.document.

  file -- forwarded unchanged as the first argument of OdfDocument
  source_format -- forwarded unchanged as the second argument of OdfDocument
  """
  odf_document = OdfDocument(file, source_format)
  self.document = odf_document
def _relevantParagraphList(self):
  """Returns a list with the relevant lxml.etree._Element 'p' tags of
  self.document.parsed_content. It excludes the 'p' inside 'draw:frame'.

  The namespace prefixes used in the query ('text', 'draw') are resolved
  through the nsmap of self.document.parsed_content."""
  parsed_content = self.document.parsed_content
  # One query selects every text:p that has no draw:frame ancestor. This
  # replaces the previous approach of fetching all paragraphs, fetching the
  # framed ones, and filtering the first list with a linear membership test
  # per element.
  return parsed_content.xpath('//text:p[not(ancestor::draw:frame)]',
                              namespaces=parsed_content.nsmap)
def getTableItemList(self, file):
  """Placeholder for listing the table IDs in the form of (id, title).

  Not implemented yet: every call raises NotImplementedError and the
  'file' argument is not used."""
  raise NotImplementedError()
......@@ -88,9 +77,12 @@ class OOGranulate(object):
"""Returns the list of paragraphs in the form of (id, class) where class
may have special meaning to define TOC/TOI."""
key = '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name'
relevant_paragraph_list = self.document.parsed_content.xpath(
'//text:p[not(ancestor::draw:frame)]',
namespaces=self.document.parsed_content.nsmap)
id = 0
paragraph_list = []
for p in self._relevantParagraphList():
for p in relevant_paragraph_list:
paragraph_list.append((id, p.attrib[key]))
id += 1
return paragraph_list
......@@ -98,7 +90,10 @@ class OOGranulate(object):
def getParagraphItem(self, paragraph_id):
"""Returns the paragraph in the form of (text, class)."""
try:
paragraph = self._relevantParagraphList()[paragraph_id]
relevant_paragraph_list = self.document.parsed_content.xpath(
'//text:p[not(ancestor::draw:frame)]',
namespaces=self.document.parsed_content.nsmap)
paragraph = relevant_paragraph_list[paragraph_id]
text = ''.join(paragraph.xpath('.//text()', namespaces=paragraph.nsmap))
key = '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name'
p_class = paragraph.attrib[key]
......
......@@ -84,14 +84,6 @@ class TestOOGranulate(cloudoooTestCase):
obtained_image = self.oogranulate.getImage('anything.png')
self.assertEquals('', obtained_image)
def testRelevantParagraphList(self):
  """Test if _relevantParagraphList returns a list with 'p' excluding the 'p'
  inside 'draw:frame'"""
  parsed_content = self.oogranulate.document.parsed_content
  draw_p_list = parsed_content.xpath('//draw:frame//text:p',
                                     namespaces=parsed_content.nsmap)
  relevant_p_list = self.oogranulate._relevantParagraphList()
  # Check every framed paragraph on its own. Asking whether the list object
  # draw_p_list is itself a member of the result is true no matter what the
  # result contains, so that form of the assertion could never fail.
  for draw_p in draw_p_list:
    self.assertTrue(draw_p not in relevant_p_list)
def testGetParagraphItemList(self):
"""Test if getParagraphItemList() returns the right paragraphs list, with
the ids always in the same order"""
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment