Commit 1dc85972 authored by Kevin Deldycke

Identify each extracted table by its name.


git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@2964 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 5d70b6c7
...@@ -36,6 +36,7 @@ from Globals import InitializeClass ...@@ -36,6 +36,7 @@ from Globals import InitializeClass
from zipfile import ZipFile from zipfile import ZipFile
from zLOG import LOG from zLOG import LOG
import imghdr import imghdr
import random
...@@ -137,11 +138,12 @@ class OOoParser: ...@@ -137,11 +138,12 @@ class OOoParser:
""" """
Return a list of table-like spreadsheets (optionnaly included embedded ones) Return a list of table-like spreadsheets (optionnaly included embedded ones)
""" """
spreadsheets = [] tables = {}
spreadsheets = self.getPlainSpreadsheetsAsTable() tables = self.getPlainSpreadsheetsAsTable(no_empty_lines)
if include_embedded == True: if include_embedded == True:
spreadsheets += self.getEmbeddedSpreadsheetsAsTable(no_empty_lines) embedded_tables = self.getEmbeddedSpreadsheetsAsTable(no_empty_lines)
return spreadsheets tables = self._getTableListUnion(tables, embedded_tables)
return tables
security.declarePublic('getPlainSpreadsheetsAsDom') security.declarePublic('getPlainSpreadsheetsAsDom')
...@@ -161,11 +163,11 @@ class OOoParser: ...@@ -161,11 +163,11 @@ class OOoParser:
""" """
Return a list of plain spreadsheets from the document and transform them as table Return a list of plain spreadsheets from the document and transform them as table
""" """
tables = [] tables = {}
for spreadsheet in self.getPlainSpreadsheetsAsDom(): for spreadsheet in self.getPlainSpreadsheetsAsDom():
new_table = self.getSpreadsheetAsTable(spreadsheet, no_empty_lines) new_table = self.getSpreadsheetAsTable(spreadsheet, no_empty_lines)
if new_table != None: if new_table != None:
tables.append(new_table) tables = self._getTableListUnion(tables, new_table)
return tables return tables
...@@ -182,8 +184,8 @@ class OOoParser: ...@@ -182,8 +184,8 @@ class OOoParser:
if document: if document:
try: try:
object_content = self.reader.fromString(self.oo_files[document[3:] + '/content.xml']) object_content = self.reader.fromString(self.oo_files[document[3:] + '/content.xml'])
if object_content.getElementsByTagName("table:table"): for table in object_content.getElementsByTagName("table:table"):
spreadsheets.append(object_content) spreadsheets.append(table)
except: except:
pass pass
return spreadsheets return spreadsheets
...@@ -194,11 +196,11 @@ class OOoParser: ...@@ -194,11 +196,11 @@ class OOoParser:
""" """
Return a list of embedded spreadsheets in the document as table Return a list of embedded spreadsheets in the document as table
""" """
tables = [] tables = {}
for spreadsheet in self.getEmbeddedSpreadsheetsAsDom(): for spreadsheet in self.getEmbeddedSpreadsheetsAsDom():
new_table = self.getSpreadsheetAsTable(spreadsheet, no_empty_lines) new_table = self.getSpreadsheetAsTable(spreadsheet, no_empty_lines)
if new_table != None: if new_table != None:
tables.append(new_table) tables = self._getTableListUnion(tables, new_table)
return tables return tables
...@@ -208,11 +210,14 @@ class OOoParser: ...@@ -208,11 +210,14 @@ class OOoParser:
This method convert an OpenOffice spreadsheet to a simple table. This method convert an OpenOffice spreadsheet to a simple table.
This code is base on the oo2pt tool (http://cvs.sourceforge.net/viewcvs.py/collective/CMFReportTool/oo2pt). This code is base on the oo2pt tool (http://cvs.sourceforge.net/viewcvs.py/collective/CMFReportTool/oo2pt).
""" """
if spreadsheet == None: if spreadsheet == None or spreadsheet.nodeName != 'table:table':
return None return None
table = [] table = []
# Get the table name
table_name = spreadsheet.getAttributeNS(self.ns["table"], "name")
# Store informations on column widths # Store informations on column widths
line_number = 0 line_number = 0
for column in spreadsheet.getElementsByTagName("table:table-column"): for column in spreadsheet.getElementsByTagName("table:table-column"):
...@@ -263,13 +268,13 @@ class OOoParser: ...@@ -263,13 +268,13 @@ class OOoParser:
) )
if no_empty_lines: if no_empty_lines:
table = self._deleteTableEmptyLines(table) table = self._deleteTableEmptyLines(table)
return table return {table_name: table}
security.declarePrivate('_getTableMinimalBounds') security.declarePrivate('_getTableMinimalBounds')
def _getTableMinimalBounds(self, table): def _getTableMinimalBounds(self, table):
""" """
Calcul the minimum size of a text table Calcul the minimum size of a table
""" """
empty_lines = 0 empty_lines = 0
no_more_empty_lines = 0 no_more_empty_lines = 0
...@@ -321,7 +326,7 @@ class OOoParser: ...@@ -321,7 +326,7 @@ class OOoParser:
security.declarePrivate('_deleteTableEmptyLines') security.declarePrivate('_deleteTableEmptyLines')
def _deleteTableEmptyLines(self, table): def _deleteTableEmptyLines(self, table):
""" """
Delete table empty lines Delete table empty lines.
""" """
new_table = [] new_table = []
for line in table: for line in table:
...@@ -334,5 +339,22 @@ class OOoParser: ...@@ -334,5 +339,22 @@ class OOoParser:
return new_table return new_table
security.declarePrivate('_getTableListUnion')
def _getTableListUnion(self, list1, list2):
"""
Coerce two dict containing tables structures.
We need to use this method because a OpenOffice document can hold
several embedded spreadsheets with the same id. This explain the
use of random suffix in such extreme case.
"""
for list2_key in list2.keys():
# Generate a new table ID if needed
new_key = list2_key
while new_key in list1.keys():
new_key = list2_key + '_' + str(random.randint(1000,9999))
list1[new_key] = list2[list2_key]
return list1
InitializeClass(OOoParser) InitializeClass(OOoParser)
allow_class(OOoParser) allow_class(OOoParser)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment