OOoUtils.py 20.5 KB
Newer Older
Nicolas Delaby's avatar
Nicolas Delaby committed
1
# -*- coding: utf-8 -*-
Kevin Deldycke's avatar
Kevin Deldycke committed
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
##############################################################################
#
# Copyright (c) 2003-2005 Nexedi SARL and Contributors. All Rights Reserved.
#                         Kevin DELDYCKE    <kevin@nexedi.com>
#                         Guillaume MICHON  <guillaume@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################

31 32
import sys

33 34
from Acquisition import Implicit

Kevin Deldycke's avatar
Kevin Deldycke committed
35 36 37 38
from Products.PythonScripts.Utility import allow_class
from ZPublisher.HTTPRequest import FileUpload
from xml.dom import Node
from AccessControl import ClassSecurityInfo
39
from Products.ERP5Type.Globals import InitializeClass, get_request
40
from zipfile import ZipFile, ZIP_DEFLATED
41
from cStringIO import StringIO
Kevin Deldycke's avatar
Kevin Deldycke committed
42
import imghdr
43
import random
Bartek Górny's avatar
Bartek Górny committed
44
from Products.ERP5Type import Permissions
45
from zLOG import LOG, INFO, DEBUG
Kevin Deldycke's avatar
Kevin Deldycke committed
46

47
from OFS.Image import Pdata
Kevin Deldycke's avatar
Kevin Deldycke committed
48

Nicolas Delaby's avatar
Nicolas Delaby committed
49 50 51
from lxml import etree
from lxml.etree import Element, XMLSyntaxError
from copy import deepcopy
52
from warnings import warn
53

Kevin Deldycke's avatar
Kevin Deldycke committed
54 55
class CorruptedOOoFile(Exception):
  """Raised when a document cannot be read as a valid zipped OOo file."""

56 57 58 59 60 61 62 63 64 65 66 67
# MIME types of the application/vnd.sun.xml.* document formats, keyed by a
# three-letter format code (presumably the file extension, cf. the 'sxw'
# default of OOoBuilder.render -- TODO confirm).
OOo_mimeType_dict = {
  'sxw' : 'application/vnd.sun.xml.writer',
  'stw' : 'application/vnd.sun.xml.writer.template',
  'sxg' : 'application/vnd.sun.xml.writer.global',
  'sxc' : 'application/vnd.sun.xml.calc',
  'stc' : 'application/vnd.sun.xml.calc.template',
  'sxi' : 'application/vnd.sun.xml.impress',
  'sti' : 'application/vnd.sun.xml.impress.template',
  'sxd' : 'application/vnd.sun.xml.draw',
  'std' : 'application/vnd.sun.xml.draw.template',
  'sxm' : 'application/vnd.sun.xml.math',
}
Kevin Deldycke's avatar
Kevin Deldycke committed
68

69
class OOoBuilder(Implicit):
70 71 72
  """
  Tool that allows to reinject new files in a ZODB OOo document.
  """
73
  __allow_access_to_unprotected_subobjects__ = 1
74 75

  def __init__(self, document):
76
    if hasattr(document, 'data') :
77
      self._document = StringIO()
78 79 80 81 82 83 84 85 86 87 88

      if isinstance(document.data, Pdata):
        # Handle image included in the style
        dat = document.data
        while dat is not None:
          self._document.write(dat.data)
          dat = dat.next
      else:
        # Default behaviour
        self._document.write(document.data)
          
89 90 91
    elif hasattr(document, 'read') :
      self._document = document
    else :
92 93
      self._document = StringIO()
      self._document.write(document)
94
    self._image_count = 0    
95
    self._manifest_additions_list = []
96 97 98 99 100 101 102 103 104 105

  def replace(self, filename, stream):
    """
    Replaces the content of filename by stream in the archive.
    Creates a new file if filename was not already there.
    """
    try:
      zf = ZipFile(self._document, mode='a', compression=ZIP_DEFLATED)
    except RuntimeError:
      zf = ZipFile(self._document, mode='a')
106
    try:
107 108 109
      # remove the file first if it exists
      fi = zf.getinfo(filename)
      zf.filelist.remove( fi )
110
    except KeyError:
111 112
      # This is a new file
      pass
113 114
    zf.writestr(filename, stream)
    zf.close()
Bartek Górny's avatar
Bartek Górny committed
115

116 117 118 119 120 121 122 123 124
  def extract(self, filename):
    """
    Extracts a file from the archive
    """
    try:
      zf = ZipFile(self._document, mode='r', compression=ZIP_DEFLATED)
    except RuntimeError:
      zf = ZipFile(self._document, mode='r')
    return zf.read(filename)
Bartek Górny's avatar
Bartek Górny committed
125

126 127 128 129 130 131 132 133 134
  def getNameList(self):
    try:
      zf = ZipFile(self._document, mode='r', compression=ZIP_DEFLATED)
    except RuntimeError:
      zf = ZipFile(self._document, mode='r')
    li = zf.namelist()
    zf.close()
    return li

135 136 137
  def getMimeType(self):
    """
    Returns the raw content of the 'mimetype' member of the archive
    """
    return self.extract('mimetype')

Nicolas Delaby's avatar
Nicolas Delaby committed
138
  def prepareContentXml(self, ooo_xml_file_id):
    """
      extracts an xml file of the archive and prepare it :
        - add tal, i18n and metal namespaces on the root element
        - add a tal:attributes on the root element which sets the
          Content-Type header of the response
        - indent the xml

      ooo_xml_file_id -- name of the archive member to prepare

      Returns the serialised document, utf-8 encoded, with an XML
      declaration.
    """
    # etree.XML already returns the root element of the parsed document
    root = etree.XML(self.extract(ooo_xml_file_id))
    #Declare zope namespaces
    # (prefixes the document already declares override these defaults)
    NSMAP = {'tal': 'http://xml.zope.org/namespaces/tal',
             'i18n': 'http://xml.zope.org/namespaces/i18n',
             'metal': 'http://xml.zope.org/namespaces/metal'}
    NSMAP.update(root.nsmap)
    # The namespace map of an existing element cannot be extended, hence the
    # new root into which attributes and children are copied.
    new_root = Element(root.tag, nsmap=NSMAP)
    new_root.attrib.update(dict(root.attrib))
    new_root.attrib.update({'{%s}attributes' % NSMAP.get('tal'): 'dummy python:request.RESPONSE.setHeader(\'Content-Type\', \'text/html;; charset=utf-8\')'})
    for child in root:
      new_root.append(deepcopy(child))
    return etree.tostring(new_root, encoding='utf-8', xml_declaration=True,
                          pretty_print=True)

161

162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
  def addFileEntry(self, full_path, media_type, content=None):
      """ Add a file entry to the manifest and possibly is content """
      self.addManifest(full_path, media_type)
      if content:
          self.replace(full_path, content)

  def addManifest(self, full_path, media_type):
    """ Add a path to the manifest """
    li = '<manifest:file-entry manifest:media-type="%s" manifest:full-path="%s"/>'%(media_type, full_path)
    self._manifest_additions_list.append(li)

  def updateManifest(self):
    """ Add a path to the manifest """
    MANIFEST_FILENAME = 'META-INF/manifest.xml'
    meta_infos = self.extract(MANIFEST_FILENAME)
    # prevent some duplicates
    for meta_line in meta_infos.split('\n'):
        for new_meta_line in self._manifest_additions_list:
            if meta_line.strip() == new_meta_line:
                self._manifest_additions_list.remove(new_meta_line)

    # add the new lines
    self._manifest_additions_list.append('</manifest:manifest>')
    meta_infos = meta_infos.replace( self._manifest_additions_list[-1], '\n'.join(self._manifest_additions_list) )
    self.replace(MANIFEST_FILENAME, meta_infos)
    self._manifest_additions_list = []

189
  def addImage(self, image, format='png', content_type=None):
190 191 192 193 194
    """
    Add an image to the current document and return its id
    """
    count = self._image_count
    self._image_count += 1
195
    name = "Pictures/%s.%s" % (count, format)
196 197 198 199 200
    if not content_type:
      import mimetypes
      warn('content_type argument must be passed explicitely', FutureWarning)
      content_type = mimetypes.guess_type(name)[0]
    self.addManifest(name, content_type)
201 202
    # we need to explicitly update manifest file
    self.updateManifest()
203
    self.replace(name, image)
204
    is_legacy = ('oasis.opendocument' not in self.getMimeType())
Yoshinori Okuji's avatar
Yoshinori Okuji committed
205
    return "%s%s" % (is_legacy and '#' or '', name,)
206

207
  def render(self, name='', extension='sxw', source=False):
208 209 210
    """
    returns the OOo document
    """
211
    if name and not(source):
212
      request = get_request()
213 214
      request.response.setHeader('Content-Disposition',
                              'attachment; filename=%s.%s' % (name, extension))
215

216 217
    self._document.seek(0)
    return self._document.read()
Bartek Górny's avatar
Bartek Górny committed
218

219
# Register the class with Products.PythonScripts.Utility.allow_class
# (presumably so that restricted Python Scripts may use it -- TODO confirm).
allow_class(OOoBuilder)
Kevin Deldycke's avatar
Kevin Deldycke committed
220

221
class OOoParser(Implicit):
Kevin Deldycke's avatar
Kevin Deldycke committed
222 223 224
  """
    General purpose tools to parse and handle OpenOffice v1.x documents.
  """
225
  __allow_access_to_unprotected_subobjects__ = 1 
Kevin Deldycke's avatar
Kevin Deldycke committed
226 227 228 229 230
  def __init__(self):
    self.oo_content_dom = None
    self.oo_styles_dom  = None
    self.oo_files = {}
    self.pictures = {}
Kevin Deldycke's avatar
Kevin Deldycke committed
231
    self.filename = None
Kevin Deldycke's avatar
Kevin Deldycke committed
232

233 234 235
  def openFromString(self, text_content):
    return self.openFile(StringIO(text_content))

236
  def openFile(self, file_descriptor):
    """
      Load all files in the zipped OpenOffice document

      file_descriptor -- the document, as accepted by ZipFile

      Raises CorruptedOOoFile if the document cannot be opened as a zip
      file or if one of its members fails the integrity test.
    """
    # Try to unzip the Open Office doc
    try:
      oo_unzipped = ZipFile(file_descriptor, mode="r")
    except Exception:
      # sys.exc_info() is used rather than binding the exception in the
      # except clause, so the syntax does not depend on the Python version.
      LOG('ERP5OOo', DEBUG, 'Error in openFile', error=sys.exc_info())
      raise CorruptedOOoFile(sys.exc_info()[1])

    try:
      # Test the integrity of the file
      if oo_unzipped.testzip() is not None:
        raise CorruptedOOoFile('Invalid zip file')

      # Get the filename
      self.filename = getattr(file_descriptor, 'filename', 'default_filename')

      # List and load the content of the zip file
      for name in oo_unzipped.namelist():
        self.oo_files[name] = oo_unzipped.read(name)
    finally:
      # Close the archive on failure too; it used to be left open
      oo_unzipped.close()

    # Get the main content and style definitions
    self.oo_content_dom = etree.XML(self.oo_files["content.xml"])
    self.oo_styles_dom  = etree.XML(self.oo_files["styles.xml"])
Kevin Deldycke's avatar
Kevin Deldycke committed
261

Kevin Deldycke's avatar
Kevin Deldycke committed
262 263 264 265 266 267
  def getFilename(self):
    """
      Return the name of the OpenOffice file, as recorded by openFile:
      None as long as no file was opened, 'default_filename' if the opened
      object had no filename attribute
    """
    return self.filename

268
  def getPicturesMapping(self):
Kevin Deldycke's avatar
Kevin Deldycke committed
269 270 271
    """
      Return a dictionnary of all pictures in the document
    """
Vincent Pelletier's avatar
Vincent Pelletier committed
272
    if not self.pictures:
Kevin Deldycke's avatar
Kevin Deldycke committed
273 274 275 276 277 278 279
      for file_name in self.oo_files:
        raw_data = self.oo_files[file_name]
        pict_type = imghdr.what(None, raw_data)
        if pict_type != None:
          self.pictures[file_name] = raw_data
    return self.pictures

280
  def getContentDom(self):
Kevin Deldycke's avatar
Kevin Deldycke committed
281 282 283 284 285
    """
      Return the DOM tree of the main OpenOffice content
    """
    return self.oo_content_dom

286
  def getSpreadsheetsDom(self, include_embedded=False):
287 288 289 290
    """
      Return a list of DOM tree spreadsheets (optionnaly included embedded ones)
    """
    spreadsheets = []
291
    spreadsheets = self.getPlainSpreadsheetsDom()
292
    if include_embedded == True:
293
      spreadsheets += self.getEmbeddedSpreadsheetsDom()
294 295
    return spreadsheets

296
  def getSpreadsheetsMapping(self, include_embedded=False, no_empty_lines=False, normalize=True):
297 298 299
    """
      Return a list of table-like spreadsheets (optionnaly included embedded ones)
    """
300
    tables = {}
301
    tables = self.getPlainSpreadsheetsMapping(no_empty_lines, normalize)
302
    if include_embedded == True:
303
      embedded_tables = self.getEmbeddedSpreadsheetsMapping(no_empty_lines, normalize)
304 305
      tables = self._getTableListUnion(tables, embedded_tables)
    return tables
306

307
  def getPlainSpreadsheetsDom(self):
308 309 310
    """
      Retrieve every spreadsheets from the document and get they DOM tree
    """
Nicolas Delaby's avatar
Nicolas Delaby committed
311 312
    find_path = './/{%s}table' % self.oo_content_dom.nsmap['table']
    return self.oo_content_dom.findall(find_path)
313

314
  def getPlainSpreadsheetsMapping(self, no_empty_lines=False, normalize=True):
315 316 317
    """
      Return a list of plain spreadsheets from the document and transform them as table
    """
318
    tables = {}
319
    for spreadsheet in self.getPlainSpreadsheetsDom():
320
      new_table = self.getSpreadsheetMapping(spreadsheet, no_empty_lines, normalize)
321
      if new_table != None:
322
        tables = self._getTableListUnion(tables, new_table)
323 324
    return tables

325
  def getEmbeddedSpreadsheetsDom(self):
    """
      Return a list of existing embedded spreadsheets in the file as DOM tree

      Every draw:object of the main content which has an xlink:href is
      looked up in the archive; the tables found in its own content.xml are
      returned. Embedded documents which are not valid XML are skipped.
    """
    spreadsheets = []
    # The namespace URI is taken from the main document, which is known to
    # declare the 'table' prefix; the embedded document may not.
    table_path = './/{%s}table' % self.oo_content_dom.nsmap['table']
    # List all embedded spreadsheets
    find_path = './/{%s}object' % self.oo_content_dom.nsmap['draw']
    for embedded in self.oo_content_dom.findall(find_path):
      document = embedded.get('{%s}href' % embedded.nsmap['xlink'])
      if not document:
        continue
      try:
        # The first three characters of the reference are dropped
        # (presumably a '#./' prefix -- TODO confirm against real documents)
        object_content = etree.XML(self.oo_files[document[3:] + '/content.xml'])
      except XMLSyntaxError:
        continue
      # Search the embedded document itself. The main document used to be
      # searched here, which returned its tables once per embedded object.
      # XXX: insert the link to OLE document if there is no table ?
      spreadsheets.extend(object_content.findall(table_path))
    return spreadsheets

349
  def getEmbeddedSpreadsheetsMapping(self, no_empty_lines=False, normalize=True):
Kevin Deldycke's avatar
Kevin Deldycke committed
350
    """
351
      Return a list of embedded spreadsheets in the document as table
Kevin Deldycke's avatar
Kevin Deldycke committed
352
    """
353
    tables = {}
354
    for spreadsheet in self.getEmbeddedSpreadsheetsDom():
355
      new_table = self.getSpreadsheetMapping(spreadsheet, no_empty_lines, normalize)
Kevin Deldycke's avatar
Kevin Deldycke committed
356
      if new_table != None:
357
        tables = self._getTableListUnion(tables, new_table)
Kevin Deldycke's avatar
Kevin Deldycke committed
358 359
    return tables

360
  def getSpreadsheetMapping(self, spreadsheet=None, no_empty_lines=False, normalize=True):
Kevin Deldycke's avatar
Kevin Deldycke committed
361 362
    """
      This method convert an OpenOffice spreadsheet to a simple table.
363
      This code is based on the oo2pt tool (http://cvs.sourceforge.net/viewcvs.py/collective/CMFReportTool/oo2pt).
Kevin Deldycke's avatar
Kevin Deldycke committed
364
    """
Nicolas Delaby's avatar
Nicolas Delaby committed
365 366
    if spreadsheet is None or \
      spreadsheet.tag != '{%s}table' % spreadsheet.nsmap['table']:
Kevin Deldycke's avatar
Kevin Deldycke committed
367 368
      return None

369
    table = []
Kevin Deldycke's avatar
Kevin Deldycke committed
370

371
    # Get the table name
Nicolas Delaby's avatar
Nicolas Delaby committed
372
    table_name = spreadsheet.get('{%s}name' % spreadsheet.nsmap["table"])
373

374
    # Scan table and store usable information
Nicolas Delaby's avatar
Nicolas Delaby committed
375 376
    find_path = './/{%s}table-row' % spreadsheet.nsmap['table']
    for line in spreadsheet.findall(find_path):
377 378 379

      # TODO : to the same as cell about abusive repeated lines

Nicolas Delaby's avatar
Nicolas Delaby committed
380
      line_group_found = line.get('{%s}number-rows-repeated' % line.nsmap["table"])
381 382
      if not line_group_found:
        lines_to_repeat = 1
383
      else:
384
        lines_to_repeat = int(line_group_found)
385

386
      for i in range(lines_to_repeat):
387 388
        table_line = []

389
        # Get all cells
Nicolas Delaby's avatar
Nicolas Delaby committed
390 391
        find_path = './/{%s}table-cell' % line.nsmap['table']
        cells = line.findall(find_path)
392 393 394 395 396 397 398 399 400 401 402 403 404
        cell_index_range = range(len(cells))

        for cell_index in cell_index_range:
          cell = cells[cell_index]

          # If the cell as no child, cells have no content
          # And if the cell is the last of the row, we don't need to add it to the line
          # So we can go to the next line (= exit this cells loop)
          #
          # I must do this test because sometimes the following cell group
          #   can be found in OOo documents : <table:table-cell table:number-columns-repeated='246'/>
          # This is bad because it create too much irrevelent content that slow down the process
          # So it's a good idea to break the loop in this case
Nicolas Delaby's avatar
Nicolas Delaby committed
405
          if len(cell) == 0 and cell_index == cell_index_range[-1]:
406 407 408
            break

          # Handle cells group
Nicolas Delaby's avatar
Nicolas Delaby committed
409
          cell_group_found = cell.get('{%s}number-columns-repeated' % cell.nsmap['table'])
410 411
          if not cell_group_found:
            cells_to_repeat = 1
412
          else:
413
            cells_to_repeat = int(cell_group_found)
414

415 416 417
          # Ungroup repeated cells
          for j in range(cells_to_repeat):
            # Get the cell content
418
            cell_data = None
Nicolas Delaby's avatar
Nicolas Delaby committed
419 420 421 422 423 424 425 426 427 428 429 430
            attribute_type_mapping = {'date': 'date-value',
                                      'time': 'time-value',
                                      'float': 'value',
                                      'percentage': 'value',
                                      'currency': 'value'}
            # Depending of odf version, value-type and value attributes can be in
            # table or office namespaces, so we use local-name.
            value_type = str(cell.xpath('string(@*[local-name()="value-type"])'))
            if value_type in attribute_type_mapping:
              xpath = '@*[local-name()="%s"]' % attribute_type_mapping[value_type]
              cell_data = str(cell.xpath(xpath)[0])
            else: # read text nodes
431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466
              # Text nodes can contain multiple <text:p> tags, one for each
              # line. There are also some tags for special entities, for
              # instance <text:s/> for a space (or using <text:s text:c="3"/>
              # for multiple spaces) <text:tab/> for a tab and <text:line-break/>
              # for new line
              text_ns = cell.nsmap['text']
              def format_node(node):
                if node.tag == '{%s}table-cell' % node.nsmap['table']:
                  return "\n".join(part for part in
                    [format_node(child) for child in node.iterchildren()]
                    if part is not None)
                elif node.tag == '{%s}p' % node.nsmap['text']:
                  part_list = [node.text]
                  part_list.extend(format_node(child)
                    for child in node.iterchildren())
                  return ''.join(part for part in part_list if part)
                elif node.tag == '{%s}s' % node.nsmap['text']:
                  count = int(node.get('{%s}c' % node.nsmap['text'], 1))
                  return ''.join(part for part in
                    [node.text, ' ' * count, node.tail] if part)
                elif node.tag == '{%s}span' % node.nsmap['text']:
                  part_list = [node.text]
                  part_list.extend(format_node(child)
                    for child in node.iterchildren())
                  part_list.append(node.tail)
                  return ''.join(part for part in part_list if part)
                elif node.tag == '{%s}tab' % node.nsmap['text']:
                  return ''.join(part for part in
                    [node.text, '\t', node.tail] if part)
                elif node.tag == '{%s}line-break' % node.nsmap['text']:
                  return ''.join(part for part in
                    [node.text, '\n', node.tail] if part)
                elif node.tag == '{%s}a' % node.nsmap['text']:
                  return ''.join(part for part in
                    [node.text, node.tail] if part)
                # we can also have table:annotation, and they are ignored
467
              cell_data = format_node(cell) or None
468

469
            # Add the cell to the line
470
            table_line.append(cell_data)
471

472 473 474 475
        # Delete empty lines if needed
        if no_empty_lines:
          empty_cell = 0
          for table_cell in table_line:
Nicolas Delaby's avatar
Nicolas Delaby committed
476
            if table_cell is None:
477 478 479 480
              empty_cell += 1
          if empty_cell == len(table_line):
            table_line = None

481
        # Add the line to the table
Nicolas Delaby's avatar
Nicolas Delaby committed
482
        if table_line is not None:
483
          table.append(table_line)
484 485 486 487
        else:
          # If the line is empty here, the repeated line will also be empty, so
          # no need to loop.
          break
488

489
    # Reduce the table to the minimum
490 491 492 493 494
    new_table = self._getReducedTable(table)

    # Get a homogenized table
    if normalize:
      table_size = self._getTableSizeDict(new_table)
Nicolas Delaby's avatar
Nicolas Delaby committed
495 496 497
      new_table = self._getNormalizedBoundsTable( table=new_table
                                                , width=table_size['width']
                                                , height=table_size['height']
498 499
                                                )
    return {table_name: new_table}
Kevin Deldycke's avatar
Kevin Deldycke committed
500

501
  def _getReducedTable(self, table):
Kevin Deldycke's avatar
Kevin Deldycke committed
502
    """
503
      Reduce the table to its minimum size
Kevin Deldycke's avatar
Kevin Deldycke committed
504 505 506 507 508
    """
    empty_lines = 0
    no_more_empty_lines = 0

    # Eliminate all empty cells at the ends of lines and columns
509
    # Browse the table starting from the bottom for easy empty lines count
510
    for line in range(len(table)-1, -1, -1):
Kevin Deldycke's avatar
Kevin Deldycke committed
511
      empty_cells = 0
512
      line_content = table[line]
Kevin Deldycke's avatar
Kevin Deldycke committed
513
      for cell in range(len(line_content)-1, -1, -1):
514
        if line_content[cell] in ('', None):
Kevin Deldycke's avatar
Kevin Deldycke committed
515 516 517
          empty_cells += 1
        else:
          break
518

Kevin Deldycke's avatar
Kevin Deldycke committed
519 520 521 522
      if (not no_more_empty_lines) and (empty_cells == len(line_content)):
        empty_lines += 1
      else:
        line_size = len(line_content) - empty_cells
523
        table[line] = line_content[:line_size]
Kevin Deldycke's avatar
Kevin Deldycke committed
524 525
        no_more_empty_lines = 1

526 527 528
    table_height = len(table) - empty_lines

    return table[:table_height]
Kevin Deldycke's avatar
Kevin Deldycke committed
529

530 531 532 533
  def _getTableSizeDict(self, table):
    """
      Get table dimension as dictionnary contain both height and width
    """
534
    return { 'width' : max(len(x) for x in table or [[]])
535 536
           , 'height': len(table)
           }
Kevin Deldycke's avatar
Kevin Deldycke committed
537

538
  def _getNormalizedBoundsTable(self, table, width=0, height=0):
Kevin Deldycke's avatar
Kevin Deldycke committed
539
    """
540
      Add necessary cells and lines to obtain given bounds
Kevin Deldycke's avatar
Kevin Deldycke committed
541
    """
Vincent Pelletier's avatar
Vincent Pelletier committed
542 543 544
    table += [[]] * (len(table) - height)
    for line in table:
      line += [None] * (len(line) - width)
545 546
    return table

547 548 549 550 551 552 553
  def _getTableListUnion(self, list1, list2):
    """
      Coerce two dict containing tables structures.
      We need to use this method because a OpenOffice document can hold
        several embedded spreadsheets with the same id. This explain the
        use of random suffix in such extreme case.
    """
Vincent Pelletier's avatar
Vincent Pelletier committed
554
    for list2_key in list2:
555 556
      # Generate a new table ID if needed
      new_key = list2_key
Vincent Pelletier's avatar
Vincent Pelletier committed
557
      while new_key in list1:
558 559 560 561
        new_key = list2_key + '_' + str(random.randint(1000,9999))
      list1[new_key] = list2[list2_key]
    return list1

Kevin Deldycke's avatar
Kevin Deldycke committed
562
# Register the class with Products.PythonScripts.Utility.allow_class
# (presumably so that restricted Python Scripts may use it -- TODO confirm).
allow_class(OOoParser)
Nicolas Delaby's avatar
Nicolas Delaby committed
563
# Register the exception with allow_class as well (presumably so that
# restricted code may refer to it -- TODO confirm).
allow_class(CorruptedOOoFile)
564 565 566 567

def newOOoParser(container):
  """
    Return a new OOoParser wrapped around container with __of__
    (presumably the acquisition context of the parser -- TODO confirm)
  """
  parser = OOoParser()
  return parser.__of__(container)