##############################################################################
#
# Copyright (c) 2002-2006 Nexedi SARL and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs.
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company.
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################

from AccessControl import ClassSecurityInfo
from Products.CMFCore.WorkflowCore import WorkflowMethod
from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface
from Products.ERP5Type.Cache import CachingMethod
from Products.ERP5OOo.Document.DMSFile import DMSFile

import tempfile, os


class PdfDocument(DMSFile):
  """
  PdfDocument - same as file, but has its own getSearchableText method
  (converts via pdftotext)
  """
  # CMF Type Definition
  meta_type = 'ERP5 Pdf Document'
  portal_type = 'Pdf Document'
  isPortalContent = 1
  isRADContent = 1

  # Declarative security
  security = ClassSecurityInfo()
  security.declareObjectProtected(Permissions.AccessContentsInformation)

  # Default Properties
  property_sheets = ( PropertySheet.Base
                    , PropertySheet.CategoryCore
                    , PropertySheet.DublinCore
                    , PropertySheet.Version
                    , PropertySheet.Reference
                    , PropertySheet.DMSFile
                    , PropertySheet.Document
                    )

  # Same searchable attributes as DMSFile, plus 'text_content'
  # (presumably the property written by setTextContent below -- to confirm).
  searchable_attrs = DMSFile.searchable_attrs + ('text_content',)

  ### Content indexing methods
  security.declareProtected(Permissions.View, 'getSearchableText')
  def getSearchableText(self, md=None, force=0):
    """
    Used by the catalog for basic full text indexing.

    The text is extracted from the PDF data with the external
    ``pdftotext`` command and stored through setTextContent, so the
    conversion only happens once: it is skipped as soon as
    getTextContent() returns something other than None.

    md    -- passed through unchanged to DMSFile.getSearchableText
    force -- if equal to 1, run the conversion again even when a text
             content is already stored

    Returns whatever DMSFile.getSearchableText returns.
    """
    if hasattr(self, 'data') and (force == 1 or self.getTextContent() is None):
      tmp = tempfile.NamedTemporaryFile()
      try:
        tmp.write(self._unpackData(self.data))
        # Push buffered bytes to disk so that pdftotext, which opens the
        # file by name in another process, sees the complete document.
        tmp.flush()
        cmd = 'pdftotext -layout -enc UTF-8 -nopgbrk %s -' % tmp.name
        pipe = os.popen(cmd)
        try:
          text = pipe.read()
        finally:
          # Always release the pipe, even if reading fails
          pipe.close()
      finally:
        # Closing the NamedTemporaryFile also removes it from disk; doing it
        # in a finally clause avoids leaking it when an error is raised.
        tmp.close()
      # Newlines are flattened to spaces before the text is stored
      self.setTextContent(text.replace('\n', ' '))
    return DMSFile.getSearchableText(self, md)

  # Alias exposing the same method under the name SearchableText
  SearchableText = getSearchableText


# vim: syntax=python shiftwidth=2