##############################################################################
#
# Copyright (c) 2002 Nexedi SARL and Contributors. All Rights Reserved.
#                    Jean-Paul Smets-Solanes <jp@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################

import re

from AccessControl import ClassSecurityInfo
from DateTime import DateTime

from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface
from Products.ERP5Type.XMLObject import XMLObject
from Products.ERP5Type.WebDAVSupport import TextContent

def makeSortedTuple(kw):
  """
    Returns the (key, value) pairs of the dictionary kw as a tuple
    sorted by key, so that two dictionaries with the same content always
    produce the same hashable value.

    kw -- a dictionary (typically the keyword arguments describing
          a conversion format)
  """
  # sorted() builds a new list from any iterable, so it does not matter
  # whether items() returns a list or a view
  return tuple(sorted(kw.items()))

class ConversionCacheMixin:
  """
    This class provides a generic API to store in the ZODB
    various converted versions of a file or of a string.

    Each conversion is identified by the keyword arguments which
    describe its format. They are normalised by makeSortedTuple into
    a sorted tuple of (name, value) pairs which is used as key
    of the three cache dictionaries below.

    TODO:
    * Implement ZODB BLOB
  """
  # The three dictionaries below are class level defaults which are
  # only meant to be read. They are replaced by instance level
  # dictionaries before anything is written
  # (see _initConversionCache and clearConversionCache).

  # time of generation of various formats
  _cached_time = {}
  # generated files (cache)
  _cached_data = {}
  # mime types for cached formats XXX to be refactored
  _cached_mime = {}

  # Declarative security
  security = ClassSecurityInfo()
  security.declareObjectProtected(Permissions.AccessContentsInformation)

  def _initConversionCache(self):
    """
    Makes sure that the cache dictionaries are instance attributes.

    As long as an instance still uses a dictionary defined at class
    level, writing to it would modify the cache of every other instance.
    Such dictionaries are replaced by a new empty one stored
    on the instance.
    """
    klass = self.__class__
    for name in ('_cached_time', '_cached_data', '_cached_mime'):
      if getattr(self, name) is getattr(klass, name, None):
        setattr(self, name, {})

  security.declareProtected(Permissions.ModifyPortalContent, 'clearConversionCache')
  def clearConversionCache(self):
    """
    Clear cache (invoked by interaction workflow upon file upload).
    Needed here to overwrite class attributes with instance attributes.
    """
    self._cached_time = {}
    self._cached_data = {}
    self._cached_mime = {}

  security.declareProtected(Permissions.View, 'hasConversion')
  def hasConversion(self, **format):
    """
      Checks whether we have a version in this format

      Returns a true value if data was stored for the format
      described by the keyword arguments.
    """
    return makeSortedTuple(format) in self._cached_data

  def getCacheTime(self, **format):
    """
      Checks when if ever was the file produced

      Returns the DateTime recorded by updateConversion for this format
      or 0 if nothing was recorded.
    """
    return self._cached_time.get(makeSortedTuple(format), 0)

  def updateConversion(self, **format):
    """
      Records the current time as generation time of the format
      described by the keyword arguments.
    """
    self._initConversionCache()
    self._cached_time[makeSortedTuple(format)] = DateTime()
    # the dictionary is modified in place: the change
    # must be notified explicitly
    self._p_changed = 1

  def setConversion(self, data, mime=None, **format):
    """
      Stores a converted version in the cache.

      data -- the converted content. Nothing is stored and the
              generation time is left unchanged if it is None

      mime -- the mime type of the converted content. The mime type
              previously stored (if any) is kept if it is None

      format -- keyword arguments which identify the conversion
    """
    self._initConversionCache()
    tformat = makeSortedTuple(format)
    if mime is not None:
      self._cached_mime[tformat] = mime
    if data is not None:
      self._cached_data[tformat] = data
      self.updateConversion(**format)
    # the dictionaries are modified in place: the change
    # must be notified explicitly
    self._p_changed = 1

  def getConversion(self, **format):
    '''
    Returns a (mime type, data) tuple for the format described by the
    keyword arguments. An empty string is returned in place of each
    value which is missing from the cache.

    we could be much cooler here - pass testing and updating methods to this function
    so that it does it all by itself; this'd eliminate the need for cacheSet public method
    '''
    tformat = makeSortedTuple(format)
    return self._cached_mime.get(tformat, ''), self._cached_data.get(tformat, '')

  security.declareProtected(Permissions.View, 'getConversionCacheInfo')
  def getConversionCacheInfo(self):
    """
    Get cache details as string (for debugging)

    Returns an HTML table with one row per format for which
    a generation time was recorded: format key, size of
    the cached data and generation time.
    """
    # NOTE(review): values are inserted in the markup without HTML
    # quoting - confirm that format keys can not contain markup
    row_list = ['CACHE INFO:<br/><table><tr><td>format</td><td>size</td><td>time</td><td>is changed</td></tr>']
    for f, t in self._cached_time.items():
      data = self._cached_data.get(f)
      if data:
        if isinstance(data, str):
          ln = len(data)
        else:
          # anything else is handled as a chain of chunks: each chunk
          # holds its content in 'data' and the following chunk
          # (or None) in 'next'
          ln = 0
          while data is not None:
            ln += len(data.data)
            data = data.next
      else:
        ln = 'no data!!!'
      row_list.append('<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>' % (f, ln, t, '-'))
    row_list.append('</table>')
    return ''.join(row_list)

class Document(XMLObject):
    """
        Document is an abstract class with all methods
        related to document management in ERP5. This includes
        searchable text, explicit relations, implicit relations,
        metadata, versions, languages, etc.

        There are currently two types of Document subclasses:

        * File for binary file based documents. File
          has subclasses such as Image, OOoDocument,
          PDFDocument, etc. to implement specific conversion
          methods

        * TextDocument for text based documents. TextDocument
          has subclasses such as Wiki to implement specific
          methods

        Document classes which implement conversion should use
        the ConversionCacheMixin class so that converted values are
        stored.

        The Document class behaviour can be extended through scripts.

        * Document_discoverMetadata (DMS_ingestFile)
          finds all metadata or uses the metadata which was
          provided as parameter. Document_discoverMetadata should
          be overloaded if necessary for some classes
          (ex. TextDocument_discoverMetadata, Image_discoverMetadata)
          and should be called through a single API discoverMetadata()
          Consider using _getTypeBasedMethod for implementation

        * Document_ingestFile (Document_uploadFile)
          is called for http based ingestion and itself calls
          Document_discoverMetadata. Many parameters may be
          passed to Document_ingest through an
          online form.

        * Document_ingestEmail is called for email based
          ingestion and itself calls Document_ingestFile.
          Document_ingestEmail is in charge of parsing email
          to extract metadata before calling Document_ingestFile.

        * PUT is called for DAV/FTP based ingestion directly from the class.
          It itself calls Document_discoverMetadata.

        Custom scripts for automatic classification:

        * Document_findWikiPredecessorList finds a list of documents
          which are referencing us.
          Should this be merged with WebSite_getDocumentValue ? XXX

        * Document_findWikiSuccessor tries to find a document matching with
          a given regexp.
          Should this be merged with WebSite_getDocumentValue ? XXX

        Subcontent: documents may include subcontent (files, images, etc.)
        so that publication of rich content can be path independent.
    """

    meta_type = 'ERP5 Document'
    portal_type = 'Document'
    add_permission = Permissions.AddPortalContent
    isPortalContent = 1
    isRADContent = 1
    isDocument = 1

    # Declarative security
    security = ClassSecurityInfo()
    security.declareObjectProtected(Permissions.AccessContentsInformation)

    # Declarative properties
    property_sheets = ( PropertySheet.Base
                      , PropertySheet.XMLObject
                      , PropertySheet.CategoryCore
                      , PropertySheet.DublinCore
                      , PropertySheet.Version
                      , PropertySheet.Document
                      )

    # Declarative interfaces
    __implements__ = ()

    # Ids of the properties which getSearchableText concatenates
    searchable_property_list = ('title', 'description', 'id', 'reference',
                                'version', 'short_title', 'keywords',
                                'subject', 'source_reference', 'source_project_title')
                                # What is keywords ?
                                # XXX-JPS This is a plural
                                # XXX-JPS subject_list would be better than subject in this case
                                # and the getSearchableText should be able to process lists
                                # Same for source_reference_list, source_project_title_list

    ### Content indexing methods
    security.declareProtected(Permissions.View, 'getSearchableText')
    def getSearchableText(self, md=None):
      """
      Used by the catalog for basic full text indexing.

      Returns the values of all properties listed in
      searchable_property_list joined by a space. A property with
      no value (or a false value) contributes a single space.

      md -- not used by this implementation

      XXX-JPS - This method is nice. It should probably be moved to Base class
      searchable_property_list could become a standard class attribute.

      TODO (future): Make this property a per portal type property.
      """
      return ' '.join([self.getProperty(property_id) or ' '
                       for property_id in self.searchable_property_list])

    # Compatibility with CMF Catalog
    SearchableText = getSearchableText # XXX-JPS - Here we have a security issue - ask seb what to do

    security.declareProtected(Permissions.ModifyPortalContent, 'setPropertyListFromFilename')
    def setPropertyListFromFilename(self, fname):
      """
        Sets properties of the document by parsing its file name.

        The file name is matched against the regular expression returned
        by portal_preferences.getPreferredDocumentFilenameRegexp(). Each
        named group of the match is set as the property of the same name.

        The file name is used as reference (failover behaviour) if no
        regular expression is defined, if the regular expression can
        not be compiled or if the file name does not match it.

        fname -- the file name to parse
      """
      rx_src = self.portal_preferences.getPreferredDocumentFilenameRegexp()
      m = None
      if rx_src:
        try:
          m = re.compile(rx_src).match(fname)
        except re.error:
          # re.compile never returns None: an invalid regexp raises
          # re.error, which is handled like a file name which does not match
          m = None
      if m is None:
        # If no usable regexp is defined or if the file name does not
        # match, we use the file name as reference
        self.setReference(fname) # XXX-JPS please use _setReference to prevent reindexing all the time
        return
      for k, v in m.groupdict().items():
        self.setProperty(k, v) # XXX-JPS please use _setProperty to prevent reindexing all the time
      # XXX-JPS finally call self.reindexObject()

    security.declareProtected(Permissions.View, 'getWikiSuccessorReferenceList')
    def getWikiSuccessorReferenceList(self):
      """
        find references in text_content, return matches
        with this we can then find objects

        The text content is searched with the regular expression returned
        by portal_preferences.getPreferredDocumentReferenceRegexp().

        Returns a list of (matched text, dictionary of named groups)
        tuples, one per match. The list is empty if the document
        has no text content or if no regular expression is defined.
      """
      text_content = self.getTextContent()
      if text_content is None:
        return []
      rx_src = self.portal_preferences.getPreferredDocumentReferenceRegexp()
      if not rx_src:
        # without a regular expression there is nothing to look for
        # (and re.compile does not accept None)
        return []
      rx_search = re.compile(rx_src) # XXX-JPS Safe ? Better error required ?
      return [(r.group(), r.groupdict()) for r in rx_search.finditer(text_content)]

    security.declareProtected(Permissions.View, 'getWikiSuccessorValueList')
    def getWikiSuccessorValueList(self):
      """
        XXX-JPS Put a description then add notes (notes only is not enough)

        Returns the list of objects found for the references returned by
        getWikiSuccessorReferenceList. The named groups of each reference
        are passed as keyword arguments to Document_findWikiSuccessor and
        the first result (if any) is kept.

        getWikiSuccessorValueList - the way to find objects is on
        implementation level
      """
      # XXX results should be cached as volatile attributes
      # XXX-JPS - Please use TransactionCache in ERP5Type for this
      # TransactionCache does all the work for you
      lst = []
      for ref in self.getWikiSuccessorReferenceList():
        # ref is a (matched text, named groups) tuple
        r = ref[1]
        res = self.Document_findWikiSuccessor(**r)
        if len(res) > 0:
          lst.append(res[0].getObject())
      return lst

    security.declareProtected(Permissions.View, 'getWikiPredecessorValueList')
    def getWikiPredecessorValueList(self):
      """
        XXX-JPS Put a description then add notes (notes only is not enough)

        Returns the objects of the results of
        Document_findWikiPredecessorList, without duplicates and without
        the objects which have the same reference as this document.

        it is mostly implementation level - depends on what parameters we use to identify
        document, and on how a doc must reference me to be my predecessor (reference only,
        or with a language, etc
      """
      # XXX results should be cached as volatile attributes
      lst = [r.getObject() for r in self.Document_findWikiPredecessorList()]
      di = dict.fromkeys(lst) # make it unique
      ref = self.getReference()
      return [o for o in di if o.getReference() != ref] # every object has its own reference in SearchableText