diff --git a/product/ERP5OOo/Document/DMSFile.py b/product/ERP5OOo/Document/DMSFile.py index 1586d8f236df540032c2022ee1703e2fddcac363..5f652491f689b97ee4bfe0193fce2c5afa05b0bc 100644 --- a/product/ERP5OOo/Document/DMSFile.py +++ b/product/ERP5OOo/Document/DMSFile.py @@ -43,8 +43,7 @@ class DMSFile(XMLObject,File): """ Special base class, different from File only in that it can contain things (like Role Definition, for example) - Could (perhaps should) be a parent class for OOoDocument - Should probably be located somewhere else + will be merged with File when WebDAV issues are solved """ # CMF Type Definition meta_type = 'ERP5 DMS File' @@ -70,20 +69,32 @@ class DMSFile(XMLObject,File): _edit=File._edit edit=File.edit + searchable_attrs=('title','description','id','reference','version', + 'short_title','keywords','subject','original_filename','source_project_title') + ### Content indexing methods security.declareProtected(Permissions.View, 'getSearchableText') def getSearchableText(self, md=None): - """\ + """ Used by the catalog for basic full text indexing - And so we end up with a strange hybrid of File and Document - This is the same as in OOoDocument except that no text_content here - Some people call it 'copy-and-paste programming' """ - searchable_attrs=('title','description','id','reference','version', - 'short_title','keywords','subject','original_filename','source_project_title') - searchable_text = ' '.join(map(lambda x: self.getProperty(x) or ' ',searchable_attrs)) + searchable_text = ' '.join(map(lambda x: self.getProperty(x) or ' ',self.searchable_attrs)) return searchable_text + security.declarePrivate('_unpackData') + def _unpackData(self,data): + """ + Unpack Pdata into string + """ + if isinstance(data,str): + return data + else: + data_list=[] + while data is not None: + data_list.append(data.data) + data=data.next + return ''.join(data_list) + SearchableText=getSearchableText security.declareProtected(Permissions.ModifyPortalContent, 'guessMimeType') @@ -93,8 +104,8 @@ class DMSFile(XMLObject,File): if fname: content_type,enc=mimetypes.guess_type(fname) if content_type is not None: - self.content_type=content_type - return content_type + self.content_type=content_type + return content_type # BG copied from File in case diff --git a/product/ERP5OOo/Document/OOoDocument.py b/product/ERP5OOo/Document/OOoDocument.py index d6f35f85ed8aa004468265340f2e46610c14fc1a..ba0c58aa3eec71fe80741cf9c7f1707a0a0fd134 100644 --- a/product/ERP5OOo/Document/OOoDocument.py +++ b/product/ERP5OOo/Document/OOoDocument.py @@ -35,21 +35,20 @@ from Products.ERP5Type.Message import Message from Products.ERP5Type.Cache import CachingMethod from Products.ERP5.Document.File import File from Products.ERP5Type.XMLObject import XMLObject +from Products.ERP5OOo.Document.DMSFile import DMSFile from DateTime import DateTime -import xmlrpclib, base64, mimetypes, re, zipfile, cStringIO +import xmlrpclib, base64, re, zipfile, cStringIO # to overwrite WebDAV methods from Products.CMFDefault.File import File as CMFFile from Products.CMFCore.utils import getToolByName -mimetypes.init() - enc=base64.encodestring dec=base64.decodestring class ConvertionError(Exception):pass #class OOoDocument(File): -class OOoDocument(XMLObject,File): +class OOoDocument(DMSFile): """ A file document able to convert OOo compatible files to any OOo supported format, to capture metadata and to @@ -121,20 +120,7 @@ class OOoDocument(XMLObject,File): rx_strip=re.compile('<[^>]*?>',re.DOTALL|re.MULTILINE) rx_compr=re.compile('\s+') - ### Content indexing methods - security.declareProtected(Permissions.View, 'getSearchableText') - def getSearchableText(self, md=None): - """\ - Used by the catalog for basic full text indexing - And so we end up with a strange hybrid of File and Document - """ - searchable_attrs=('title','description','id','text_content','reference','version', - 'short_title','keywords','subject','original_filename','source_project_title') - searchable_text = ' '.join(map(lambda x: self.getProperty(x) or ' ',searchable_attrs)) - return searchable_text - - SearchableText=getSearchableText - + searchable_attrs=DMSFile.searchable_attrs+('text_content',) security.declareProtected(Permissions.ModifyPortalContent,'clearCache') def clearCache(self): @@ -313,20 +299,6 @@ class OOoDocument(XMLObject,File): data=self.oo_data return data - security.declarePrivate('_unpackData') - def _unpackData(self,data): - """ - Unpack Pdata into string - """ - if isinstance(data,str): - return data - else: - data_list=[] - while data is not None: - data_list.append(data.data) - data=data.next - return ''.join(data_list) - security.declareProtected(Permissions.View,'hasFile') def hasFile(self): """ @@ -373,8 +345,15 @@ class OOoDocument(XMLObject,File): return self.returnMessage('already has a snapshot') raise ConvertionError('already has a snapshot') # making snapshot - self.makeFile('pdf') - self.snapshot=Pdata(self._unpackData(self.cached_data['pdf'])) # XXX - use propertysheet accessors + # we have to figure out which pdf format to use + tgts=[x[1] for x in self.getTargetFormatItemList() if x[1].endswith('pdf')] + if len(tgts)>1: + return self.returnMessage('multiple pdf formats found - this shouldnt happen') + if len(tgts)==0: + return self.returnMessage('no pdf format found') + fmt=tgts[0] + self.makeFile(fmt) + self.snapshot=Pdata(self._unpackData(self.cached_data[fmt])) # XXX - use propertysheet accessors return self.returnMessage('snapshot created') security.declareProtected(Permissions.View,'getSnapshot') @@ -515,19 +494,6 @@ class OOoDocument(XMLObject,File): s+='</table>' return s - # this will go out after refactoring (will be inherited from DMS File - # and eventually from File - security.declareProtected(Permissions.ModifyPortalContent, 'guessMimeType') - def guessMimeType(self,fname=''): - '''get mime type from file name''' - if fname=='':fname=self.getOriginalFilename() - if fname: - content_type,enc=mimetypes.guess_type(fname) - if content_type is not None: - self.content_type=content_type - return content_type - - # make sure to call the right edit methods _edit=File._edit edit=File.edit diff --git a/product/ERP5OOo/Document/PdfDocument.py b/product/ERP5OOo/Document/PdfDocument.py new file mode 100644 index 0000000000000000000000000000000000000000..ace6c49869cd5648dba7adb38d72899518700378 --- /dev/null +++ b/product/ERP5OOo/Document/PdfDocument.py @@ -0,0 +1,88 @@ + +############################################################################## +# +# Copyright (c) 2002-2006 Nexedi SARL and Contributors. All Rights Reserved. +# +# WARNING: This program as such is intended to be used by professional +# programmers who take the whole responsability of assessing all potential +# consequences resulting from its eventual inadequacies and bugs +# End users who are looking for a ready-to-use solution with commercial +# garantees and support are strongly adviced to contract a Free Software +# Service Company +# +# This program is Free Software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +############################################################################## + +from AccessControl import ClassSecurityInfo +from Products.CMFCore.WorkflowCore import WorkflowMethod +from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface +from Products.ERP5Type.Cache import CachingMethod +from Products.ERP5OOo.Document.DMSFile import DMSFile + +import tempfile, os + + +class PdfDocument(DMSFile): + """ + PdfDocument - same as file, but has its own getSearchableText method + (converts via pdftotext) + """ + # CMF Type Definition + meta_type = 'ERP5 Pdf Document' + portal_type = 'Pdf Document' + isPortalContent = 1 + isRADContent = 1 + + # Declarative security + security = ClassSecurityInfo() + security.declareObjectProtected(Permissions.AccessContentsInformation) + + # Default Properties + property_sheets = ( PropertySheet.Base + , PropertySheet.CategoryCore + , PropertySheet.DublinCore + , PropertySheet.Version + , PropertySheet.Reference + , PropertySheet.DMSFile + , PropertySheet.Document + ) + + searchable_attrs=DMSFile.searchable_attrs+('text_content',) + + ### Content indexing methods + security.declareProtected(Permissions.View, 'getSearchableText') + def getSearchableText(self, md=None, force=0): + """ + Used by the catalog for basic full text indexing + we get text content by using pdftotext + but we have to do it only once + """ + if hasattr(self,'data') and (force==1 or self.getTextContent() is None): + tmp=tempfile.NamedTemporaryFile() + tmp.write(self._unpackData(self.data)) + tmp.seek(0) + cmd='pdftotext -layout -enc UTF-8 -nopgbrk %s -' % tmp.name + r=os.popen(cmd) + self.setTextContent(r.read().replace('\n',' ')) + tmp.close() + r.close() + return DMSFile.getSearchableText(self,md) + + SearchableText=getSearchableText + + +# vim: syntax=python shiftwidth=2 + diff --git a/product/ERP5OOo/Document/__init__.py b/product/ERP5OOo/Document/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391