diff --git a/product/ERP5/Document/ExternalSource.py b/product/ERP5/Document/ExternalSource.py new file mode 100644 index 0000000000000000000000000000000000000000..54e1799c265645c68b85991483c3813070a155b7 --- /dev/null +++ b/product/ERP5/Document/ExternalSource.py @@ -0,0 +1,172 @@ +############################################################################## +# +# Copyright (c) 2002-2007 Nexedi SARL and Contributors. All Rights Reserved. +# +# WARNING: This program as such is intended to be used by professional +# programmers who take the whole responsability of assessing all potential +# consequences resulting from its eventual inadequacies and bugs +# End users who are looking for a ready-to-use solution with commercial +# garantees and support are strongly adviced to contract a Free Software +# Service Company +# +# This program is Free Software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +############################################################################## + +from AccessControl import ClassSecurityInfo +from Products.CMFCore.WorkflowCore import WorkflowMethod +from Products.CMFCore.utils import getToolByName +from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface +from Products.ERP5Type.XMLObject import XMLObject +from Products.ERP5.Document.Url import UrlMixIn + +import mimetypes +import re +import urllib +from htmlentitydefs import name2codepoint +from DateTime import DateTime + +class ExternalSource(XMLObject, UrlMixIn): + """ + An External Source consists of single URL which defines the + root of a collection of documents, each of which can be accessed + individually. The URL can be an http site, an ftp site, a local repository, + a samba server, etc. + + The main purpose of External Sources is to group related documents + and define shared security policies, shared updated policies, etc. + For example, all pages of + a wiki with restricted access rights share the same security policy + (ex. team, project, etc.). Another purpose of the External Source class is + to make it easy to manage external sources of knowledge (adding them, + removing them, etc.). + + The second purpose of an external source is to provide a way to search + contents stored externally in a system which is not compatible with + ERP5 Catalog. + + Example of external sources: + + - a Web Site + + - a SAMBA share + + - an FTP server + + - a backup server + + - a mail directory + + - a mailing list archive + + ExternalSource may be subclassed to provide more automation + features. This is useful for example to manage the creation + of a mailing list, the deletion of mailing list and the + definition of the members of a mailing list in a centralised way. + + NOTE: RSS feeds are not external sources but standard Text + documents with transformation and update policy. They use + the populateContent method to create subcontent from + a root content. This is different with crawling. + + NOTE2: access to filesystems through URL requires to extend + urllib2 so that directories are handled as if they were web + pages OR RSS feed with a list of files (and associated URL). + Complete implemetation of external sources will require + major extensions to urllib2 (or equivalent). + + NOTE3: it is possible to make external search sources persistent + by triggering an activity with newContent for every displayed + result. This can be done by wrapping the results in a generator + (yield). The interest of this approach is to make it possible to + search already searched contents without having to go through the + external source search (ie. with the front page search). + """ + # CMF Type Definition + meta_type = 'ERP5 External Source' + portal_type = 'External Source' + isPortalContent = 1 + isRADContent = 1 + + # Declarative security + security = ClassSecurityInfo() + security.declareObjectProtected(Permissions.AccessContentsInformation) + + # Default Properties + property_sheets = ( PropertySheet.Base + , PropertySheet.CategoryCore + , PropertySheet.DublinCore + , PropertySheet.Version + , PropertySheet.Reference + , PropertySheet.Document + , PropertySheet.TextDocument + , PropertySheet.Url + , PropertySheet.ExternalDocument + , PropertySheet.Periodicity + ) + + # Crawling API + security.declareProtected(Permissions.ModifyPortalContent, 'crawlContent') + def crawlContent(self): + """ + Creates the initial content from the URL by crawling the root + """ + self.portal_contributions.crawlContent(self) + + security.declareProtected(Permissions.AccessContentsInformation, 'getContentURLList') + def getContentURLList(self): + """ + Returns the root of the crawling process + """ + return [self.asURL()] + + security.declareProtected(Permissions.AccessContentsInformation, 'getContentBaseURL') + def getContentBaseURL(self): + """ + Returns None to force crawler to ignore this parameter + """ + return None + + # Search API + security.declareProtected(Permissions.SearchCatalog, 'searchResults') + def searchResults(self, **kw): + """ + Search results. There is no notion of security here since + the source is external. + + NOTE: implementation is delegated to a script so that different + kinds of sources may be implemented using different portal + types. + + NOTE2: a typical implementation consists in creating + a specific SQL method with a dedicated connector then + force the SQL catalog to use that method instead of the standard + ones, yet delegate the SQL generation to the catalog. + """ + method = self._getTypeBasedMethod('searchResults') + return method(**kw) + + security.declareProtected(Permissions.SearchCatalog, 'countResults') + def countResults(self, **kw): + """ + Count results. There is no notion of security here since + the source is external. + + NOTE: implementation is delegated to a script so that different + kinds of sources may be implemented using different portal + types. + """ + method = self._getTypeBasedMethod('countResults') + return method(**kw)