Commit 15161f5c authored by Jean-Paul Smets's avatar Jean-Paul Smets

Initial version.

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@13568 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 015387de
##############################################################################
#
# Copyright (c) 2002-2007 Nexedi SARL and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from AccessControl import ClassSecurityInfo
from Products.CMFCore.WorkflowCore import WorkflowMethod
from Products.CMFCore.utils import getToolByName
from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface
from Products.ERP5Type.XMLObject import XMLObject
from Products.ERP5.Document.Url import UrlMixIn
import mimetypes
import re
import urllib
from htmlentitydefs import name2codepoint
from DateTime import DateTime
class ExternalSource(XMLObject, UrlMixIn):
"""
An External Source consists of single URL which defines the
root of a collection of documents, each of which can be accessed
individually. The URL can be an http site, an ftp site, a local repository,
a samba server, etc.
The main purpose of External Sources is to group related documents
and define shared security policies, shared updated policies, etc.
For example, all pages of
a wiki with restricted access rights share the same security policy
(ex. team, project, etc.). Another purpose of the External Source class is
to make it easy to manage external sources of knowledge (adding them,
removing them, etc.).
The second purpose of an external source is to provide a way to search
contents stored externally in a system which is not compatible with
ERP5 Catalog.
Example of external sources:
- a Web Site
- a SAMBA share
- an FTP server
- a backup server
- a mail directory
- a mailing list archive
ExternalSource may be subclassed to provide more automation
features. This is useful for example to manage the creation
of a mailing list, the deletion of mailing list and the
definition of the members of a mailing list in a centralised way.
NOTE: RSS feeds are not external sources but standard Text
documents with transformation and update policy. They use
the populateContent method to create subcontent from
a root content. This is different with crawling.
NOTE2: access to filesystems through URL requires to extend
urllib2 so that directories are handled as if they were web
pages OR RSS feed with a list of files (and associated URL).
Complete implemetation of external sources will require
major extensions to urllib2 (or equivalent).
NOTE3: it is possible to make external search sources persistent
by triggering an activity with newContent for every displayed
result. This can be done by wrapping the results in a generator
(yield). The interest of this approach is to make it possible to
search already searched contents without having to go through the
external source search (ie. with the front page search).
"""
# CMF Type Definition
meta_type = 'ERP5 External Source'
portal_type = 'External Source'
isPortalContent = 1
isRADContent = 1
# Declarative security
security = ClassSecurityInfo()
security.declareObjectProtected(Permissions.AccessContentsInformation)
# Default Properties
property_sheets = ( PropertySheet.Base
, PropertySheet.CategoryCore
, PropertySheet.DublinCore
, PropertySheet.Version
, PropertySheet.Reference
, PropertySheet.Document
, PropertySheet.TextDocument
, PropertySheet.Url
, PropertySheet.ExternalDocument
, PropertySheet.Periodicity
)
# Crawling API
security.declareProtected(Permissions.ModifyPortalContent, 'crawlContent')
def crawlContent(self):
"""
Creates the initial content from the URL by crawling the root
"""
self.portal_contributions.crawlContent(self)
security.declareProtected(Permissions.AccessContentsInformation, 'getContentURLList')
def getContentURLList(self):
"""
Returns the root of the crawling process
"""
return [self.asURL()]
security.declareProtected(Permissions.AccessContentsInformation, 'getContentBaseURL')
def getContentBaseURL(self):
"""
Returns None to force crawler to ignore this parameter
"""
return None
# Search API
security.declareProtected(Permissions.SearchCatalog, 'searchResults')
def searchResults(self, **kw):
"""
Search results. There is no notion of security here since
the source is external.
NOTE: implementation is delegated to a script so that different
kinds of sources may be implemented using different portal
types.
NOTE2: a typical implementation consists in creating
a specific SQL method with a dedicated connector then
force the SQL catalog to use that method instead of the standard
ones, yet delegate the SQL generation to the catalog.
"""
method = self._getTypeBasedMethod('searchResults')
return method(**kw)
security.declareProtected(Permissions.SearchCatalog, 'countResults')
def countResults(self, **kw):
"""
Count results. There is no notion of security here since
the source is external.
NOTE: implementation is delegated to a script so that different
kinds of sources may be implemented using different portal
types.
"""
method = self._getTypeBasedMethod('countResults')
return method(**kw)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment