Record spidering status as a comment on the 'process' workflow action
instead of calling setExternalProcessingStatusMessage().

Review notes on this revision of the patch:
* The workflow tool is looked up from the document itself at call time
  (self.portal_workflow, i.e. Zope acquisition) and the document is passed
  as `self`.  The earlier revision called getToolByName('portal_workflow')
  at module import time without an object to acquire from, never defined
  `portal_workflow` in ExternalDocument.py, and passed an undefined name
  `context`; the module-level lookup hunk is therefore dropped.
* Assumes the document is acquisition-wrapped inside the portal when these
  methods run - TODO confirm for any caller that builds temporary objects.
* Leading whitespace of the hunks was reconstructed assuming the files use
  a 2-space indent; verify the context lines against the target files (or
  apply with whitespace tolerance).
* The "index" header lines were removed because their post-image blob ids
  no longer describe the result of this patch.

diff --git a/product/ERP5/Document/ExternalDocument.py b/product/ERP5/Document/ExternalDocument.py
--- a/product/ERP5/Document/ExternalDocument.py
+++ b/product/ERP5/Document/ExternalDocument.py
@@ -127,20 +127,24 @@ class ExternalDocument(Document):
       s,inf=self._spiderSource()
     except Exception,e:
       self.log(e,level=1)
-      self.setExternalProcessingStatusMessage("Tried on %s: %s" % (self._time(),str(e)))
+      msg = "Tried on %s: %s" % (self._time(),str(e))
+      self.portal_workflow.doActionFor(self, 'process', comment=msg)
       return False
     chars=len(s)
     if chars==0:
-      self.setExternalProcessingStatusMessage("Tried on %s: got empty string" % self._time())
+      msg = "Tried on %s: got empty string" % self._time()
+      self.portal_workflow.doActionFor(self, 'process', comment=msg)
       return False
     try:
       s=self._processData(s,inf)
     except Exception,e:
       self.log(e,level=1)
-      self.setExternalProcessingStatusMessage("Spidered on %s, %i chars, but could not process; reason: %s" % (self._time(), chars, str(e)))
+      msg = "Spidered on %s, %i chars, but could not process; reason: %s" % (self._time(), chars, str(e))
+      self.portal_workflow.doActionFor(self, 'process', comment=msg)
       return False
     self.setTextContent(s)
-    self.setExternalProcessingStatusMessage("Spidered on %s, %i chars, recorded %i chars" % (self._time(), chars, len(s)))
+    msg = "Spidered on %s, %i chars, recorded %i chars" % (self._time(), chars, len(s))
+    self.portal_workflow.doActionFor(self, 'process', comment=msg)
     return True
 
   security.declareProtected(Permissions.View, 'getProtocolItemList')
diff --git a/product/ERP5/Document/ExternalWebPage.py b/product/ERP5/Document/ExternalWebPage.py
--- a/product/ERP5/Document/ExternalWebPage.py
+++ b/product/ERP5/Document/ExternalWebPage.py
@@ -202,7 +202,8 @@ class ExternalWebPage(ExternalDocument):
     try:
       s=recode(s)
     except CanNotDecode:
-      self.setExternalProcessingStatusMessage("Spidered on %s, %i chars, but could not decode" % (self._time(), chars))
+      msg = "Spidered on %s, %i chars, but could not decode" % (self._time(), chars)
+      self.portal_workflow.doActionFor(self, 'process', comment=msg)
       return False
     s=stripHtml(s) # remove headers, doctype and the like
     s=clearHtml(s) # remove tags