Commit de2545fc authored by Nicolas Delaby's avatar Nicolas Delaby

Refactoring of DMS.

- file_name becomes filename
- filename values are not stored in source_reference
Contribution Tool will not honour id arguments.
Contribution Tool can create any kind of document.
Portal Contribution Registry can read extension, content_type and read content_type from data
to guess what will be the best Portal Type to use.

All discoverable methods (IDiscoverable) can change the portal_type of document.
  (migratePortalType)
User can change portal_type of document through UI with simple Action.
Crawling will not hardcode ids of documents depending on their URLs, thanks to
Portal Url Registry





git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@40971 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 4627391c
This diff is collapsed.
......@@ -114,22 +114,14 @@ class PDFDocument(Image):
"""
if not self.hasData():
return ''
tmp = tempfile.NamedTemporaryFile()
tmp.write(self.getData())
tmp.seek(0)
try:
command = ['pdftotext', '-layout', '-enc', 'UTF-8',
'-nopgbrk', tmp.name, '-']
try:
command_result = Popen(command, stdout=PIPE).communicate()[0]
except OSError, e:
if e.errno == errno.ENOENT:
raise ConversionError('pdftotext was not found')
raise
finally:
tmp.close()
if command_result:
return command_result
mime_type = 'text/plain'
portal_transforms = self.getPortalObject().portal_transforms
filename = self.getStandardFilename(format='txt')
result = portal_transforms.convertToData(mime_type, str(self.getData()),
context=self, filename=filename,
mimetype=self.getContentType())
if result:
return result
else:
# Try to use OCR
# As high dpi images are required, it may take some times to convert the
......@@ -145,13 +137,12 @@ class PDFDocument(Image):
frame=page_number, display='identical')
if not src_mimetype.endswith('png'):
continue
content = '%s' % png_data
mime_type = 'text/plain'
content = str(png_data)
if content is not None:
portal_transforms = getToolByName(self, 'portal_transforms')
filename = self.getStandardFilename(format='png')
result = portal_transforms.convertToData(mime_type, content,
context=self,
filename=self.getTitleOrId(),
filename=filename,
mimetype=src_mimetype)
if result is None:
raise ConversionError('PDFDocument conversion error. '
......
......@@ -45,6 +45,9 @@ try:
from string import Template
except ImportError:
from Products.ERP5Type.patches.string import Template
from Products.ERP5Type.Utils import guessEncodingFromText
from lxml import html as etree_html
class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin,
TextContent, File):
......@@ -147,7 +150,7 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin,
kw['format'] = format
if not self.hasConversion(**kw):
portal_transforms = getToolByName(portal, 'portal_transforms')
filename = self.getSourceReference(self.getTitleOrId())
filename = self.getStandardFilename(format=format)
if mime_type == 'text/html':
mime_type = 'text/x-html-safe'
result = portal_transforms.convertToData(mime_type, text_content,
......@@ -183,9 +186,13 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin,
"""
if self.hasTextContent():
html = self._asHTML()
base_list = re.findall(self.base_parser, str(html))
if base_list:
return base_list[0]
# a document can be entirely stripped by safe_html
# so its html conversion can be empty
if html.strip():
html_tree = etree_html.fromstring(html)
base_list = [href for href in html_tree.xpath('//base/@href') if href]
if base_list:
return str(base_list[0])
return Document.getContentBaseURL(self)
security.declareProtected(Permissions.ModifyPortalContent, 'setBaseData')
......@@ -270,14 +277,14 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin,
return encoded content_type and message if encoding
is not utf-8
"""
codec = document._guessEncoding(text_content, content_type)
codec = guessEncodingFromText(text_content, content_type)
if codec is not None:
try:
text_content = text_content.decode(codec).encode('utf-8')
except (UnicodeDecodeError, LookupError):
message = 'Conversion to base format with codec %r fails' % codec
# try again with another guesser based on file command
codec = document._guessEncoding(text_content, 'text/plain')
codec = guessEncodingFromText(text_content, 'text/plain')
if codec is not None:
try:
text_content = text_content.decode(codec).encode('utf-8')
......
......@@ -29,7 +29,7 @@
from AccessControl import ClassSecurityInfo
from Products.ERP5Type.Globals import InitializeClass
from Products.ERP5Type.Tool.BaseTool import BaseTool
from Products.ERP5Type import Permissions
class ContributionRegistryTool(BaseTool):
......@@ -41,14 +41,18 @@ class ContributionRegistryTool(BaseTool):
security = ClassSecurityInfo()
security.declarePrivate('findPortalTypeName')
def findPortalTypeName(self, file_name='', mime_type=None, data=None):
from Products.ERP5Type.Document import newTempIngestionFile
ingestion_file = newTempIngestionFile(self, 'id')
ingestion_file._edit(file_name=file_name, mime_type=mime_type, data=data)
security.declareProtected(Permissions.AccessContentsInformation,
'findPortalTypeName')
def findPortalTypeName(self, context=None, **kw):
# if a context is passed, ignore other arguments
if context is None:
# Build a temp object edited with provided parameters
from Products.ERP5Type.Document import newTempFile
context = newTempFile(self, 'id')
context.edit(**kw)
for predicate in self.objectValues(sort_on='int_index'):
result = predicate.test(ingestion_file)
result = predicate.test(context)
if result:
return result
......
This diff is collapsed.
......@@ -50,7 +50,7 @@ from Tool import CategoryTool, SimulationTool, RuleTool, IdTool, TemplateTool,\
TrashTool, ContributionTool, NotificationTool, PasswordTool,\
GadgetTool, ContributionRegistryTool, IntrospectionTool,\
AcknowledgementTool, SolverTool, SolverProcessTool,\
ConversionTool, RoundingTool
ConversionTool, RoundingTool, UrlRegistryTool
import ERP5Site
from Document import PythonScript
object_classes = ( ERP5Site.ERP5Site,
......@@ -78,6 +78,7 @@ portal_tools = ( CategoryTool.CategoryTool,
SolverProcessTool.SolverProcessTool,
ConversionTool.ConversionTool,
RoundingTool.RoundingTool,
UrlRegistryTool.UrlRegistryTool,
)
content_classes = ()
content_constructors = ()
......
......@@ -11,7 +11,7 @@
<value>
<dictionary>
<item>
<key> <string>file_extension</string> </key>
<key> <string>extension_from_filename</string> </key>
<value>
<list>
<string>sxd</string>
......@@ -32,7 +32,7 @@
<key> <string>criterion_property</string> </key>
<value>
<tuple>
<string>file_extension</string>
<string>extension_from_filename</string>
</tuple>
</value>
</item>
......@@ -46,7 +46,7 @@
</item>
<item>
<key> <string>int_index</string> </key>
<value> <int>60</int> </value>
<value> <int>10</int> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
......@@ -60,7 +60,7 @@
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Drawing</string> </value>
<value> <string>Drawing by extension</string> </value>
</item>
</dictionary>
</pickle>
......
......@@ -11,7 +11,7 @@
<value>
<dictionary>
<item>
<key> <string>file_extension</string> </key>
<key> <string>extension_from_filename</string> </key>
<value>
<list>
<string>gif</string>
......@@ -35,7 +35,7 @@
<key> <string>criterion_property</string> </key>
<value>
<tuple>
<string>file_extension</string>
<string>extension_from_filename</string>
</tuple>
</value>
</item>
......@@ -49,7 +49,7 @@
</item>
<item>
<key> <string>int_index</string> </key>
<value> <int>20</int> </value>
<value> <int>10</int> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
......@@ -63,7 +63,7 @@
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Image</string> </value>
<value> <string>Image by extension</string> </value>
</item>
</dictionary>
</pickle>
......
......@@ -11,7 +11,7 @@
<value>
<dictionary>
<item>
<key> <string>file_extension</string> </key>
<key> <string>extension_from_filename</string> </key>
<value>
<list>
<string>pdf</string>
......@@ -31,7 +31,7 @@
<key> <string>criterion_property</string> </key>
<value>
<tuple>
<string>file_extension</string>
<string>extension_from_filename</string>
</tuple>
</value>
</item>
......@@ -45,7 +45,7 @@
</item>
<item>
<key> <string>int_index</string> </key>
<value> <int>30</int> </value>
<value> <int>10</int> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
......@@ -59,7 +59,7 @@
</item>
<item>
<key> <string>title</string> </key>
<value> <string>PDF</string> </value>
<value> <string>PDF by extension</string> </value>
</item>
</dictionary>
</pickle>
......
......@@ -11,7 +11,7 @@
<value>
<dictionary>
<item>
<key> <string>mime_type</string> </key>
<key> <string>content_type</string> </key>
<value>
<list>
<string>application/pdf</string>
......@@ -31,7 +31,7 @@
<key> <string>criterion_property</string> </key>
<value>
<tuple>
<string>mime_type</string>
<string>content_type</string>
</tuple>
</value>
</item>
......@@ -45,7 +45,7 @@
</item>
<item>
<key> <string>int_index</string> </key>
<value> <int>30</int> </value>
<value> <int>20</int> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
......@@ -59,7 +59,7 @@
</item>
<item>
<key> <string>title</string> </key>
<value> <string>PDF</string> </value>
<value> <string>PDF by mimetype</string> </value>
</item>
</dictionary>
</pickle>
......
......@@ -11,7 +11,7 @@
<value>
<dictionary>
<item>
<key> <string>file_extension</string> </key>
<key> <string>extension_from_filename</string> </key>
<value>
<list>
<string>ppt</string>
......@@ -34,7 +34,7 @@
<key> <string>criterion_property</string> </key>
<value>
<tuple>
<string>file_extension</string>
<string>extension_from_filename</string>
</tuple>
</value>
</item>
......@@ -48,7 +48,7 @@
</item>
<item>
<key> <string>int_index</string> </key>
<value> <int>50</int> </value>
<value> <int>10</int> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
......@@ -62,7 +62,7 @@
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Presentation</string> </value>
<value> <string>Presentation by extension</string> </value>
</item>
</dictionary>
</pickle>
......
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ContributionPredicate" module="Products.ERP5Type.Document.ContributionPredicate"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_identity_criterion</string> </key>
<value>
<dictionary>
<item>
<key> <string>content_type_from_content</string> </key>
<value>
<list>
<string>application/vnd.ms-excel</string>
<string>application/vnd.ms-office</string>
<string>application/msexcel</string>
<string>application/vnd.oasis.opendocument.spreadsheet</string>
<string>application/vnd.oasis.opendocument.spreadsheet-template</string>
</list>
</value>
</item>
</dictionary>
</value>
</item>
<item>
<key> <string>_range_criterion</string> </key>
<value>
<dictionary/>
</value>
</item>
<item>
<key> <string>criterion_property</string> </key>
<value>
<tuple>
<string>content_type_from_content</string>
</tuple>
</value>
</item>
<item>
<key> <string>destination_portal_type</string> </key>
<value> <string>Spreadsheet</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>spreadsheet_by_content</string> </value>
</item>
<item>
<key> <string>int_index</string> </key>
<value> <int>70</int> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Contribution Predicate</string> </value>
</item>
<item>
<key> <string>test_method_id</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Spreadsheet by content</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
......@@ -11,7 +11,7 @@
<value>
<dictionary>
<item>
<key> <string>file_extension</string> </key>
<key> <string>extension_from_filename</string> </key>
<value>
<list>
<string>xls</string>
......@@ -35,7 +35,7 @@
<key> <string>criterion_property</string> </key>
<value>
<tuple>
<string>file_extension</string>
<string>extension_from_filename</string>
</tuple>
</value>
</item>
......@@ -49,7 +49,7 @@
</item>
<item>
<key> <string>int_index</string> </key>
<value> <int>40</int> </value>
<value> <int>10</int> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
......@@ -63,7 +63,7 @@
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Spreadsheet</string> </value>
<value> <string>Spreadsheet by extension</string> </value>
</item>
</dictionary>
</pickle>
......
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ContributionPredicate" module="Products.ERP5Type.Document.ContributionPredicate"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_identity_criterion</string> </key>
<value>
<dictionary>
<item>
<key> <string>content_type</string> </key>
<value>
<list>
<string>text/plain</string>
</list>
</value>
</item>
</dictionary>
</value>
</item>
<item>
<key> <string>_range_criterion</string> </key>
<value>
<dictionary/>
</value>
</item>
<item>
<key> <string>criterion_property</string> </key>
<value>
<tuple>
<string>content_type</string>
</tuple>
</value>
</item>
<item>
<key> <string>destination_portal_type</string> </key>
<value> <string>Text</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>text_by_conent_type</string> </value>
</item>
<item>
<key> <string>int_index</string> </key>
<value> <int>20</int> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Contribution Predicate</string> </value>
</item>
<item>
<key> <string>test_method_id</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Text by content type</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ContributionPredicate" module="Products.ERP5Type.Document.ContributionPredicate"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_identity_criterion</string> </key>
<value>
<dictionary>
<item>
<key> <string>content_type_from_content</string> </key>
<value>
<list>
<string>text/plain</string>
</list>
</value>
</item>
</dictionary>
</value>
</item>
<item>
<key> <string>_range_criterion</string> </key>
<value>
<dictionary/>
</value>
</item>
<item>
<key> <string>criterion_property</string> </key>
<value>
<tuple>
<string>content_type_from_content</string>
</tuple>
</value>
</item>
<item>
<key> <string>destination_portal_type</string> </key>
<value> <string>Text</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>text_by_content</string> </value>
</item>
<item>
<key> <string>int_index</string> </key>
<value> <int>70</int> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Contribution Predicate</string> </value>
</item>
<item>
<key> <string>test_method_id</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Text by mimetype from data</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
......@@ -11,7 +11,7 @@
<value>
<dictionary>
<item>
<key> <string>file_extension</string> </key>
<key> <string>extension_from_filename</string> </key>
<value>
<list>
<string>txt</string>
......@@ -36,7 +36,7 @@
<key> <string>criterion_property</string> </key>
<value>
<tuple>
<string>file_extension</string>
<string>extension_from_filename</string>
</tuple>
</value>
</item>
......@@ -64,7 +64,7 @@
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Text</string> </value>
<value> <string>Text by extension</string> </value>
</item>
</dictionary>
</pickle>
......
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ContributionPredicate" module="Products.ERP5Type.Document.ContributionPredicate"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_identity_criterion</string> </key>
<value>
<dictionary>
<item>
<key> <string>content_type_from_content</string> </key>
<value>
<list>
<string>text/html</string>
</list>
</value>
</item>
</dictionary>
</value>
</item>
<item>
<key> <string>_range_criterion</string> </key>
<value>
<dictionary/>
</value>
</item>
<item>
<key> <string>criterion_property</string> </key>
<value>
<tuple>
<string>content_type_from_content</string>
</tuple>
</value>
</item>
<item>
<key> <string>destination_portal_type</string> </key>
<value> <string>Web Page</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>web_page_by_content</string> </value>
</item>
<item>
<key> <string>int_index</string> </key>
<value> <int>70</int> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Contribution Predicate</string> </value>
</item>
<item>
<key> <string>test_method_id</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Web Page by mimetype from data</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
......@@ -11,7 +11,7 @@
<value>
<dictionary>
<item>
<key> <string>file_extension</string> </key>
<key> <string>extension_from_filename</string> </key>
<value>
<list>
<string>html</string>
......@@ -33,7 +33,7 @@
<key> <string>criterion_property</string> </key>
<value>
<tuple>
<string>file_extension</string>
<string>extension_from_filename</string>
</tuple>
</value>
</item>
......@@ -47,7 +47,7 @@
</item>
<item>
<key> <string>int_index</string> </key>
<value> <int>90</int> </value>
<value> <int>10</int> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
......@@ -61,7 +61,7 @@
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Web Page</string> </value>
<value> <string>Web Page by extension</string> </value>
</item>
</dictionary>
</pickle>
......
......@@ -11,7 +11,7 @@
<value>
<dictionary>
<item>
<key> <string>mime_type</string> </key>
<key> <string>content_type</string> </key>
<value>
<list>
<string>text/html</string>
......@@ -31,7 +31,7 @@
<key> <string>criterion_property</string> </key>
<value>
<tuple>
<string>mime_type</string>
<string>content_type</string>
</tuple>
</value>
</item>
......@@ -45,13 +45,7 @@
</item>
<item>
<key> <string>int_index</string> </key>
<value> <int>90</int> </value>
</item>
<item>
<key> <string>membership_criterion_base_category</string> </key>
<value>
<tuple/>
</value>
<value> <int>20</int> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
......@@ -65,7 +59,7 @@
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Web Page</string> </value>
<value> <string>Web Page by mimetype</string> </value>
</item>
</dictionary>
</pickle>
......
......@@ -24,6 +24,22 @@
</tuple>
</value>
</item>
<item>
<key> <string>_Add_portal_content_Permission</string> </key>
<value>
<tuple>
<string>Manager</string>
</tuple>
</value>
</item>
<item>
<key> <string>_Delete_objects_Permission</string> </key>
<value>
<tuple>
<string>Manager</string>
</tuple>
</value>
</item>
<item>
<key> <string>_Modify_portal_content_Permission</string> </key>
<value>
......@@ -252,6 +268,22 @@ It\'s the lowest priority one; ie. managers can create higher priority preferenc
<key> <string>preferred_date_order</string> </key>
<value> <string>ymd</string> </value>
</item>
<item>
<key> <string>preferred_document_file_name_regular_expression</string> </key>
<value> <string encoding="cdata"><![CDATA[
(?P<reference>[A-Z&é@{]{3,7})-(?P<language>[a-z]{2})-(?P<version>[0-9]{3})
]]></string> </value>
</item>
<item>
<key> <string>preferred_document_reference_regular_expression</string> </key>
<value> <string encoding="cdata"><![CDATA[
(?P<reference>[A-Z&é@{]{3,7})(-(?P<language>[a-z]{2}))?(-(?P<version>[0-9]{3}))?
]]></string> </value>
</item>
<item>
<key> <string>preferred_event_assessment_form_id</string> </key>
<value>
......
......@@ -58,8 +58,8 @@ from zExceptions import Unauthorized\n
format = None\n
# Always force download of document even if format is supported\n
# by browser\n
file_name = context.getStandardFileName(format)\n
response.setHeader(\'Content-disposition\', \'attachment; filename="%s"\' % file_name)\n
filename = context.getStandardFilename(format)\n
response.setHeader(\'Content-disposition\', \'attachment; filename="%s"\' % filename)\n
\n
try:\n
return context.index_html(request, response, format)\n
......@@ -111,7 +111,7 @@ except Unauthorized:\n
<string>None</string>
<string>format</string>
<string>context</string>
<string>file_name</string>
<string>filename</string>
<string>msg</string>
<string>dict</string>
</tuple>
......
......@@ -222,12 +222,16 @@
<value>
<list>
<tuple>
<string>file_extension</string>
<string>file_extension</string>
<string>extension_from_filename</string>
<string>extension_from_filename</string>
</tuple>
<tuple>
<string>mime_type</string>
<string>mime_type</string>
<string>content_type</string>
<string>content_type</string>
</tuple>
<tuple>
<string>content_type_from_content</string>
<string>content_type_from_content</string>
</tuple>
</list>
</value>
......
......@@ -352,6 +352,10 @@
<key> <string>css_class</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>default_display_style</string> </key>
<value> <string>table</string> </value>
</item>
<item>
<key> <string>default_params</string> </key>
<value>
......@@ -362,6 +366,12 @@
<key> <string>description</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>display_style_list</string> </key>
<value>
<list/>
</value>
</item>
<item>
<key> <string>domain_root_list</string> </key>
<value>
......@@ -396,10 +406,18 @@
<list/>
</value>
</item>
<item>
<key> <string>global_search_column</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>hidden</string> </key>
<value> <int>0</int> </value>
</item>
<item>
<key> <string>hide_rows_on_no_search_criterion</string> </key>
<value> <int>0</int> </value>
</item>
<item>
<key> <string>lines</string> </key>
<value> <int>20</int> </value>
......@@ -425,6 +443,10 @@
</list>
</value>
</item>
<item>
<key> <string>page_navigation_mode</string> </key>
<value> <string>slider</string> </value>
</item>
<item>
<key> <string>page_template</string> </key>
<value> <string></string> </value>
......@@ -445,6 +467,10 @@
<key> <string>report_tree</string> </key>
<value> <int>0</int> </value>
</item>
<item>
<key> <string>row_css_method</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>search</string> </key>
<value> <int>0</int> </value>
......@@ -490,10 +516,22 @@
<key> <string>stat_method</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>style_columns</string> </key>
<value>
<list/>
</value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Contribution Predicates</string> </value>
</item>
<item>
<key> <string>untranslatable_columns</string> </key>
<value>
<list/>
</value>
</item>
<item>
<key> <string>url_columns</string> </key>
<value>
......
40819
\ No newline at end of file
40820
\ No newline at end of file
......@@ -22,8 +22,12 @@ portal_contribution_registry/image_extension
portal_contribution_registry/pdf_extension
portal_contribution_registry/pdf_mimetype
portal_contribution_registry/presentation_extension
portal_contribution_registry/spreadsheet_by_content
portal_contribution_registry/spreadsheet_extension
portal_contribution_registry/text_by_conent_type
portal_contribution_registry/text_by_content
portal_contribution_registry/text_extension
portal_contribution_registry/web_page_by_content
portal_contribution_registry/webpage_extension
portal_contribution_registry/webpage_mimetype
portal_domains/base_day_domain
......
# -*- coding: utf-8 -*-
##############################################################################
#
# Copyright (c) 2010 Nexedi SA and Contributors. All Rights Reserved.
# Jean-Paul Smets-Solanes <jp@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from zope.interface import Interface
class IDiscoverable(Interface):
  """
  Discoverable interface specification.

  Documents which implement IDiscoverable provide methods to discover
  and update metadata properties from content, user input, file name,
  etc.
  """

  def getContentInformation():
    """
    Return a dictionary of possible metadata which can be extracted
    from the document content (ex. title from an HTML file, creation
    date from a PDF document, etc.)
    """

  def getPropertyDictFromUserLogin(user_login=None):
    """
    Based on the user_login, find out all properties which can be
    discovered to later update document metadata.

    user_login -- optional user login ID
    """

  def getPropertyDictFromContent():
    """
    Based on the result of getContentInformation, find out all
    properties which can be discovered to later update document
    metadata.
    """

  def getPropertyDictFromFilename(filename):
    """
    Based on the file name, find out all properties which can be
    discovered to later update document metadata.

    filename -- file name to use in discovery process
    """

  def getPropertyDictFromInput():
    """
    Based on the user input, find out all properties which can be
    discovered to later update document metadata.
    """

  def discoverMetadata(filename=None, user_login=None):
    """
    Update the document metadata by discovering metadata from the
    user login, the document content, the file name and the user
    input. The order of discovery should be set in system
    preferences.

    filename -- optional file name (ex. AA-BBB-CCC-223-en.doc)
    user_login -- optional user login ID

    XXX - it is unclear if this method should also trigger
    finishIngestion and whether this should be documented here or not
    """

  def finishIngestion():
    """
    Finish the ingestion process (ex. allocate a reference number
    automatically if no reference was defined.)

    XXX - it is unclear if this method should be part of the interface
    """

  def getExtensionFromFilename():
    """
    Return the calculated value of the extension read from the
    filename.
    """

  def getContentTypeFromContent():
    """
    Return the calculated value of the content type read from the
    content.
    """
......@@ -87,7 +87,7 @@ class IDocument(Interface):
input - data supplied with http request or set on the object during (2) (e.g.
discovered from email text)
file_name - data which might be encoded in file name
filename - data which might be encoded in filename
user_login - information about user who is contributing the file
content - data which might be derived from document content
......
......@@ -52,11 +52,11 @@ class IDownloadable(Interface):
kw -- optional conversion parameters
"""
def getStandardFileName(format=None):
def getStandardFilename(format=None):
"""
Returns a standard file name for the document to download.
This method is the reverse of
IMetadataDiscoverable.getPropertyDictFromFileName.
IDiscoverable.getPropertyDictFromFilename.
format -- extension of returned file name
"""
# -*- coding: utf-8 -*-
##############################################################################
#
# Copyright (c) 2010 Nexedi SA and Contributors. All Rights Reserved.
# Nicolas Delaby <nicolas@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from zope.interface import Interface
class IUrl(Interface):
  """
  Url interface specification.

  Declares conversion of a URL to and from its text representation,
  access to its server, port and path parts, and normalisation.
  """

  def asURL():
    """
    Return a text representation of the Url if defined,
    or None otherwise.
    """

  def fromURL(url):
    """
    Analyse a URL and split it into two parts. URLs normally follow
    RFC 1738. However, we accept URLs without the protocol a.k.a.
    scheme part (http, mailto, etc.). In this case only the
    url_string a.k.a. scheme-specific-part is taken into account.
    asURL will then generate the full URL.

    url -- the URL to analyse
    """

  def getURLServer():
    """
    Return the server part of a URL.
    """

  def getURLPort():
    """
    Return the port part of a URL.
    """

  def getURLPath():
    """
    Return the path part of a URL.
    """

  def asNormalisedURL(base_url=None):
    """
    Return a normalised version of the url so that we do not
    download the same content twice.
    This normalisation must refer to the same resource!
    Refer to http://en.wikipedia.org/wiki/URL_normalization .

    base_url -- specifies a default URL and a default target for all
                links on a page. If url is a relative link, we try
                to compute an absolute url with the help of base_url.
    """
# -*- coding: utf-8 -*-
##############################################################################
#
# Copyright (c) 2010 Nexedi SA and Contributors. All Rights Reserved.
# Nicolas Delaby <nicolas@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from zope.interface import Interface
class IUrlRegistryTool(Interface):
  """Tool to register URLs

  This tool aims to keep the URL management of crawlable sources
  consistent, so that the association between an external resource
  identifier and the document generated inside ERP5 is preserved.

  Multiple URLs can be associated to the same reference.

  A System Preference can be used to configure the global namespace.
  This enables isolation of URL mappings for different Groups.

  This is a configurable tool, in order to support different scopes
  for mappings. So it is possible to restrict the crawling of a URL
  to only once in the context of the portal; or to restrict it to
  once in the scope of an external_source or of a module only
  (crawling the same URL multiple times for a portal).
  """

  def clearUrlRegistryTool(context=None):
    """Unregister all URLs in all namespaces.
    Only available for Manager.

    context - a context to access the container of mappings.
    """

  def registerURL(url, reference, context=None):
    """Register the mapping url:reference.

    This method is meant to be called from an interaction workflow
    triggered on _setReference, in order to keep the association
    url:reference up to date.

    url - external resource identifier
    reference - reference of the downloaded resource
                (ERP5 Object instance)
    context - a context to access the container of mappings.
              If not passed, mappings are stored on the tool itself.
    """

  def getReferenceList(context=None):
    """Return all references registered by portal_url_registry
    for the given context.

    context - a context to access the container of mappings.
    """

  def getReferenceFromURL(url, context=None):
    """Return the reference of the document associated to the
    provided URL.

    url - external resource identifier
    context - a context to access the container of mappings.
              If not passed, mappings are stored on the tool itself.
    """

  def getURLListFromReference(reference, context=None):
    """Return the list of URLs associated to the given reference
    and context.

    reference - reference of the downloaded resource
                (ERP5 Object instance)
    context - a context to access the container of mappings.
    """

  def updateUrlRegistryTool():
    """Rebuild all URL mappings for the active preference.
    """
......@@ -139,10 +139,21 @@ class CachedConvertableMixin:
cached_value = data
conversion_md5 = md5_new(str(data.data)).hexdigest()
size = len(data.data)
else:
elif isinstance(data, (str, unicode,)):
cached_value = data
conversion_md5 = md5_new(cached_value).hexdigest()
size = len(cached_value)
elif isinstance(data, dict):
# Dict instances are used to store metadata computed
# from the actual content.
# So this value is intimately related to the conversion cache,
# as it should be cleared each time the document is edited.
# Also, maybe a proper API should be used
cached_value = data
conversion_md5 = None
size = len(cached_value)
else:
raise NotImplementedError, 'Not able to store type:%r' % type(data)
if date is None:
date = DateTime()
stored_data_dict = {'content_md5': self.getContentMd5(),
......
# -*- coding: utf-8 -*-
##############################################################################
#
# Copyright (c) 2009 Nexedi SA and Contributors. All Rights Reserved.
# Copyright (c) 2010 Nexedi SA and Contributors. All Rights Reserved.
# Ivan Tyagov <ivan@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
......@@ -27,8 +27,13 @@
#
##############################################################################
from AccessControl import ClassSecurityInfo, getSecurityManager
from AccessControl import ClassSecurityInfo
from Products.ERP5Type import Permissions
from Products.ERP5Type.Utils import normaliseUrl
from Products.ERP5Type.DateUtils import convertDateToHour,\
number_of_hours_in_day, number_of_hours_in_year
from urlparse import urlsplit, urlunsplit
from lxml import html as etree_html
class CrawlableMixin:
"""
......@@ -80,3 +85,81 @@ class CrawlableMixin:
method = self._getTypeBasedMethod('isUpdatable',
fallback_script_id = 'Document_isUpdatable')
return method()
security.declareProtected(Permissions.AccessContentsInformation,
'getContentURLList')
def getContentURLList(self):
  """
  Returns a list of URLs referenced by the content of this document.

  Default implementation consists in analysing the document
  converted to HTML (asEntireHTML) and collecting the value of every
  href attribute. When getContentBaseURL returns a non-empty value,
  links are made absolute against it first. Unicode links are
  encoded to UTF-8.

  A document whose HTML conversion is empty references nothing:
  an empty list is returned.

  Subclasses may overload this method if necessary. However, it is
  better to extend the conversion methods in order to produce valid
  HTML, which is useful to many people, rather than overload this
  method which is only useful for crawling.
  """
  html_content = self.asEntireHTML()
  if not (html_content and html_content.strip()):
    # lxml raises ParserError ("Document is empty") on empty or
    # whitespace-only input; there is no link to extract anyway.
    return []
  html_tree = etree_html.fromstring(html_content)
  base_href = self.getContentBaseURL()
  if base_href:
    html_tree.make_links_absolute(base_href)
  href_list = []
  for _element, attribute_name, link, _position in html_tree.iterlinks():
    # For now only links carried by an href attribute are taken into
    # account; links found in other attributes (ie. src) are skipped.
    if attribute_name != 'href':
      continue
    if isinstance(link, unicode):
      link = link.encode('utf-8')
    href_list.append(link)
  return href_list
security.declareProtected(Permissions.AccessContentsInformation,
'getContentBaseURL')
def getContentBaseURL(self):
  """
  Returns the base URL of the content, computed from the URL of
  this document (asURL): scheme and network location are kept,
  the last segment of the path is dropped, query and fragment
  are discarded. The result is encoded to UTF-8 if it is unicode.
  """
  scheme, netloc, path = urlsplit(self.asURL() or '')[:3]
  # Everything before the last '/' is the parent "directory";
  # a path without any '/' has no parent at all.
  parent_path = path.rpartition('/')[0]
  base_url = urlunsplit((scheme, netloc, parent_path, None, None))
  if isinstance(base_url, unicode):
    return base_url.encode('utf-8')
  return base_url
security.declareProtected(Permissions.AccessContentsInformation,
'getContentNormalisedURLList')
def getContentNormalisedURLList(self):
  """
  Calls the URL normaliser (normaliseUrl) on each URL returned by
  getContentURLList, and returns only the URLs associated to the
  same domain as this document.

  Domains are compared on their last two labels only. URLs without
  any domain after normalisation (relative links) are kept. URLs
  which can not be normalised because of a wrong encoding, or whose
  normalised form is empty, are ignored.
  """
  def getDomainSuffix(netloc):
    # in www.example.com or www.3.example.com
    # keep only the example.com part.
    # Labels are joined back with '.', otherwise distinct domains
    # like ab.c and a.bc would be considered as equal.
    suffix = '.'.join(netloc.split('.')[-2:])
    if isinstance(suffix, unicode):
      suffix = suffix.encode('utf-8')
    return suffix

  reference_domain = getDomainSuffix(
                       urlsplit(normaliseUrl(self.asURL() or ''))[1])
  url_list = []
  base_url = self.getContentBaseURL()
  for url in self.getContentURLList():
    try:
      url = normaliseUrl(url, base_url=base_url)
    except UnicodeDecodeError:
      # Ignore wrong encoding errors
      # Web is not a kind world
      continue
    if not url:
      continue
    url_domain = urlsplit(url)[1]
    if url_domain and getDomainSuffix(url_domain) != reference_domain:
      continue
    # if domain is empty (relative link) or domain is same, then OK
    url_list.append(url)
  return url_list
This diff is collapsed.
......@@ -31,6 +31,7 @@ from Products.ERP5Type import Permissions
from Products.ERP5Type.Utils import fill_args_from_request
from Products.CMFCore.utils import getToolByName, _setCacheHeaders,\
_ViewEmulator
import warnings
_MARKER = []
......@@ -108,15 +109,31 @@ class DownloadableMixin:
return str(data)
security.declareProtected(Permissions.AccessContentsInformation,
'getStandardFileName')
def getStandardFileName(self, format=None):
'getStandardFilename')
def getStandardFilename(self, format=None):
"""Returns the document coordinates as a standard file name. This
method is the reverse of getPropertyDictFromFileName.
"""
method = self._getTypeBasedMethod('getStandardFileName',
method = self._getTypeBasedMethod('getStandardFilename',
fallback_script_id='Document_getStandardFilename')
if method is None:
# backward compatibility
method = self._getTypeBasedMethod('getStandardFileName',
fallback_script_id='Document_getStandardFileName')
return method(format=format)
# backward compatibility
security.declareProtected(Permissions.AccessContentsInformation,
'getStandardFileName')
def getStandardFileName(self, format=None):
  """(deprecated) use getStandardFilename() instead.

  Backward compatibility alias: emits a DeprecationWarning, then
  returns the result of getStandardFilename() called with the same
  format argument.
  """
  # Use the dedicated warning category, and stacklevel=2 so that the
  # warning points at the caller which still uses the old name,
  # not at this wrapper.
  warnings.warn('getStandardFileName() is deprecated. '
                'use getStandardFilename() instead.',
                DeprecationWarning, stacklevel=2)
  return self.getStandardFilename(format=format)
method = self._getTypeBasedMethod('getStandardFilename',
fallback_script_id='Document_getStandardFilename')
return method(format=format)
def manage_FTPget(self):
"""Return body for ftp. and WebDAV
"""
......
......@@ -43,6 +43,7 @@ from zExceptions import BadRequest
from Products.ERP5Type.tests.backportUnittest import skip
from Products.ERP5Type.Tool.ClassTool import _aq_reset
from Products.ERP5Type.Workflow import addWorkflowByType
from Products.CMFCore.WorkflowCore import WorkflowException
def getDummyTypeBaseMethod(self):
""" Use a type Base method
......@@ -1248,6 +1249,43 @@ class TestBase(ERP5TypeTestCase, ZopeTestCase.Functional):
self.assertFalse(person.isIndexable)
self.assertEquals(0, len(self.portal.portal_catalog(uid=person.getUid())))
def test_metaWorkflowTransition(self):
  """Test Meta Transition: jump from one state to another without
  any explicitly defined transition.
  """
  workflow_tool = self.portal.portal_workflow
  person_module = self.portal.person_module

  def newDraftPerson():
    # Every scenario starts from a fresh Person, checked to be draft
    new_person = person_module.newContent(portal_type='Person')
    self.assertEquals(new_person.getValidationState(), 'draft')
    return new_person

  # test low-level implementation
  person = newDraftPerson()
  self.assertFalse(workflow_tool.isTransitionPossible(person,
                                                      'invalidate'))
  workflow_tool.validation_workflow._executeMetaTransition(person,
                                                           'invalidated')
  self.assertEquals(person.getValidationState(), 'invalidated')
  # The jump is recorded in history, on top of the initial entry
  validation_history = person.workflow_history['validation_workflow']
  self.assertEquals(len(validation_history), 2)
  last_entry = validation_history[-1]
  self.assertEquals(last_entry['comment'],
                    'Jump from \'draft\' to \'invalidated\'')

  # test high-level implementation, workflow not specified
  person = newDraftPerson()
  workflow_tool._jumpToStateFor(person, 'invalidated')
  self.assertEquals(person.getValidationState(), 'invalidated')

  # test high-level implementation, workflow explicitly specified
  person = newDraftPerson()
  workflow_tool._jumpToStateFor(person, 'invalidated',
                                wf_id='validation_workflow')
  self.assertEquals(person.getValidationState(), 'invalidated')

  # jumping through edit_workflow must raise and leave the
  # validation state untouched
  person = newDraftPerson()
  self.assertRaises(WorkflowException, workflow_tool._jumpToStateFor,
                    person, 'invalidated', wf_id='edit_workflow')
  self.assertEquals(person.getValidationState(), 'draft')
class TestERP5PropertyManager(unittest.TestCase):
"""Tests for ERP5PropertyManager.
"""
......
......@@ -36,7 +36,7 @@ from Products.CMFCore.WorkflowCore import WorkflowException
from Products.ERP5Type.tests.utils import DummyMailHost, FileUpload
from Products.ERP5Type.tests.ERP5TypeTestCase import ERP5TypeTestCase,\
_getConversionServerDict
from Products.ERP5OOo.tests.testIngestion import FILE_NAME_REGULAR_EXPRESSION
from Products.ERP5OOo.tests.testIngestion import FILENAME_REGULAR_EXPRESSION
from Products.ERP5OOo.tests.testIngestion import REFERENCE_REGULAR_EXPRESSION
from Products.ERP5Type.tests.backportUnittest import expectedFailure
......@@ -443,7 +443,7 @@ class TestCRMMailIngestion(BaseTestCRM):
data=self._readTestData(filename)
return self.portal.portal_contributions.newContent(
container_path='event_module',
file_name='postfix_mail.eml',
filename='postfix_mail.eml',
data=data)
def test_findTypeByName_MailMessage(self):
......@@ -451,7 +451,7 @@ class TestCRMMailIngestion(BaseTestCRM):
self.assertEquals(
'Mail Message',
self.portal.portal_contribution_registry.findPortalTypeName(
file_name='postfix_mail.eml', mime_type='message/rfc822', data='Test'
filename='postfix_mail.eml', content_type='message/rfc822', data='Test'
))
def test_Base_getEntityListFromFromHeader(self):
......@@ -767,7 +767,7 @@ class TestCRMMailSend(BaseTestCRM):
conversion_dict = _getConversionServerDict()
default_pref.setPreferredOoodocServerAddress(conversion_dict['hostname'])
default_pref.setPreferredOoodocServerPortNumber(conversion_dict['port'])
default_pref.setPreferredDocumentFileNameRegularExpression(FILE_NAME_REGULAR_EXPRESSION)
default_pref.setPreferredDocumentFileNameRegularExpression(FILENAME_REGULAR_EXPRESSION)
default_pref.setPreferredDocumentReferenceRegularExpression(REFERENCE_REGULAR_EXPRESSION)
if default_pref.getPreferenceState() == 'disabled':
default_pref.enable()
......
......@@ -120,36 +120,36 @@ return predicate.getDestinationPortalType()
tool = self.portal.portal_contribution_registry
# Test extension matching
self.assertEqual(tool.findPortalTypeName(file_name='test.txt'), 'Text')
self.assertEqual(tool.findPortalTypeName(file_name='test.odt'), 'Text')
self.assertEqual(tool.findPortalTypeName(file_name='001.jpg'), 'Image')
self.assertEqual(tool.findPortalTypeName(file_name='002.PNG'), 'Image')
self.assertEqual(tool.findPortalTypeName(file_name='002.PNG'), 'Image')
self.assertEqual(tool.findPortalTypeName(file_name='index.html'), 'Web Page')
self.assertEqual(tool.findPortalTypeName(filename='test.txt'), 'Text')
self.assertEqual(tool.findPortalTypeName(filename='test.odt'), 'Text')
self.assertEqual(tool.findPortalTypeName(filename='001.jpg'), 'Image')
self.assertEqual(tool.findPortalTypeName(filename='002.png'), 'Image')
self.assertEqual(tool.findPortalTypeName(filename='002.PNG'), 'Image')
self.assertEqual(tool.findPortalTypeName(filename='index.html'), 'Web Page')
# Unknown extension
self.assertEqual(tool.findPortalTypeName(file_name='index.xxx'), 'File')
self.assertEqual(tool.findPortalTypeName(filename='index.xxx'), 'File')
# Test mimetype matching
self.assertEqual(tool.findPortalTypeName(mime_type='text/html'), 'Web Page')
self.assertEqual(tool.findPortalTypeName(content_type='text/html'), 'Web Page')
# Unknown mimetype
self.assertEqual(tool.findPortalTypeName(mime_type='application/octet-stream'), 'File')
self.assertEqual(tool.findPortalTypeName(content_type='application/octet-stream'), 'File')
# Test both of extension and mimetype
self.assertNotEqual(tool.findPortalTypeName(file_name='message.eml'),
self.assertNotEqual(tool.findPortalTypeName(filename='message.eml'),
'Mail Message')
self.assertNotEqual(tool.findPortalTypeName(mime_type='message/rfc822'),
self.assertNotEqual(tool.findPortalTypeName(content_type='message/rfc822'),
'Mail Message')
self.assertEqual(tool.findPortalTypeName(file_name='message.eml',
mime_type='message/rfc822'),
self.assertEqual(tool.findPortalTypeName(filename='message.eml',
content_type='message/rfc822'),
'Mail Message')
# Test test script
data = """\
Subject: Fax
"""
self.assertEqual(tool.findPortalTypeName(file_name='message.eml',
mime_type='message/rfc822',
self.assertEqual(tool.findPortalTypeName(filename='message.eml',
content_type='message/rfc822',
data=data),
'Fax Message')
......
......@@ -37,7 +37,8 @@ from AccessControl.SecurityManagement import newSecurityManager
from Testing import ZopeTestCase
from Products.ERP5Type.tests.ERP5TypeTestCase import ERP5TypeTestCase,\
_getConversionServerDict
from Products.ERP5Type.tests.utils import FileUpload
from Products.ERP5Type.tests.utils import FileUpload, createZODBPythonScript
LANGUAGE_LIST = ('en', 'fr', 'de', 'bg',)
......@@ -568,8 +569,21 @@ class TestERP5WebWithDms(ERP5TypeTestCase, ZopeTestCase.Functional):
def test_PreviewOOoDocumentWithEmbeddedImage(self):
"""Tests html preview of an OOo document with images as extensible content.
For this test, Presentation_checkConversionFormatPermission does not allow
access to original format for Unauthenticated users.
Check that the user can still access other formats.
"""
portal = self.portal
script_id = 'Presentation_checkConversionFormatPermission'
python_code = """from AccessControl import getSecurityManager
user = getSecurityManager().getUser()
if (not user or not user.getId()) and not format:
return False
return True
"""
createZODBPythonScript(portal.portal_skins.custom, script_id,
'format, **kw', python_code)
request = portal.REQUEST
request['PARENTS'] = [self.app]
self.getPortalObject().aq_parent.acl_users._doAddUser(
......@@ -611,7 +625,7 @@ class TestERP5WebWithDms(ERP5TypeTestCase, ZopeTestCase.Functional):
# then publish the document and access it anonymously by reference through
# the web site
document.publish()
transaction.commit()
self.tic()
......@@ -620,7 +634,7 @@ class TestERP5WebWithDms(ERP5TypeTestCase, ZopeTestCase.Functional):
self.assertTrue(response.getHeader('content-type').startswith('text/html'))
html = response.getBody()
self.assertTrue('<img' in html, html)
# find the img src
img_list = etree.HTML(html).findall('.//img')
self.assertEquals(1, len(img_list))
......@@ -633,6 +647,22 @@ class TestERP5WebWithDms(ERP5TypeTestCase, ZopeTestCase.Functional):
png = response.getBody()
self.assertTrue(png.startswith('\x89PNG'))
# Now purge cache and let Anonymous user converting the document.
self.login()
document.edit() # Reset cache key
transaction.commit()
self.tic()
response = self.publish('%s/%s/asEntireHTML' % (
website.absolute_url_path(), document_reference))
self.assertTrue(response.getHeader('content-type').startswith('text/html'))
html = response.getBody()
self.assertTrue('<img' in html, html)
# find the img src
img_list = etree.HTML(html).findall('.//img')
self.assertEquals(1, len(img_list))
src = img_list[0].get('src')
def test_ImageConversionThroughWebSite(self):
"""Check that conversion parameters passed in the URL
are honoured when displaying an image in the context of a website
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment