Commit 8b7e104d authored by Julien Muchembled

fixup! erp5_web_shadir: index shadir entries in catalog

Unit test written by Rafael Monnerat.

See commit 3d7d6d5e.
parent 2f7689d1
...@@ -6,13 +6,13 @@ WHERE ...@@ -6,13 +6,13 @@ WHERE
<dtml-var sql_delimiter> <dtml-var sql_delimiter>
<dtml-let <dtml-let
a2b_hex="__import__('binascii').a2b_hex"
loads="__import__('json').loads" loads="__import__('json').loads"
tostr="lambda x: x.encode('utf-8') if type(x) is unicode else x"
row_list="[]"> row_list="[]">
<dtml-in prefix="loop" expr="_.range(_.len(uid))"> <dtml-in prefix="loop" expr="_.range(_.len(uid))">
<dtml-if "getFollowUp[loop_item]"> <dtml-if "getFollowUp[loop_item] and getContentType[loop_item] == 'application/json'">
<dtml-let data="loads(loads(getData[loop_item])[0])"> <dtml-let data="loads(loads(getData[loop_item])[0])">
<dtml-call expr="row_list.append((uid[loop_item], a2b_hex(data['sha512']), data.get('url')))"> <dtml-call expr="row_list.append((uid[loop_item], data['sha512'], getFilename[loop_item] or None, ';'.join(['%s=%r' % (x, tostr(data[x])) for x in ('file', 'requirement', 'revision', 'software_url', 'os', 'url') if data.get(x)]) or None))">
</dtml-let> </dtml-let>
</dtml-if> </dtml-if>
</dtml-in> </dtml-in>
...@@ -23,8 +23,9 @@ VALUES ...@@ -23,8 +23,9 @@ VALUES
<dtml-in row_list prefix="row"> <dtml-in row_list prefix="row">
( (
<dtml-sqlvar expr="row_item[0]" type="int">, <dtml-sqlvar expr="row_item[0]" type="int">,
<dtml-sqlvar expr="row_item[1]" type="string">, x'<dtml-var expr="row_item[1]">',
<dtml-sqlvar expr="row_item[2]" type="string"> <dtml-sqlvar expr="row_item[2]" type="string" optional>,
<dtml-sqlvar expr="row_item[3]" type="string" optional>
) )
<dtml-if sequence-end><dtml-else>,</dtml-if> <dtml-if sequence-end><dtml-else>,</dtml-if>
</dtml-in> </dtml-in>
......
...@@ -13,8 +13,10 @@ ...@@ -13,8 +13,10 @@
<item> <item>
<key> <string>arguments_src</string> </key> <key> <string>arguments_src</string> </key>
<value> <string>uid\n <value> <string>uid\n
getFollowUp\n getContentType\n
getData</string> </value> getData\n
getFilename\n
getFollowUp</string> </value>
</item> </item>
<item> <item>
<key> <string>cache_time_</string> </key> <key> <string>cache_time_</string> </key>
......
CREATE TABLE `shadir` ( CREATE TABLE `shadir` (
`uid` BIGINT UNSIGNED PRIMARY KEY, `uid` BIGINT UNSIGNED PRIMARY KEY,
`sha512` BINARY(64) NOT NULL, `sha512` BINARY(64) NOT NULL,
`url` TEXT, `filename` TINYTEXT,
`summary` TEXT,
KEY(`sha512`) KEY(`sha512`)
) ENGINE=InnoDB; ) ENGINE=InnoDB;
...@@ -26,10 +26,12 @@ ...@@ -26,10 +26,12 @@
############################################################################## ##############################################################################
import hashlib import hashlib
import json
from base64 import b64decode from base64 import b64decode
from binascii import a2b_hex from binascii import a2b_hex
from collections import defaultdict
from json import dumps, loads
from zExceptions import BadRequest from zExceptions import BadRequest
from DateTime import DateTime
from Products.ERP5Type.UnrestrictedMethod import super_user from Products.ERP5Type.UnrestrictedMethod import super_user
...@@ -59,13 +61,13 @@ def WebSection_getDocumentValue(self, key, portal=None, language=None,\ ...@@ -59,13 +61,13 @@ def WebSection_getDocumentValue(self, key, portal=None, language=None,\
# Return the SIGNATURE file, if the document exists. # Return the SIGNATURE file, if the document exists.
if data_set is not None: if data_set is not None:
document_list = [json.loads(document.getData()) \ document_list = [loads(document.getData())
for document in portal.portal_catalog( for document in portal.portal_catalog(
follow_up_uid=data_set.getUid(), follow_up_uid=data_set.getUid(),
validation_state='published')] validation_state='published')]
temp_file = self.newContent(temp_object=True, portal_type='File', id='%s.txt' % key) temp_file = self.newContent(temp_object=True, portal_type='File', id='%s.txt' % key)
temp_file.setData(json.dumps(document_list)) temp_file.setData(dumps(document_list))
temp_file.setContentType('application/json') temp_file.setContentType('application/json')
return temp_file.getObject() return temp_file.getObject()
...@@ -78,8 +80,8 @@ def WebSection_setObject(self, id, ob, **kw): ...@@ -78,8 +80,8 @@ def WebSection_setObject(self, id, ob, **kw):
portal = self.getPortalObject() portal = self.getPortalObject()
data = self.REQUEST.get('BODY') data = self.REQUEST.get('BODY')
try: try:
metadata, signature = json.loads(data) metadata, signature = loads(data)
metadata = json.loads(metadata) metadata = loads(metadata)
# a few basic checks # a few basic checks
b64decode(signature) b64decode(signature)
if len(a2b_hex(metadata['sha512'])) != 64: if len(a2b_hex(metadata['sha512'])) != 64:
...@@ -125,3 +127,100 @@ def WebSection_putFactory(self, name, typ, body): ...@@ -125,3 +127,100 @@ def WebSection_putFactory(self, name, typ, body):
filename=name, filename=name,
discover_metadata=False) discover_metadata=False)
return document return document
# The following scripts are helpers to search & clean up shadir entries.
# XXX: Due to lack of View skin for shadir, external methods are currently
# created manually in custom skin after installation, if needed.
def _delete(portal, data_set_id_list, document_id_list):
    """Delete the listed documents and data sets, and return a text report.

    portal -- object exposing ``document_module`` and ``data_set_module``
    data_set_id_list -- ids passed to ``data_set_module.manage_delObjects``
    document_id_list -- ids passed to ``document_module.manage_delObjects``

    The returned string has three lines: the current ``DateTime()``, then
    the length and repr of ``data_set_id_list``, then the length and repr of
    ``document_id_list``.  A module is only touched when its id list is not
    empty; documents are deleted before data sets.
    """
    report_args = (
        DateTime(),
        len(data_set_id_list), data_set_id_list,
        len(document_id_list), document_id_list,
    )
    # NOTE(review): the report is formatted before any deletion, as in the
    # original ordering; keep it that way in case manage_delObjects modifies
    # the list it receives -- confirm.
    report = "%s\n%s %r\n%s %r" % report_args
    for module_id, id_list in (
            ("document_module", document_id_list),
            ("data_set_module", data_set_id_list)):
        if id_list:
            getattr(portal, module_id).manage_delObjects(id_list)
    return report
def _deletableDataSetList(data_set_dict):
return [data_set.getId()
for data_set, document_set in data_set_dict.iteritems()
if document_set.issuperset(data_set.getFollowUpRelatedList())]
def ERP5Site_deleteOrphanShadir(self):
    """Delete shadir entries whose digest matches no catalogued reference.

    Must be called on the ERP5 Site itself (asserted).

    First query: every document indexed in the ``shadir`` table for which no
    ``catalog`` row has a ``reference`` equal to the lowercase hexadecimal
    form of its ``sha512``.  Each such document is scheduled for deletion,
    and its 'Data Set' follow-up (if any) is remembered so that data sets
    left without related documents can be deleted too.

    Second query: ids of additional data sets to delete, appended to the
    list above.

    Returns the report string produced by ``_delete``.
    """
    assert self.getPortalType() == "ERP5 Site", self
    run_query = self.erp5_sql_connection().query
    url_set_by_data_set = defaultdict(set)
    orphan_id_list = []
    orphan_row_list = run_query("select catalog.relative_url"
        " from catalog join shadir using (uid)"
        " left join catalog as t on (lower(hex(sha512))=t.reference)"
        " where t.uid is null", 0)[1]
    for row in orphan_row_list:
        relative_url, = row
        orphan = self.unrestrictedTraverse(relative_url)
        owner = orphan.getFollowUpValue(portal_type='Data Set')
        if owner is not None:
            url_set_by_data_set[owner].add(relative_url)
        orphan_id_list.append(orphan.getId())
    data_set_id_list = _deletableDataSetList(url_set_by_data_set)
    # NOTE(review): the left join below compares catalog.uid with t.uid,
    # both taken from the catalog table, so "t.uid is null" looks like it
    # can never hold; confirm whether category.uid was intended.
    follow_up_uid = self.portal_categories.follow_up.getUid()
    data_set_module_uid = self.data_set_module.getUid()
    extra_row_list = run_query("select catalog.id from catalog"
        " join category on (base_category_uid=%s and category_uid=catalog.uid)"
        " left join catalog as t on (catalog.uid=t.uid)"
        " where catalog.parent_uid=%s and t.uid is null" % (
            follow_up_uid,
            data_set_module_uid,
        ), 0)[1]
    # Single-column result: keep the first (only) value of each row.
    data_set_id_list += [row[0] for row in extra_row_list]
    return _delete(self, data_set_id_list, orphan_id_list)
def _deleteDocumentList(portal, document_list):
    """Delete shadir documents, plus whatever only they were referring to.

    portal -- the site object; must provide ``erp5_sql_connection()``,
      ``document_module`` and ``data_set_module``
    document_list -- iterable of shadir documents whose ``getData()`` is a
      JSON list, the first item of which is itself a JSON object holding a
      ``sha512`` key

    Steps:
    1. collect the id, the digest and the 'Data Set' follow-up of each
       given document;
    2. drop from the digest set every digest that is also indexed in the
       ``shadir`` table for a document that is not being deleted;
    3. schedule for deletion the ``document_module`` entries whose
       ``reference`` is one of the remaining digests;
    4. delete everything through ``_delete`` (including the data sets that
       ``_deletableDataSetList`` selects) and return its report.

    An empty ``document_list`` performs no SQL query and deletes nothing.
    """
    data_set_dict = defaultdict(set)
    document_id_list = []
    sha512_set = set()
    for document in document_list:
        sha512_set.add(loads(loads(document.getData())[0])[u"sha512"])
        data_set = document.getFollowUpValue(portal_type='Data Set')
        if data_set is not None:
            data_set_dict[data_set].add(document.getRelativeUrl())
        document_id_list.append(document.getId())
    # Without this guard, an empty input would build "sha512 in ()", which
    # is not valid SQL.
    if sha512_set:
        # Snapshot for O(1) membership tests while scanning the rows.
        deleted_id_set = frozenset(document_id_list)
        # NOTE(review): digests are interpolated as x'...' literals; this
        # relies on the stored metadata holding valid hexadecimal (checked
        # with a2b_hex by WebSection_setObject at upload time).
        row_list = portal.erp5_sql_connection().query(
            "select relative_url, lower(hex(sha512))"
            " from catalog join shadir using (uid) where sha512 in (%s)"
            % ','.join("x'%s'" % (sha512,) for sha512 in sha512_set), 0)[1]
        # Digests still used by an entry that stays must be kept.
        sha512_set.difference_update(
            sha512 for relative_url, sha512 in row_list
            if not (relative_url.startswith("document_module/") and
                    relative_url[16:] in deleted_id_set))
    if sha512_set:
        for document in portal.document_module.searchFolder(
                reference=sha512_set):
            document_id_list.append(document.getId())
    return _delete(
        portal, _deletableDataSetList(data_set_dict), document_id_list)
def ShaDir_delete(self):
    """Delete the shadir entries designated by the context.

    Called on a 'Data Set', it targets the 'File' documents returned by
    ``getFollowUpRelatedValueList``; called on a 'File', it targets that
    single document.  Any other portal type fails the assertion.

    Returns the report string of ``_deleteDocumentList``.
    """
    context_type = self.getPortalType()
    if context_type != 'Data Set':
        assert context_type == 'File', self
        target_list = (self,)
    else:
        target_list = self.getFollowUpRelatedValueList(portal_type='File')
    return _deleteDocumentList(self.getPortalObject(), target_list)
def ShaDir_search(self, filename, summary, delete=False):
    """List, and optionally delete, the shadir entries matching a search.

    Must be called on the ERP5 Site itself (asserted).

    filename, summary -- passed as is to
      ``portal_catalog.unrestrictedSearchResults``
    delete -- when true, the found documents are also handed to
      ``_deleteDocumentList`` and its report is appended to the result

    The result has one line per distinct metadata (``sha512`` excluded),
    rendered as ``key=repr(value)`` pairs joined by ``;`` in key order and
    followed by the sorted list of ids of the documents having it.
    """
    assert self.getPortalType() == "ERP5 Site", self
    found_list = []
    id_list_by_metadata = defaultdict(list)
    for brain in self.portal_catalog.unrestrictedSearchResults(
            filename=filename, summary=summary):
        found = brain.getObject()
        found_list.append(found)
        metadata = loads(loads(found.getData())[0])
        del metadata[u"sha512"]
        pair_list = []
        for name, value in sorted(metadata.iteritems()):
            # Unicode values are shown as UTF-8 byte strings.
            if type(value) is unicode:
                value = value.encode('utf-8')
            pair_list.append('%s=%r' % (name, value))
        id_list_by_metadata[';'.join(pair_list)].append(found.getId())
    line_list = ['%s %s' % (metadata_key, sorted(id_list))
                 for metadata_key, id_list
                 in sorted(id_list_by_metadata.iteritems())]
    result = '\n'.join(line_list)
    if delete:
        result += '\n' + _deleteDocumentList(self, found_list)
    return result
...@@ -256,3 +256,20 @@ class TestShaDir(ShaDirMixin, ERP5TypeTestCase): ...@@ -256,3 +256,20 @@ class TestShaDir(ShaDirMixin, ERP5TypeTestCase):
self.assertEqual(obj, vf1._getOb(obj_id)) self.assertEqual(obj, vf1._getOb(obj_id))
self.assertIsNone(vf2._getOb(obj_id, default=None)) self.assertIsNone(vf2._getOb(obj_id, default=None))
def test_post_information_with_other_documents(self):
    """
      Check that posting information still works when document_module
      also holds a published File that follows up a Person, i.e. a
      document unrelated to shadir.
    """
    portal = self.portal
    owner = portal.person_module.newContent(portal_type="Person")
    unrelated_file = portal.document_module.newContent(
        portal_type="File",
        reference="F-TESTSHADIR",
        version="001",
        language="en",
        follow_up_value=owner,
        data="FILEDATA")
    unrelated_file.publish()
    self.tic()
    # With the unrelated document in place, replay the existing scenario.
    self.test_post_information_more_than_once()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment