From d963b0a51dc929281c3350e8d03305ae3aa3a393 Mon Sep 17 00:00:00 2001 From: Xiaowu Zhang <xiaowu.zhang@nexedi.com> Date: Wed, 28 Mar 2018 17:37:29 +0200 Subject: [PATCH] ParserStorage: add Atom parser /reviewed-on https://lab.nexedi.com/nexedi/jio/merge_requests/83 --- src/jio.storage/parserstorage.js | 76 ++++++- test/jio.storage/parserstorage.tests.js | 275 +++++++++++++++++++++++- 2 files changed, 348 insertions(+), 3 deletions(-) diff --git a/src/jio.storage/parserstorage.js b/src/jio.storage/parserstorage.js index f46d771..74065e2 100644 --- a/src/jio.storage/parserstorage.js +++ b/src/jio.storage/parserstorage.js @@ -79,6 +79,77 @@ } return result_list; }; + ///////////////////////////////////////////////////////////// + // ATOM Parser + ///////////////////////////////////////////////////////////// + function ATOMParser(txt) { + this._dom_parser = new DOMParser().parseFromString(txt, 'text/xml'); + } + ATOMParser.prototype.parseElement = function (element) { + var tag_element, + i, + j, + tag_name, + attribute, + result = {}; + + for (i = element.childNodes.length - 1; i >= 0; i -= 1) { + tag_element = element.childNodes[i]; + if ((tag_element.nodeType === Node.ELEMENT_NODE) && + (tag_element.tagName !== 'entry')) { + tag_name = tag_element.tagName; + // may have several links, with different rel value + // default is alternate + if (tag_name === 'link') { + tag_name += '_' + (tag_element.getAttribute('rel') || 'alternate'); + } else { + result[tag_name] = tag_element.textContent; + } + for (j = tag_element.attributes.length - 1; j >= 0; j -= 1) { + attribute = tag_element.attributes[j]; + if (attribute.value) { + result[tag_name + '_' + attribute.name] = + attribute.value; + } + } + + } + } + return result; + }; + ATOMParser.prototype.getDocumentList = function (include, id) { + var result_list, + item_list = this._dom_parser.querySelectorAll("feed > entry"), + i; + + if ((id === '/0') || (id === undefined)) { + result_list = [{ + id: '/0', + value: {} + }]; + if (include) { + result_list[0].doc = this.parseElement( + this._dom_parser.querySelector("feed") + ); + } + } else { + result_list = []; + } + + for (i = 0; i < item_list.length; i += 1) { + if ((id === '/0/' + i) || (id === undefined)) { + result_list.push({ + id: '/0/' + i, + value: {} + }); + if (include) { + result_list[result_list.length - 1].doc = + this.parseElement(item_list[i]); + } + } + } + return result_list; + }; ///////////////////////////////////////////////////////////// // RSS Parser @@ -151,7 +222,8 @@ ///////////////////////////////////////////////////////////// var parser_dict = { 'rss': RSSParser, - 'opml': OPMLParser + 'opml': OPMLParser, + 'atom': ATOMParser }; function getParser(storage) { @@ -203,4 +275,4 @@ jIO.addStorage('parser', ParserStorage); -}(jIO, DOMParser, Node)); \ No newline at end of file +}(jIO, DOMParser, Node)); diff --git a/test/jio.storage/parserstorage.tests.js b/test/jio.storage/parserstorage.tests.js index 088bd9e..cbfe7e1 100644 --- a/test/jio.storage/parserstorage.tests.js +++ b/test/jio.storage/parserstorage.tests.js @@ -51,7 +51,57 @@ }; jIO.addStorage('rssstorage200', RSSStorage200); + ///////////////////////////////////////////////////////////////// + // Custom atom test substorage definition + ///////////////////////////////////////////////////////////////// + function ATOMStorage200() { + return this; + } + ATOMStorage200.prototype.getAttachment = function (id, name) { + equal(id, 'foo'); + equal(name, 'bar'); + var txt = '<?xml version="1.0" encoding="utf-8"?>' + + '<feed xmlns="http://www.w3.org/2005/Atom">' + + '<title>Example Feed</title>' + + '<subtitle>A subtitle.</subtitle>' + + '<link href="http://example.org/feed/" rel="self" />' + + '<link href="http://example.org/" />' + + '<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>' + + '<updated>2003-12-13T18:30:02Z</updated>' + + '<entry>' + + '<id>tag:example.org,2018-03-29:81102</id>' + + '<title>Atom-Powered Robots Run Amok</title>' + + '<link href="/2003/12/13/atom03" />' + + '<link rel="alternate" type="text/html" ' + + 'href="http://example.org/2003/12/13/atom03.html"/>' + + '<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>' + + '<author>' + + '<name>John Doe</name>' + + '<email>johndoe@example.com</email>' + + '</author>' + + '<updated>2003-12-13T18:30:02Z</updated>' + + '<summary>Some text.</summary>' + + '</entry>' + + '<entry>' + + '<title>Atom-Powered Robots Run Amok</title>' + + '<link href="http://example.org/2003/12/13/atom03" />' + + '<link rel="alternate" type="text/html" ' + + 'href="http://example.org/2003/12/13/atom03.html"/>' + + '<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>' + + '<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>' + + '<author>' + + '<name>John Doe</name>' + + '<email>johndoe@example.com</email>' + + '</author>' + + '<updated>2004-12-13T18:30:02Z</updated>' + + '<summary>Some text.</summary>' + + '</entry>' + + '</feed>'; + return new Blob([txt]); + }; + + jIO.addStorage('atomstorage200', ATOMStorage200); ///////////////////////////////////////////////////////////////// // Custom OPML test substorage definition ///////////////////////////////////////////////////////////////// @@ -244,6 +294,131 @@ }); }); + test("get all IDs from ATOM", function () { + this.jio = jIO.createJIO({ + type: 'parser', + document_id: 'foo', + attachment_id: 'bar', + parser: 'atom', + sub_storage: { + type: 'atomstorage200' + } + }); + + var expected_dict = { + "data": { + "rows": [ + { + "id": "/0", + "value": {} + }, + { + "id": "/0/0", + "value": {} + }, + { + "id": "/0/1", + "value": {} + } + ], + "total_rows": 3 + } + }; + + stop(); + expect(3); + + this.jio.allDocs() + .then(function (result) { + deepEqual(result, expected_dict, "Check documents"); + }) + .fail(function (error) { + ok(false, error); + }) + .always(function () { + start(); + }); + }); + + test("get all documents from ATOM", function () { + this.jio = jIO.createJIO({ + type: 'parser', + document_id: 'foo', + attachment_id: 'bar', + parser: 'atom', + sub_storage: { + type: 'atomstorage200' + } + }); + + var expected_dict = { + "data": { + "rows": [ + { + "doc": { + "id": "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6", + "link_alternate_href": "http://example.org/", + "link_self_href": "http://example.org/feed/", + "link_self_rel": "self", + "subtitle": "A subtitle.", + "title": "Example Feed", + "updated": "2003-12-13T18:30:02Z" + }, + "id": "/0", + "value": {} + }, + { + "doc": { + "author": "John Doejohndoe@example.com", + "id": "tag:example.org,2018-03-29:81102", + "link_alternate_href": "/2003/12/13/atom03", + "link_alternate_rel": "alternate", + "link_alternate_type": "text/html", + "link_edit_href": "http://example.org/2003/12/13/atom03/edit", + "link_edit_rel": "edit", + "summary": "Some text.", + "title": "Atom-Powered Robots Run Amok", + "updated": "2003-12-13T18:30:02Z" + }, + "id": "/0/0", + "value": {} + }, + { + "doc": { + "author": "John Doejohndoe@example.com", + "id": "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a", + "link_alternate_href": "http://example.org/2003/12/13/atom03", + "link_alternate_rel": "alternate", + "link_alternate_type": "text/html", + "link_edit_href": "http://example.org/2003/12/13/atom03/edit", + "link_edit_rel": "edit", + "summary": "Some text.", + "title": "Atom-Powered Robots Run Amok", + "updated": "2004-12-13T18:30:02Z" + }, + "id": "/0/1", + "value": {} + } + ], + "total_rows": 3 + } + }; + + stop(); + expect(3); + + this.jio.allDocs({include_docs: true}) + .then(function (result) { + deepEqual(result, expected_dict, "Check documents"); + }) + .fail(function (error) { + ok(false, error); + }) + .always(function () { + start(); + }); + }); + test("get all IDs from OPML", function () { this.jio = jIO.createJIO({ type: 'parser', @@ -542,6 +717,104 @@ start(); }); }); + test("get Atom feed", function () { + this.jio = jIO.createJIO({ + type: 'parser', + document_id: 'foo', + attachment_id: 'bar', + parser: 'atom', + sub_storage: { + type: 'atomstorage200' + } + }); + + stop(); + expect(3); + + this.jio.get('/0') + .then(function (result) { + deepEqual(result, { + "id": "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6", + "link_alternate_href": "http://example.org/", + "link_self_href": "http://example.org/feed/", + "link_self_rel": "self", + "subtitle": "A subtitle.", + "title": "Example Feed", + "updated": "2003-12-13T18:30:02Z" + }, "Check document"); + }) + .fail(function (error) { + ok(false, error); + }) + .always(function () { + start(); + }); + }); + + test("get ATOM entry", function () { + this.jio = jIO.createJIO({ + type: 'parser', + document_id: 'foo', + attachment_id: 'bar', + parser: 'atom', + sub_storage: { + type: 'atomstorage200' + } + }); + + stop(); + expect(3); + + this.jio.get('/0/1') + .then(function (result) { + deepEqual(result, { + "author": "John Doejohndoe@example.com", + "id": "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a", + "link_alternate_href": "http://example.org/2003/12/13/atom03", + "link_alternate_rel": "alternate", + "link_alternate_type": "text/html", + "link_edit_href": "http://example.org/2003/12/13/atom03/edit", + "link_edit_rel": "edit", + "summary": "Some text.", + "title": "Atom-Powered Robots Run Amok", + "updated": "2004-12-13T18:30:02Z" + }, "Check document"); + }) + .fail(function (error) { + ok(false, error); + }) + .always(function () { + start(); + }); + }); + + test("get unknown atom entry", function () { + this.jio = jIO.createJIO({ + type: 'parser', + document_id: 'foo', + attachment_id: 'bar', + parser: 'atom', + sub_storage: { + type: 'atomstorage200' + } + }); + + stop(); + expect(5); + + this.jio.get('foo') + .then(function (result) { + ok(false, result); + }) + .fail(function (error) { + ok(error instanceof jIO.util.jIOError, error); + equal(error.message, "Cannot find parsed document: foo"); + equal(error.status_code, 404); + }) + .always(function () { + start(); + }); + }); test("get OPML head", function () { this.jio = jIO.createJIO({ @@ -635,4 +908,4 @@ }); }); -}(jIO, QUnit, Blob)); \ No newline at end of file +}(jIO, QUnit, Blob)); -- 2.30.9