Commit d2772256 authored by Alexandra Rogova's avatar Alexandra Rogova

Milestone 2

parent 17cb4353
......@@ -20,7 +20,6 @@ module.exports = {
var invoked = false,
to_run = args.shift();
console.log(to_run.args[1]);
process = childProcess.fork(to_run.scriptPath, to_run.args);
process.on('message', function (mes){
......
const fs = require('fs');
const util = require('util');
const readdir = util.promisify(fs.readdir);
const Rss_parser = require('rss-parser');
var rss_parser = new Rss_parser();
var xml_parser = require('xml2js').parseString;
var to_run_sitemap,
sitemap_stream;
function run(script, files, stream, callback){
var childProcess = require('child_process');
if (files.length === 0) return;
var invoked = false,
to_run = files.shift();
process = childProcess.fork(script, ['-f', to_run]);
process.on('message', function (mes){
mes = mes.toString();
fs.readFile(to_run, (err, data) => {
if (to_run.includes("rss")){
rss_parser.parseString(data, (err, result) => {
stream.write(to_run +","+ result.items.length +","+ mes + "\n");
});
} else {
xml_parser(data, (err, result) => {
stream.write(to_run +","+ result.urlset.url.length.toString() +","+ mes + "\n");
});
}
});
run(script, files, stream, callback);
});
process.on('error', function (err) {
if (invoked) return;
invoked = true;
callback(err);
});
process.on('exit', function (code) {
callback();
});
}
function run_sitemap(){
run("./tests/test_sitemap.js", to_run_sitemap, sitemap_stream, function (){return;});
}
(async () => {
fs.truncateSync("./results/rss.csv");
fs.truncateSync("./results/sitemap.csv");
var rss_stream = fs.createWriteStream("./results/rss.csv", {flags:'a'});
sitemap_stream = fs.createWriteStream("./results/sitemap.csv", {flags:'a'});
rss_stream.write("file name,items,items in index, title found, content complete\n");
sitemap_stream.write("file name,items,items in index, title found, content complete\n");
var rss_files = await readdir("./files/RSS");
to_run_rss = rss_files.map(file => "./files/RSS/" + file);
var sitemap_files = await readdir("./files/SITEMAP");
to_run_sitemap = sitemap_files.map(file => "./files/SITEMAP/" + file);
run("./tests/test_rss.js", to_run_rss, rss_stream, run_sitemap);
})();
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
file name,items,items in index, title found, content complete
./files/RSS/rss_100.xml,100,100,true,true
file name,items,items in index, title found, content complete
./files/SITEMAP/sitemap_100.xml,100,86,true,true
const puppeteer = require('puppeteer');
const Server = require('ws').Server;
const args = require("yargs")
.usage("Usage : test_rss.js -file file_path")
.demandOption(['file'])
.alias("f", "file")
.describe("file", "path to the RSS File")
.nargs("file", 1)
.argv;
var browser;
function init_server (){
var ws = new Server({port: 9030});
ws.on('connection', function(w){
w.on('message', function(msg){
ws.close();
process.send(msg);
browser.close();
});
w.on('close', function() {
ws.close();
});
});
}
(async () => {
init_server();
browser = await puppeteer.launch();
var page = await browser.newPage();
await page.goto('https://softinst115787.host.vifib.net/public/unit_tests/test_rss.html');
const [fileChooser] = await Promise.all([
page.waitForFileChooser(),
page.click('input#load')
]);
await fileChooser.accept([args.file]);
})();
const puppeteer = require('puppeteer');
const Server = require('ws').Server;
const args = require("yargs")
.usage("Usage : test_sitemap.js -file file_path")
.demandOption(['file'])
.alias("f", "file")
.describe("file", "path to the Sitemap File")
.nargs("file", 1)
.argv;
var browser;
function init_server (){
var ws = new Server({port: 9030});
ws.on('connection', function(w){
w.on('message', function(msg){
ws.close();
process.send(msg);
browser.close();
});
w.on('close', function() {
ws.close();
});
});
}
(async () => {
init_server();
browser = await puppeteer.launch();
var page = await browser.newPage();
await page.goto('https://softinst115787.host.vifib.net/public/unit_tests/test_sitemap.html');
const [fileChooser] = await Promise.all([
page.waitForFileChooser(),
page.click('input#load')
]);
await fileChooser.accept([args.file]);
})();
sudo: true
language: node_js
node_js:
- 8
script: npm run coveralls
Copyright (c) Felix Böhm
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
var encode = require("./lib/encode.js"),
decode = require("./lib/decode.js");
exports.decode = function(data, level) {
return (!level || level <= 0 ? decode.XML : decode.HTML)(data);
};
exports.decodeStrict = function(data, level) {
return (!level || level <= 0 ? decode.XML : decode.HTMLStrict)(data);
};
exports.encode = function(data, level) {
return (!level || level <= 0 ? encode.XML : encode.HTML)(data);
};
exports.encodeXML = encode.XML;
exports.encodeHTML4 = exports.encodeHTML5 = exports.encodeHTML = encode.HTML;
exports.decodeXML = exports.decodeXMLStrict = decode.XML;
exports.decodeHTML4 = exports.decodeHTML5 = exports.decodeHTML = decode.HTML;
exports.decodeHTML4Strict = exports.decodeHTML5Strict = exports.decodeHTMLStrict = decode.HTMLStrict;
exports.escape = encode.escape;
var entityMap = require("../maps/entities.json"),
legacyMap = require("../maps/legacy.json"),
xmlMap = require("../maps/xml.json"),
decodeCodePoint = require("./decode_codepoint.js");
var decodeXMLStrict = getStrictDecoder(xmlMap),
decodeHTMLStrict = getStrictDecoder(entityMap);
function getStrictDecoder(map) {
var keys = Object.keys(map).join("|"),
replace = getReplacer(map);
keys += "|#[xX][\\da-fA-F]+|#\\d+";
var re = new RegExp("&(?:" + keys + ");", "g");
return function(str) {
return String(str).replace(re, replace);
};
}
var decodeHTML = (function() {
var legacy = Object.keys(legacyMap).sort(sorter);
var keys = Object.keys(entityMap).sort(sorter);
for (var i = 0, j = 0; i < keys.length; i++) {
if (legacy[j] === keys[i]) {
keys[i] += ";?";
j++;
} else {
keys[i] += ";";
}
}
var re = new RegExp("&(?:" + keys.join("|") + "|#[xX][\\da-fA-F]+;?|#\\d+;?)", "g"),
replace = getReplacer(entityMap);
function replacer(str) {
if (str.substr(-1) !== ";") str += ";";
return replace(str);
}
//TODO consider creating a merged map
return function(str) {
return String(str).replace(re, replacer);
};
})();
function sorter(a, b) {
return a < b ? 1 : -1;
}
function getReplacer(map) {
return function replace(str) {
if (str.charAt(1) === "#") {
if (str.charAt(2) === "X" || str.charAt(2) === "x") {
return decodeCodePoint(parseInt(str.substr(3), 16));
}
return decodeCodePoint(parseInt(str.substr(2), 10));
}
return map[str.slice(1, -1)];
};
}
module.exports = {
XML: decodeXMLStrict,
HTML: decodeHTML,
HTMLStrict: decodeHTMLStrict
};
var decodeMap = require("../maps/decode.json");
module.exports = decodeCodePoint;
// modified version of https://github.com/mathiasbynens/he/blob/master/src/he.js#L94-L119
function decodeCodePoint(codePoint) {
if ((codePoint >= 0xd800 && codePoint <= 0xdfff) || codePoint > 0x10ffff) {
return "\uFFFD";
}
if (codePoint in decodeMap) {
codePoint = decodeMap[codePoint];
}
var output = "";
if (codePoint > 0xffff) {
codePoint -= 0x10000;
output += String.fromCharCode(((codePoint >>> 10) & 0x3ff) | 0xd800);
codePoint = 0xdc00 | (codePoint & 0x3ff);
}
output += String.fromCharCode(codePoint);
return output;
}
var inverseXML = getInverseObj(require("../maps/xml.json")),
xmlReplacer = getInverseReplacer(inverseXML);
exports.XML = getInverse(inverseXML, xmlReplacer);
var inverseHTML = getInverseObj(require("../maps/entities.json")),
htmlReplacer = getInverseReplacer(inverseHTML);
exports.HTML = getInverse(inverseHTML, htmlReplacer);
function getInverseObj(obj) {
return Object.keys(obj)
.sort()
.reduce(function(inverse, name) {
inverse[obj[name]] = "&" + name + ";";
return inverse;
}, {});
}
function getInverseReplacer(inverse) {
var single = [],
multiple = [];
Object.keys(inverse).forEach(function(k) {
if (k.length === 1) {
single.push("\\" + k);
} else {
multiple.push(k);
}
});
//TODO add ranges
multiple.unshift("[" + single.join("") + "]");
return new RegExp(multiple.join("|"), "g");
}
var re_nonASCII = /[^\0-\x7F]/g,
re_astralSymbols = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
function singleCharReplacer(c) {
return (
"&#x" +
c
.charCodeAt(0)
.toString(16)
.toUpperCase() +
";"
);
}
function astralReplacer(c) {
// http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
var high = c.charCodeAt(0);
var low = c.charCodeAt(1);
var codePoint = (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000;
return "&#x" + codePoint.toString(16).toUpperCase() + ";";
}
function getInverse(inverse, re) {
function func(name) {
return inverse[name];
}
return function(data) {
return data
.replace(re, func)
.replace(re_astralSymbols, astralReplacer)
.replace(re_nonASCII, singleCharReplacer);
};
}
var re_xmlChars = getInverseReplacer(inverseXML);
function escapeXML(data) {
return data
.replace(re_xmlChars, singleCharReplacer)
.replace(re_astralSymbols, astralReplacer)
.replace(re_nonASCII, singleCharReplacer);
}
exports.escape = escapeXML;
{"0":65533,"128":8364,"130":8218,"131":402,"132":8222,"133":8230,"134":8224,"135":8225,"136":710,"137":8240,"138":352,"139":8249,"140":338,"142":381,"145":8216,"146":8217,"147":8220,"148":8221,"149":8226,"150":8211,"151":8212,"152":732,"153":8482,"154":353,"155":8250,"156":339,"158":382,"159":376}
\ No newline at end of file
This diff is collapsed.
{"Aacute":"\u00C1","aacute":"\u00E1","Acirc":"\u00C2","acirc":"\u00E2","acute":"\u00B4","AElig":"\u00C6","aelig":"\u00E6","Agrave":"\u00C0","agrave":"\u00E0","amp":"&","AMP":"&","Aring":"\u00C5","aring":"\u00E5","Atilde":"\u00C3","atilde":"\u00E3","Auml":"\u00C4","auml":"\u00E4","brvbar":"\u00A6","Ccedil":"\u00C7","ccedil":"\u00E7","cedil":"\u00B8","cent":"\u00A2","copy":"\u00A9","COPY":"\u00A9","curren":"\u00A4","deg":"\u00B0","divide":"\u00F7","Eacute":"\u00C9","eacute":"\u00E9","Ecirc":"\u00CA","ecirc":"\u00EA","Egrave":"\u00C8","egrave":"\u00E8","ETH":"\u00D0","eth":"\u00F0","Euml":"\u00CB","euml":"\u00EB","frac12":"\u00BD","frac14":"\u00BC","frac34":"\u00BE","gt":">","GT":">","Iacute":"\u00CD","iacute":"\u00ED","Icirc":"\u00CE","icirc":"\u00EE","iexcl":"\u00A1","Igrave":"\u00CC","igrave":"\u00EC","iquest":"\u00BF","Iuml":"\u00CF","iuml":"\u00EF","laquo":"\u00AB","lt":"<","LT":"<","macr":"\u00AF","micro":"\u00B5","middot":"\u00B7","nbsp":"\u00A0","not":"\u00AC","Ntilde":"\u00D1","ntilde":"\u00F1","Oacute":"\u00D3","oacute":"\u00F3","Ocirc":"\u00D4","ocirc":"\u00F4","Ograve":"\u00D2","ograve":"\u00F2","ordf":"\u00AA","ordm":"\u00BA","Oslash":"\u00D8","oslash":"\u00F8","Otilde":"\u00D5","otilde":"\u00F5","Ouml":"\u00D6","ouml":"\u00F6","para":"\u00B6","plusmn":"\u00B1","pound":"\u00A3","quot":"\"","QUOT":"\"","raquo":"\u00BB","reg":"\u00AE","REG":"\u00AE","sect":"\u00A7","shy":"\u00AD","sup1":"\u00B9","sup2":"\u00B2","sup3":"\u00B3","szlig":"\u00DF","THORN":"\u00DE","thorn":"\u00FE","times":"\u00D7","Uacute":"\u00DA","uacute":"\u00FA","Ucirc":"\u00DB","ucirc":"\u00FB","Ugrave":"\u00D9","ugrave":"\u00F9","uml":"\u00A8","Uuml":"\u00DC","uuml":"\u00FC","Yacute":"\u00DD","yacute":"\u00FD","yen":"\u00A5","yuml":"\u00FF"}
\ No newline at end of file
{"amp":"&","apos":"'","gt":">","lt":"<","quot":"\""}
{
"_from": "entities@^1.1.1",
"_id": "entities@1.1.2",
"_inBundle": false,
"_integrity": "sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w==",
"_location": "/entities",
"_phantomChildren": {},
"_requested": {
"type": "range",
"registry": true,
"raw": "entities@^1.1.1",
"name": "entities",
"escapedName": "entities",
"rawSpec": "^1.1.1",
"saveSpec": null,
"fetchSpec": "^1.1.1"
},
"_requiredBy": [
"/rss-parser"
],
"_resolved": "https://registry.npmjs.org/entities/-/entities-1.1.2.tgz",
"_shasum": "bdfa735299664dfafd34529ed4f8522a275fea56",
"_spec": "entities@^1.1.1",
"_where": "C:\\Users\\thequ\\Documents\\Mynij\\Mynij-unit-tests\\node_modules\\rss-parser",
"author": {
"name": "Felix Boehm",
"email": "me@feedic.com"
},
"bugs": {
"url": "https://github.com/fb55/entities/issues"
},
"bundleDependencies": false,
"deprecated": false,
"description": "Encode & decode XML/HTML entities with ease",
"devDependencies": {
"coveralls": "*",
"istanbul": "*",
"jshint": "2",
"mocha": "^5.0.1",
"mocha-lcov-reporter": "*"
},
"directories": {
"test": "test"
},
"homepage": "https://github.com/fb55/entities#readme",
"jshintConfig": {
"eqeqeq": true,
"freeze": true,
"latedef": "nofunc",
"noarg": true,
"nonbsp": true,
"quotmark": "double",
"undef": true,
"unused": true,
"trailing": true,
"eqnull": true,
"proto": true,
"smarttabs": true,
"node": true,
"globals": {
"describe": true,
"it": true
}
},
"keywords": [
"html",
"xml",
"entity",
"decoding",
"encoding"
],
"license": "BSD-2-Clause",
"main": "./index.js",
"name": "entities",
"prettier": {
"tabWidth": 4
},
"repository": {
"type": "git",
"url": "git://github.com/fb55/entities.git"
},
"scripts": {
"coveralls": "npm run lint && npm run lcov && (cat coverage/lcov.info | coveralls || exit 0)",
"lcov": "istanbul cover _mocha --report lcovonly -- -R spec",
"lint": "jshint index.js lib/*.js test/*.js",
"test": "mocha && npm run lint"
},
"version": "1.1.2"
}
# entities [![NPM version](http://img.shields.io/npm/v/entities.svg)](https://npmjs.org/package/entities) [![Downloads](https://img.shields.io/npm/dm/entities.svg)](https://npmjs.org/package/entities) [![Build Status](http://img.shields.io/travis/fb55/entities.svg)](http://travis-ci.org/fb55/entities) [![Coverage](http://img.shields.io/coveralls/fb55/entities.svg)](https://coveralls.io/r/fb55/entities)
En- & decoder for XML/HTML entities.
## How to…
### …install `entities`
npm i entities
### …use `entities`
```javascript
var entities = require("entities");
//encoding
entities.encodeXML("&#38;"); // "&amp;#38;"
entities.encodeHTML("&#38;"); // "&amp;&num;38&semi;"
//decoding
entities.decodeXML("asdf &amp; &#xFF; &#xFC; &apos;"); // "asdf & ÿ ü '"
entities.decodeHTML("asdf &amp; &yuml; &uuml; &apos;"); // "asdf & ÿ ü '"
```
<!-- TODO extend API -->
---
License: BSD-2-Clause
--check-leaks
--reporter spec
var assert = require("assert"),
path = require("path"),
entities = require("../");
describe("Encode->decode test", function() {
var testcases = [
{
input: "asdf & ÿ ü '",
xml: "asdf &amp; &#xFF; &#xFC; &apos;",
html: "asdf &amp; &yuml; &uuml; &apos;"
},
{
input: "&#38;",
xml: "&amp;#38;",
html: "&amp;&num;38&semi;"
}
];
testcases.forEach(function(tc) {
var encodedXML = entities.encodeXML(tc.input);
it("should XML encode " + tc.input, function() {
assert.equal(encodedXML, tc.xml);
});
it("should default to XML encode " + tc.input, function() {
assert.equal(entities.encode(tc.input), tc.xml);
});
it("should XML decode " + encodedXML, function() {
assert.equal(entities.decodeXML(encodedXML), tc.input);
});
it("should default to XML encode " + encodedXML, function() {
assert.equal(entities.decode(encodedXML), tc.input);
});
it("should default strict to XML encode " + encodedXML, function() {
assert.equal(entities.decodeStrict(encodedXML), tc.input);
});
var encodedHTML5 = entities.encodeHTML5(tc.input);
it("should HTML5 encode " + tc.input, function() {
assert.equal(encodedHTML5, tc.html);
});
it("should HTML5 decode " + encodedHTML5, function() {
assert.equal(entities.decodeHTML(encodedHTML5), tc.input);
});
});
it("should encode data URIs (issue 16)", function() {
var data = "";
assert.equal(entities.decode(entities.encode(data)), data);
});
});
describe("Decode test", function() {
var testcases = [
{ input: "&amp;amp;", output: "&amp;" },
{ input: "&amp;#38;", output: "&#38;" },
{ input: "&amp;#x26;", output: "&#x26;" },
{ input: "&amp;#X26;", output: "&#X26;" },
{ input: "&#38;#38;", output: "&#38;" },
{ input: "&#x26;#38;", output: "&#38;" },
{ input: "&#X26;#38;", output: "&#38;" },
{ input: "&#x3a;", output: ":" },
{ input: "&#x3A;", output: ":" },
{ input: "&#X3a;", output: ":" },
{ input: "&#X3A;", output: ":" }
];
testcases.forEach(function(tc) {
it("should XML decode " + tc.input, function() {
assert.equal(entities.decodeXML(tc.input), tc.output);
});
it("should HTML4 decode " + tc.input, function() {
assert.equal(entities.decodeHTML(tc.input), tc.output);
});
it("should HTML5 decode " + tc.input, function() {
assert.equal(entities.decodeHTML(tc.input), tc.output);
});
});
});
var levels = ["xml", "entities"];
describe("Documents", function() {
levels
.map(function(n) {
return path.join("..", "maps", n);
})
.map(require)
.forEach(function(doc, i) {
describe("Decode", function() {
it(levels[i], function() {
Object.keys(doc).forEach(function(e) {
for (var l = i; l < levels.length; l++) {
assert.equal(entities.decode("&" + e + ";", l), doc[e]);