Commit e952a6db authored by fxa's avatar fxa

fixed a pct-error for multibyte utf8 chars

parent 378dcb63
......@@ -24,7 +24,9 @@
"bold_suffix": "",
"assertion_prefix": "",
"assertion_suffix": ""
};
},
NODEUNIT_REPORTER = 'default';
// NODEUNIT_REPORTER = 'minimal';
var
SRC_HOME = 'src',
......@@ -58,7 +60,7 @@
ASYNC = {async: true};
var all = new jake.FileList();
all.include('./*');
all.include('./Jakefile.js', 'own-testcases.json');
all.include('src/**');
all.include('test/**');
all.exclude(TARGET_COMPRESSED);
......@@ -91,7 +93,7 @@
},
function (callback) {
jake.logger.log('unit testing ...');
nodeunit.reporters['default'].run(UNIT_TESTS, NODEUNIT_OPTIONS, callback);
nodeunit.reporters[NODEUNIT_REPORTER].run(UNIT_TESTS, NODEUNIT_OPTIONS, callback);
},
function (callback) {
jake.logger.log('build concatenated version ...');
......@@ -107,7 +109,7 @@
},
function (callback) {
jake.logger.log('integration tests ...');
nodeunit.reporters['default'].run(INTEGRATION_TESTS, NODEUNIT_OPTIONS, callback);
nodeunit.reporters[NODEUNIT_REPORTER].run(INTEGRATION_TESTS, NODEUNIT_OPTIONS, callback);
},
function (callback) {
jake.logger.log('move uncompressed version to target directory');
......@@ -126,7 +128,7 @@
},
function (callback) {
jake.logger.log('integration tests with minified version ... ');
nodeunit.reporters['default'].run(INTEGRATION_TESTS, NODEUNIT_OPTIONS, callback);
nodeunit.reporters[NODEUNIT_REPORTER].run(INTEGRATION_TESTS, NODEUNIT_OPTIONS, callback);
},
function (callback) {
jake.logger.log('move compressed version to target ... ');
......@@ -135,8 +137,15 @@
], closeTask);
}, ASYNC);
desc('release');
task('release', [TARGET_COMPRESSED], function () {
// for short test only
// Registers a jake task named 'unit' with no prerequisites; it runs only the
// unit tests (UNIT_TESTS) and skips the concatenate/minify/integration steps.
desc('unit tests');
task('unit', [], function () {
// here we want the default reporter and not the configured one
// (i.e. NODEUNIT_REPORTER is deliberately ignored for this task).
// `complete` is handed to the reporter as its callback; the task is registered with ASYNC.
nodeunit.reporters['default'].run(UNIT_TESTS, NODEUNIT_OPTIONS, complete);
}, ASYNC);
// Top-level task: depends on the compressed target file and only reports
// completion once that prerequisite has been satisfied.
desc('build');
task('build', [TARGET_COMPRESSED], function () {
    jake.logger.log('done.');
});
// NOTE(review): the task formerly called 'release' appears to have been renamed
// to 'build' (the README documents `jake clean build`), so the default task has
// to depend on 'build' -- a dependency on 'release' would name a task that is
// no longer defined. Confirm against the complete Jakefile.
task('default', ['clean', 'build']);
......
......@@ -17,7 +17,7 @@ But you can also use it with node:
**npm install uritemplate**
and then:
and then in a node application:
var
UriTemplate = require('uritemplate'),
......@@ -26,6 +26,15 @@ and then:
template.expand({query: {first: 1, second: 2}});
--> "?first=1&second=2"
or within a html document (see also demo.html):
<script type="text/javascript" src="bin/uritemplate.js"></script>
<script type="text/javascript">
var template = UriTemplate.parse('{?query*}');
alert(template.expand({query: {first: 1, second: 2}}));
</script>
If you want to clone the git project, be aware of the submodule uritemplate-test.
So you have to do:
......@@ -35,7 +44,7 @@ So you have to to:
Build
-----
jake clean release
jake clean build
Tests
-----
......@@ -51,8 +60,9 @@ MIT License, see http://mit-license.org/
Release Notes
-------------
0.2.0 heavy project refactoring, splitting source files, introducing jshint (preparation of next steps)
0.2.1 fixed a bug in package.json
* 0.2.2 fixed pct encoding bug with multibyte utf8 chars
* 0.2.1 fixed a bug in package.json
* 0.2.0 heavy project refactoring, splitting source files, introducing jshint (preparation of next steps)
Next Steps
----------
......
This diff is collapsed.
......@@ -73,15 +73,41 @@ var charHelper = (function () {
var pctEncoder = (function () {
// see http://ecmanaut.blogspot.de/2006/07/encoding-decoding-utf8-in-javascript.html
/**
 * Converts a string into its UTF-8 octet string: every character of the
 * result has a char code in the range 0..255 and stands for one octet.
 * @param s the text to convert
 * @return {string} one character per UTF-8 octet
 */
function toUtf8(s) {
    // encodeURIComponent emits the UTF-8 octets as %XX escapes ...
    var percentEncoded = encodeURIComponent(s);
    // ... and unescape turns each %XX back into a single character of that code.
    return unescape(percentEncoded);
}
/**
 * Small collection of UTF-8 helpers used by the pct-encoder.
 */
var utf8 = (function () {
    // Inclusive ranges of valid first octets, each paired with the number of
    // octets of the sequence it introduces. The first range has no lower bound.
    var LEAD_OCTET_RANGES = [
        {from: -Infinity, to: 0x7F, length: 1},
        {from: 0xC2, to: 0xDF, length: 2},
        {from: 0xE0, to: 0xEF, length: 3},
        {from: 0xF0, to: 0xF4, length: 4}
    ];

    return {
        /**
         * @param chr the text to convert
         * @return {string} the UTF-8 octets of chr, one character (code 0..255) per octet
         */
        encode: function (chr) {
            // see http://ecmanaut.blogspot.de/2006/07/encoding-decoding-utf8-in-javascript.html
            var percentEncoded = encodeURIComponent(chr);
            return unescape(percentEncoded);
        },

        /**
         * @param firstCharCode the value of the first octet of a sequence
         * @return {number} the length (1..4) of the sequence started by that octet,
         *         or 0 if the octet is no valid first octet
         */
        numBytes: function (firstCharCode) {
            var position, range;
            for (position = 0; position < LEAD_OCTET_RANGES.length; position += 1) {
                range = LEAD_OCTET_RANGES[position];
                if (range.from <= firstCharCode && firstCharCode <= range.to) {
                    return range.length;
                }
            }
            // no valid first octet
            return 0;
        },

        /**
         * @param charCode the value of an octet following the first one
         * @return {boolean} true if the octet lies in the range 0x80..0xBF
         */
        isValidFollowingCharCode: function (charCode) {
            return charCode >= 0x80 && charCode <= 0xBF;
        }
    };
}());
function encode(chr) {
/**
* encodes a character, if needed or not.
* @param chr
* @return pct-encoded character
*/
function encodeCharacter (chr) {
var
result = '',
octets = toUtf8(chr),
octets = utf8.encode(chr),
octet,
index;
for (index = 0; index < octets.length; index += 1) {
......@@ -91,33 +117,62 @@ var pctEncoder = (function () {
return result;
}
function isPctEncoded(chr) {
if (chr.length < 3) {
/**
 * Checks whether the three characters of text beginning at start form a
 * percent triplet: a '%' followed by two hex digits (as judged by charHelper.isHexDigit).
 * @param text the text to inspect
 * @param start index of the suspected '%'
 * @return a truthy value if a triplet starts at that index, a falsy one otherwise
 */
function isPercentDigitDigit (text, start) {
    var firstDigitCheck;
    if (text[start] !== '%') {
        return false;
    }
    firstDigitCheck = charHelper.isHexDigit(text[start + 1]);
    if (!firstDigitCheck) {
        // hand back the helper's own (falsy) answer unchanged
        return firstDigitCheck;
    }
    return charHelper.isHexDigit(text[start + 2]);
}
/**
 * Reads (at most) two characters of text beginning at start and parses them
 * as a hexadecimal number.
 * @param text the text to read from
 * @param start index of the first hex digit
 * @return {number} the parsed value, or NaN if no hex digit is found there
 */
function parseHex2(text, start) {
    var hexPair = text.substr(start, 2);
    return parseInt(hexPair, 16);
}
/**
* Returns whether or not the given char sequence is a correctly pct-encoded sequence.
* @param chr
* @return {boolean}
*/
function isPctEncoded (chr) {
if (!isPercentDigitDigit(chr, 0)) {
return false;
}
var firstCharCode = parseHex2(chr, 1);
var numBytes = utf8.numBytes(firstCharCode);
if (numBytes === 0) {
return false;
}
for (var index = 0; index < chr.length; index += 3) {
if (chr.charAt(index) !== '%' || !charHelper.isHexDigit(chr.charAt(index + 1) || !charHelper.isHexDigit(chr.charAt(index + 2)))) {
for (var byteNumber = 1; byteNumber < numBytes; byteNumber += 1) {
if (!isPercentDigitDigit(chr, 3*byteNumber) || !utf8.isValidFollowingCharCode(parseHex2(chr, 3*byteNumber + 1))) {
return false;
}
}
return true;
}
/**
* Reads as much as needed from the text, e.g. '%20' or '%C3%B6'. It does not decode!
* @param text
* @param startIndex
* @return the character or pct-string of the text at startIndex
*/
function pctCharAt(text, startIndex) {
var chr = text.charAt(startIndex);
if (chr !== '%') {
var chr = text[startIndex];
if (!isPercentDigitDigit(text, startIndex)) {
return chr;
}
var utf8CharCode = parseHex2(text, startIndex + 1);
var numBytes = utf8.numBytes(utf8CharCode);
if (numBytes === 0) {
return chr;
}
chr = text.substr(startIndex, 3);
if (!isPctEncoded(chr)) {
return '%';
for (var byteNumber = 1; byteNumber < numBytes; byteNumber += 1) {
if (!isPercentDigitDigit(text, startIndex + 3 * byteNumber) || !utf8.isValidFollowingCharCode(parseHex2(text, startIndex + 3 * byteNumber + 1))) {
return chr;
}
}
return chr;
return text.substr(startIndex, 3 * numBytes);
}
return {
encodeCharacter: encode,
decodeCharacter: decodeURIComponent,
encodeCharacter: encodeCharacter,
isPctEncoded: isPctEncoded,
pctCharAt: pctCharAt
};
......@@ -161,9 +216,12 @@ var rfcCharHelper = (function () {
}());
/**
* encoding of rfc 6570
*/
var encodingHelper = (function () {
function encode(text, passReserved) {
function encode (text, passReserved) {
var
result = '',
index,
......@@ -183,7 +241,7 @@ var encodingHelper = (function () {
return result;
}
function encodePassReserved(text) {
function encodePassReserved (text) {
return encode(text, true);
}
......@@ -319,7 +377,7 @@ var parse = (function () {
varnameStart = null,
maxLengthStart = null,
index,
chr;
chr = '';
function closeVarname() {
varspec = {varname: text.substring(varnameStart, index), exploded: false, maxLength: null};
......@@ -345,7 +403,7 @@ var parse = (function () {
for (; index < text.length; index += chr.length) {
chr = pctEncoder.pctCharAt(text, index);
if (varnameStart !== null) {
// the spec says: varname = varchar *( ["."] varchar )
// the spec says: varname = varchar *( ["."] varchar )
// so a dot is allowed except for the first char
if (chr === '.') {
if (varnameStart === index) {
......@@ -391,7 +449,7 @@ var parse = (function () {
varnameStart = index + 1;
continue;
}
throw new Error("illegal character '" + chr + "' at position " + index);
throw new Error("illegal character '" + chr + "' at position " + index + ' of "' + text + '"');
} // for chr
if (varnameStart !== null) {
closeVarname();
......@@ -525,9 +583,9 @@ var VariableExpression = (function () {
for (index = 0; index < this.varspecs.length; index += 1) {
varspec = this.varspecs[index];
value = variables[varspec.varname];
// if (!isDefined(value)) {
// continue;
// }
if (!isDefined(value)) {
continue;
}
if (isFirstVarspec) {
result += this.operator.first;
isFirstVarspec = false;
......
......@@ -35,10 +35,16 @@
"uritemplate-test/spec-examples-by-sections.json",
"uritemplate-test/spec-examples.json"
],
"version": "0.2.1",
"version": "0.2.2",
"readmeFilename": "README.md",
"gitHead": "901b85201a821427dfb4591b56aea3a70d45c67c",
"devDependencies": {},
"devDependencies": {
"jshint": "*",
"nodeunit": "*",
"jake": "*",
"uglify-js": "*",
"async": "*"
},
"repository": {
"type": "git",
"url": "https://github.com/fxa/uritemplate-js.git"
......
......@@ -70,9 +70,9 @@ var VariableExpression = (function () {
for (index = 0; index < this.varspecs.length; index += 1) {
varspec = this.varspecs[index];
value = variables[varspec.varname];
// if (!isDefined(value)) {
// continue;
// }
if (!isDefined(value)) {
continue;
}
if (isFirstVarspec) {
result += this.operator.first;
isFirstVarspec = false;
......
/*jshint unused: false */
/*global rfcCharHelper, pctEncoder*/
/**
* encoding of rfc 6570
*/
var encodingHelper = (function () {
"use strict";
function encode(text, passReserved) {
function encode (text, passReserved) {
var
result = '',
index,
......@@ -23,7 +26,7 @@ var encodingHelper = (function () {
return result;
}
function encodePassReserved(text) {
function encodePassReserved (text) {
return encode(text, true);
}
......
......@@ -12,7 +12,7 @@ var parse = (function () {
varnameStart = null,
maxLengthStart = null,
index,
chr;
chr = '';
function closeVarname() {
varspec = {varname: text.substring(varnameStart, index), exploded: false, maxLength: null};
......@@ -38,7 +38,7 @@ var parse = (function () {
for (; index < text.length; index += chr.length) {
chr = pctEncoder.pctCharAt(text, index);
if (varnameStart !== null) {
// the spec says: varname = varchar *( ["."] varchar )
// the spec says: varname = varchar *( ["."] varchar )
// so a dot is allowed except for the first char
if (chr === '.') {
if (varnameStart === index) {
......@@ -84,7 +84,7 @@ var parse = (function () {
varnameStart = index + 1;
continue;
}
throw new Error("illegal character '" + chr + "' at position " + index);
throw new Error("illegal character '" + chr + "' at position " + index + ' of "' + text + '"');
} // for chr
if (varnameStart !== null) {
closeVarname();
......
/*jshint unused:false */
/*global unescape, charHelper*/
/*global charHelper, unescape*/
var pctEncoder = (function () {
"use strict";
// see http://ecmanaut.blogspot.de/2006/07/encoding-decoding-utf8-in-javascript.html
/**
 * Converts a string into its UTF-8 octet string: every character of the
 * result has a char code in the range 0..255 and stands for one octet.
 * @param s the text to convert
 * @return {string} one character per UTF-8 octet
 */
function toUtf8(s) {
    // encodeURIComponent emits the UTF-8 octets as %XX escapes ...
    var percentEncoded = encodeURIComponent(s);
    // ... and unescape turns each %XX back into a single character of that code.
    return unescape(percentEncoded);
}
/**
 * Small collection of UTF-8 helpers used by the pct-encoder.
 */
var utf8 = (function () {
    // Inclusive ranges of valid first octets, each paired with the number of
    // octets of the sequence it introduces. The first range has no lower bound.
    var LEAD_OCTET_RANGES = [
        {from: -Infinity, to: 0x7F, length: 1},
        {from: 0xC2, to: 0xDF, length: 2},
        {from: 0xE0, to: 0xEF, length: 3},
        {from: 0xF0, to: 0xF4, length: 4}
    ];

    return {
        /**
         * @param chr the text to convert
         * @return {string} the UTF-8 octets of chr, one character (code 0..255) per octet
         */
        encode: function (chr) {
            // see http://ecmanaut.blogspot.de/2006/07/encoding-decoding-utf8-in-javascript.html
            var percentEncoded = encodeURIComponent(chr);
            return unescape(percentEncoded);
        },

        /**
         * @param firstCharCode the value of the first octet of a sequence
         * @return {number} the length (1..4) of the sequence started by that octet,
         *         or 0 if the octet is no valid first octet
         */
        numBytes: function (firstCharCode) {
            var position, range;
            for (position = 0; position < LEAD_OCTET_RANGES.length; position += 1) {
                range = LEAD_OCTET_RANGES[position];
                if (range.from <= firstCharCode && firstCharCode <= range.to) {
                    return range.length;
                }
            }
            // no valid first octet
            return 0;
        },

        /**
         * @param charCode the value of an octet following the first one
         * @return {boolean} true if the octet lies in the range 0x80..0xBF
         */
        isValidFollowingCharCode: function (charCode) {
            return charCode >= 0x80 && charCode <= 0xBF;
        }
    };
}());
function encode(chr) {
/**
* encodes a character, if needed or not.
* @param chr
* @return pct-encoded character
*/
function encodeCharacter (chr) {
var
result = '',
octets = toUtf8(chr),
octets = utf8.encode(chr),
octet,
index;
for (index = 0; index < octets.length; index += 1) {
......@@ -21,33 +47,62 @@ var pctEncoder = (function () {
return result;
}
function isPctEncoded(chr) {
if (chr.length < 3) {
/**
 * Checks whether the three characters of text beginning at start form a
 * percent triplet: a '%' followed by two hex digits (as judged by charHelper.isHexDigit).
 * @param text the text to inspect
 * @param start index of the suspected '%'
 * @return a truthy value if a triplet starts at that index, a falsy one otherwise
 */
function isPercentDigitDigit (text, start) {
    var firstDigitCheck;
    if (text[start] !== '%') {
        return false;
    }
    firstDigitCheck = charHelper.isHexDigit(text[start + 1]);
    if (!firstDigitCheck) {
        // hand back the helper's own (falsy) answer unchanged
        return firstDigitCheck;
    }
    return charHelper.isHexDigit(text[start + 2]);
}
/**
 * Reads (at most) two characters of text beginning at start and parses them
 * as a hexadecimal number.
 * @param text the text to read from
 * @param start index of the first hex digit
 * @return {number} the parsed value, or NaN if no hex digit is found there
 */
function parseHex2(text, start) {
    var hexPair = text.substr(start, 2);
    return parseInt(hexPair, 16);
}
/**
* Returns whether or not the given char sequence is a correctly pct-encoded sequence.
* @param chr
* @return {boolean}
*/
function isPctEncoded (chr) {
if (!isPercentDigitDigit(chr, 0)) {
return false;
}
var firstCharCode = parseHex2(chr, 1);
var numBytes = utf8.numBytes(firstCharCode);
if (numBytes === 0) {
return false;
}
for (var index = 0; index < chr.length; index += 3) {
if (chr.charAt(index) !== '%' || !charHelper.isHexDigit(chr.charAt(index + 1) || !charHelper.isHexDigit(chr.charAt(index + 2)))) {
for (var byteNumber = 1; byteNumber < numBytes; byteNumber += 1) {
if (!isPercentDigitDigit(chr, 3*byteNumber) || !utf8.isValidFollowingCharCode(parseHex2(chr, 3*byteNumber + 1))) {
return false;
}
}
return true;
}
/**
* Reads as much as needed from the text, e.g. '%20' or '%C3%B6'. It does not decode!
* @param text
* @param startIndex
* @return the character or pct-string of the text at startIndex
*/
function pctCharAt(text, startIndex) {
var chr = text.charAt(startIndex);
if (chr !== '%') {
var chr = text[startIndex];
if (!isPercentDigitDigit(text, startIndex)) {
return chr;
}
var utf8CharCode = parseHex2(text, startIndex + 1);
var numBytes = utf8.numBytes(utf8CharCode);
if (numBytes === 0) {
return chr;
}
chr = text.substr(startIndex, 3);
if (!isPctEncoded(chr)) {
return '%';
for (var byteNumber = 1; byteNumber < numBytes; byteNumber += 1) {
if (!isPercentDigitDigit(text, startIndex + 3 * byteNumber) || !utf8.isValidFollowingCharCode(parseHex2(text, startIndex + 3 * byteNumber + 1))) {
return chr;
}
}
return chr;
return text.substr(startIndex, 3 * numBytes);
}
return {
encodeCharacter: encode,
decodeCharacter: decodeURIComponent,
encodeCharacter: encodeCharacter,
isPctEncoded: isPctEncoded,
pctCharAt: pctCharAt
};
......
......@@ -3,7 +3,16 @@ module.exports = (function () {
var
fs = require('fs'),
path = require('path'),
sandbox = require('nodeunit').utils.sandbox;
// var testCase = require('nodeunit').testCase;
// Switch to true to get diagnostic output from the helpers below.
var NOISY = false;

/**
 * Writes diagnostic output to the console, but only while NOISY is true.
 * Every argument is forwarded to console.log, so a call with a label and a
 * value -- e.g. log('error', error) -- prints both instead of silently
 * dropping everything after the first argument.
 */
function log(/* message, ...details */) {
    if (NOISY) {
        console.log.apply(console, arguments);
    }
}
function loadUriTemplate() {
......@@ -23,18 +32,15 @@ module.exports = (function () {
index;
try {
uriTemplate = UriTemplate.parse(template);
// console.log('uritemplate parsed:' + uriTemplate);
}
catch (error) {
// console.log('error', error);
// console.log('expected', expected.toString());
// if expected === false, the error was expected!
if (expected === false) {
log('ok. expected error found');
return;
}
console.log('error', error);
console.log('expected', expected.toString());
test.notEqual('chapter ' + chapterName + ', template ' + template + ' threw error: ' + error);
log('error', error);
test.fail('chapter ' + chapterName + ', template ' + template + ' threw error: ' + error);
return;
}
test.ok(!!uriTemplate, 'uri template could not be parsed');
......@@ -49,7 +55,7 @@ module.exports = (function () {
if (expected === false) {
return;
}
test.notEqual('chapter ' + chapterName + ', template ' + template + ' threw error: ' + JSON.stringify(exception, null, 4));
test.fail('chapter ' + chapterName + ', template ' + template + ' threw error: ' + JSON.stringify(exception, null, 4));
return;
}
if (expected.constructor === Array) {
......@@ -59,7 +65,7 @@ module.exports = (function () {
return;
}
}
test.notEqual('actual: ' + actual + ', expected: one of ' + JSON.stringify(expected) + 'chapter ' + chapterName + ', template ' + template);
test.fail("actual: '" + actual + "', expected: one of " + JSON.stringify(expected) + ', chapter ' + chapterName + ', template ' + template);
}
else {
test.equal(actual, expected, 'actual: ' + actual + ', expected: ' + expected + ', template: ' + template);
......@@ -76,33 +82,39 @@ module.exports = (function () {
template,
expexted,
UriTemplate;
log(filename);
UriTemplate = loadUriTemplate();
tests = loadTestFile(filename);
for (chapterName in tests) {
if (tests.hasOwnProperty(chapterName)) {
log('-> ' + chapterName);
chapter = tests[chapterName];
variables = chapter.variables;
for (index = 0; index < chapter.testcases.length; index += 1) {
template = chapter.testcases[index][0];
expexted = chapter.testcases[index][1];
log(' -> ' + template);
assertMatches(test, template, variables, expexted, chapterName, UriTemplate);
}
console.log(chapterName);
}
}
test.done();
}
// var SPEC_HOME = '../uritemplate-test';
var SPEC_HOME = 'uritemplate-test';
// var SPEC_HOME = "C:/Users/developer/git/uritemplate-test";
return {
'spec examples': function (test) {
runTestFile(test, 'uritemplate-test/spec-examples.json');
runTestFile(test, path.join(SPEC_HOME, 'spec-examples.json'));
},
'extended tests': function (test) {
runTestFile(test, 'uritemplate-test/extended-tests.json');
runTestFile(test, path.join(SPEC_HOME, 'extended-tests.json'));
},
'negative tests': function (test) {
runTestFile(test, path.join(SPEC_HOME, 'negative-tests.json'));
},
// 'negative tests': function (test) {
// runTestFile(test, 'uritemplate-test/negative-tests.json');
// },
'own tests': function (test) {
runTestFile(test, 'own-testcases.json');
}
......
module.exports = (function () {
"use strict";
var
sandbox = require('nodeunit').utils.sandbox;
var context = {};
sandbox('src/objectHelper.js', context);
var context = {console: console};
require('nodeunit').utils.sandbox('src/objectHelper.js', context);
var objectHelper = context.objectHelper;
return {
'reduce works with initial value': function (test) {
var callNum = 0;
......
module.exports = (function () {
"use strict";
var
sandbox = require('nodeunit').utils.sandbox,
context = {console: console};
sandbox('src/charHelper.js', context);
sandbox('src/pctEncoder.js', context);
var charHelper = context.charHelper;
var pctEncoder = context.pctEncoder;
return {
'isPctEncoded': {
'one byte utf8 characters are recognized': function (test) {
test.ok(pctEncoder.isPctEncoded('%00'));
test.ok(pctEncoder.isPctEncoded('%7F'));
test.ok(!pctEncoder.isPctEncoded('%80'));
test.ok(!pctEncoder.isPctEncoded('%C0'));
test.ok(!pctEncoder.isPctEncoded('%FF'));
test.done();
},
'two byte utf8 characters are recognized': function (test) {
test.ok(pctEncoder.isPctEncoded('%C3%B6'));
test.ok(!pctEncoder.isPctEncoded('%C1%B6'));
test.ok(!pctEncoder.isPctEncoded('%C3%7F'));
test.ok(!pctEncoder.isPctEncoded('%C3%C0'));
test.done();
},
'three byte utf8 characters are recognized': function (test) {
// the euro sign
test.ok(pctEncoder.isPctEncoded('%E2%82%AC'));
test.ok(!pctEncoder.isPctEncoded('%E2%82'));
test.done();
},
'four byte utf8 characters are recognized': function (test) {
// violin clef
test.ok(pctEncoder.isPctEncoded('%F0%9D%84%9E'));
test.ok(!pctEncoder.isPctEncoded('%F0%9D%84'));
test.done();
}
},
'pctCharAt': {
'corner cases are detected': function (test) {
test.equal(pctEncoder.pctCharAt('%', 0), '%');
test.equal(pctEncoder.pctCharAt('%X', 0), '%');
test.equal(pctEncoder.pctCharAt('%A', 0), '%');
test.equal(pctEncoder.pctCharAt('%1', 0), '%');
test.equal(pctEncoder.pctCharAt('%F0%9D%8', 0), '%');
test.equal(pctEncoder.pctCharAt('%20', 1), '2');
test.done();
},
'one byte utf8 characters are detected': function (test) {
test.equal(pctEncoder.pctCharAt('%25%', 0), '%25');
test.equal(pctEncoder.pctCharAt('%3C%', 0), '%3C');
test.equal(pctEncoder.pctCharAt('%3c%', 0), '%3c');
test.done();
}
},
'encodeCharacter': {
'encodeCharacter encodes even if not needed': function (test) {
test.equal(pctEncoder.encodeCharacter('y'), '%79');
test.equal(pctEncoder.encodeCharacter('!'), '%21');
test.done();
}
},
'this and that': {
"ouml is correct encoded": function (test) {
test.equal(pctEncoder.encodeCharacter('ö'), '%C3%B6');
test.done();
},
'ouml is pctEncoded': function (test) {
test.ok(pctEncoder.isPctEncoded('%C3%B6'));
test.done();
},
'ouml is extracted from text': function (test) {
test.equal(pctEncoder.pctCharAt('zw%C3%B6lf', 2), '%C3%B6');
test.done();
}
}
};
}());
module.exports = (function () {
"use strict";
var
sandbox = require('nodeunit').utils.sandbox,
context = {console: console};
sandbox('src/objectHelper.js', context);
sandbox('src/charHelper.js', context);
sandbox('src/pctEncoder.js', context);
sandbox('src/rfcCharHelper.js', context);
sandbox('src/encodingHelper.js', context);
sandbox('src/operators.js', context);
sandbox('src/isDefined.js', context);
sandbox('src/VariableExpression.js', context);
// var charHelper = context.charHelper;