From 716f55cd8ee942f82b96c9706c8ebc6f84b067d9 Mon Sep 17 00:00:00 2001 From: Caleb Date: Tue, 3 Oct 2017 14:57:03 -0600 Subject: [PATCH] Make sure `sanitizeIRI` replacement character is safe. --- src/lib/__tests__/urlHelper.spec.js | 9 +++------ src/lib/urlHelper.js | 5 +++++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/lib/__tests__/urlHelper.spec.js b/src/lib/__tests__/urlHelper.spec.js index cc079b78..9116fa41 100644 --- a/src/lib/__tests__/urlHelper.spec.js +++ b/src/lib/__tests__/urlHelper.spec.js @@ -36,12 +36,9 @@ describe('sanitizeIRI', () => { }); it('should not allow an improper replacement character', () => { - expect( - sanitizeIRI("I! like! dollars!", { replacement: '$' }) - ).not.toEqual('I$$like$$dollars$'); - expect( - sanitizeIRI("I! like! dollars!", { replacement: '$' }) - ).toThrow(); + expect(() => { + sanitizeIRI("I! like! dollars!", { replacement: '$' }); + }).toThrow(); }); it('should not actually URI-encode the characters', () => { diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index 3eb0d646..c146618b 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -23,7 +23,12 @@ export function getNewEntryUrl(collectionName, direct) { */ const uriChars = /[\w\-.~]/i; const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u; +// `sanitizeIRI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed. export function sanitizeIRI(str, { replacement = "" } = {}) { + if (replacement !== "") { + const validReplacement = (sanitizeIRI(replacement) === replacement); + if (!validReplacement) throw "The replacement character(s) for `sanitizeIRI` is itself unsafe."; + } let result = ""; // We cannot use a `map` function here because `string.split()` // splits things like emojis into UTF-16 surrogate pairs,