From 51b89cc84b08c4c104addb52a343eec290565ebf Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 29 Sep 2017 14:56:15 -0600 Subject: [PATCH 01/19] Switch from `slug` to `sanitize-filename`. --- package.json | 2 +- src/backends/backend.js | 6 +++--- yarn.lock | 26 ++++++++++++++++---------- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/package.json b/package.json index 00bf5588..3cd76a57 100644 --- a/package.json +++ b/package.json @@ -149,12 +149,12 @@ "remark-parse": "^3.0.1", "remark-rehype": "^2.0.0", "remark-stringify": "^3.0.1", + "sanitize-filename": "^1.6.1", "semaphore": "^1.0.5", "slate": "^0.21.0", "slate-edit-list": "^0.7.1", "slate-edit-table": "^0.10.1", "slate-soft-break": "^0.3.0", - "slug": "^0.9.1", "toml-j0.4": "^1.1.1", "unified": "^6.1.4", "unist-builder": "^1.0.2", diff --git a/src/backends/backend.js b/src/backends/backend.js index 782422bc..6f38f184 100644 --- a/src/backends/backend.js +++ b/src/backends/backend.js @@ -5,7 +5,7 @@ import GitGatewayBackend from "./git-gateway/implementation"; import { resolveFormat } from "../formats/formats"; import { selectListMethod, selectEntrySlug, selectEntryPath, selectAllowNewEntries, selectFolderEntryExtension } from "../reducers/collections"; import { createEntry } from "../valueObjects/Entry"; -import slug from 'slug'; +import sanitize from 'sanitize-filename'; class LocalStorageAuthStore { storageKey = "netlify-cms-user"; @@ -51,9 +51,9 @@ const slugFormatter = (template = "{{slug}}", entryData) => { case "day": return (`0${ date.getDate() }`).slice(-2); case "slug": - return slug(getIdentifier(entryData).trim(), {lower: true}); + return sanitize(getIdentifier(entryData).trim().toLowerCase(), {replacement: "-"}); default: - return slug(entryData.get(field, "").trim(), {lower: true}); + return sanitize(entryData.get(field, "").trim().toLowerCase(), {replacement: "-"}); } }); }; diff --git a/yarn.lock b/yarn.lock index e7722533..657f2ebd 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7622,6 +7622,12 @@ sane@~1.6.0: walker "~1.0.5" watch "~0.10.0" +sanitize-filename@^1.6.1: + version "1.6.1" + resolved "https://registry.yarnpkg.com/sanitize-filename/-/sanitize-filename-1.6.1.tgz#612da1c96473fa02dccda92dcd5b4ab164a6772a" + dependencies: + truncate-utf8-bytes "^1.0.0" + sax@^1.2.1, sax@~1.2.1: version "1.2.4" resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.4.tgz#2816234e2378bddc4e5354fab5caa895df7100d9" @@ -7815,12 +7821,6 @@ slice-ansi@0.0.4: version "0.0.4" resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-0.0.4.tgz#edbf8903f66f7ce2f8eafd6ceed65e264c831b35" -slug@^0.9.1: - version "0.9.1" - resolved "https://registry.yarnpkg.com/slug/-/slug-0.9.1.tgz#af08f608a7c11516b61778aa800dce84c518cfda" - dependencies: - unicode ">= 0.3.1" - sntp@1.x.x: version "1.0.9" resolved "https://registry.yarnpkg.com/sntp/-/sntp-1.0.9.tgz#6541184cc90aeea6c6e7b35e2659082443c66198" @@ -8547,6 +8547,12 @@ trough@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/trough/-/trough-1.0.1.tgz#a9fd8b0394b0ae8fff82e0633a0a36ccad5b5f86" +truncate-utf8-bytes@^1.0.0: + version "1.0.2" + resolved "https://registry.yarnpkg.com/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz#405923909592d56f78a5818434b0b78489ca5f2b" + dependencies: + utf8-byte-length "^1.0.1" + tryit@^1.0.1: version "1.0.3" resolved "https://registry.yarnpkg.com/tryit/-/tryit-1.0.3.tgz#393be730a9446fd1ead6da59a014308f36c289cb" @@ -8635,10 +8641,6 @@ unherit@^1.0.4: inherits "^2.0.1" xtend "^4.0.1" -"unicode@>= 0.3.1": - version "9.0.1" - resolved "https://registry.yarnpkg.com/unicode/-/unicode-9.0.1.tgz#104706272c6464c574801be1b086f7245cf25158" - unified@^6.1.4: version "6.1.5" resolved "https://registry.yarnpkg.com/unified/-/unified-6.1.5.tgz#716937872621a63135e62ced2f3ac6a063c6fb87" @@ -8793,6 +8795,10 @@ user-home@^2.0.0: dependencies: os-homedir "^1.0.0" +utf8-byte-length@^1.0.1: + version "1.0.4" + resolved "https://registry.yarnpkg.com/utf8-byte-length/-/utf8-byte-length-1.0.4.tgz#f45f150c4c66eee968186505ab93fcbb8ad6bf61" + util-deprecate@~1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf" From d21de7e82e85a14e869e9bf9c2b7f67ec83d87b4 Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 29 Sep 2017 18:47:08 -0600 Subject: [PATCH 02/19] Remove periods from filenames/slugs. --- src/backends/backend.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backends/backend.js b/src/backends/backend.js index 6f38f184..cfc003ff 100644 --- a/src/backends/backend.js +++ b/src/backends/backend.js @@ -51,9 +51,9 @@ const slugFormatter = (template = "{{slug}}", entryData) => { case "day": return (`0${ date.getDate() }`).slice(-2); case "slug": - return sanitize(getIdentifier(entryData).trim().toLowerCase(), {replacement: "-"}); + return sanitize(getIdentifier(entryData).trim().toLowerCase(), {replacement: "-"}).replace('.', '-'); default: - return sanitize(entryData.get(field, "").trim().toLowerCase(), {replacement: "-"}); + return sanitize(entryData.get(field, "").trim().toLowerCase(), {replacement: "-"}).replace('.', '-'); } }); }; From d895112f9d3c3c2ff678a5f06c70e30528e9e231 Mon Sep 17 00:00:00 2001 From: Caleb Date: Sat, 30 Sep 2017 09:22:15 -0600 Subject: [PATCH 03/19] Replace spaces in slugs/filenames with dashes. --- src/backends/backend.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backends/backend.js b/src/backends/backend.js index cfc003ff..f91131e4 100644 --- a/src/backends/backend.js +++ b/src/backends/backend.js @@ -51,9 +51,9 @@ const slugFormatter = (template = "{{slug}}", entryData) => { case "day": return (`0${ date.getDate() }`).slice(-2); case "slug": - return sanitize(getIdentifier(entryData).trim().toLowerCase(), {replacement: "-"}).replace('.', '-'); + return sanitize(getIdentifier(entryData).trim().toLowerCase(), {replacement: "-"}).replace(/[\.\s]/g, '-'); default: - return sanitize(entryData.get(field, "").trim().toLowerCase(), {replacement: "-"}).replace('.', '-'); + return sanitize(entryData.get(field, "").trim().toLowerCase(), {replacement: "-"}).replace(/[\.\s]/g, '-'); } }); }; From 8a2b4fc843fc5192b12bbc0809b8ceeae45121b2 Mon Sep 17 00:00:00 2001 From: Caleb Date: Sat, 30 Sep 2017 17:25:05 -0600 Subject: [PATCH 04/19] Sanitize entire slug instead of just parts. --- src/backends/backend.js | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/backends/backend.js b/src/backends/backend.js index f91131e4..f79bc856 100644 --- a/src/backends/backend.js +++ b/src/backends/backend.js @@ -42,7 +42,7 @@ const slugFormatter = (template = "{{slug}}", entryData) => { return identifier; }; - return template.replace(/\{\{([^\}]+)\}\}/g, (_, field) => { + let slug = template.replace(/\{\{([^\}]+)\}\}/g, (_, field) => { switch (field) { case "year": return date.getFullYear(); @@ -51,11 +51,13 @@ const slugFormatter = (template = "{{slug}}", entryData) => { case "day": return (`0${ date.getDate() }`).slice(-2); case "slug": - return sanitize(getIdentifier(entryData).trim().toLowerCase(), {replacement: "-"}).replace(/[\.\s]/g, '-'); + return getIdentifier(entryData).trim(); default: - return sanitize(entryData.get(field, "").trim().toLowerCase(), {replacement: "-"}).replace(/[\.\s]/g, '-'); + return entryData.get(field, "").trim(); } }); + + return sanitize(slug, {replacement: "-"}).replace(/[.]/g, '-'); }; class Backend { From 9bc65cd0ac12c82618732346a94aac15de75d94b Mon Sep 17 00:00:00 2001 From: Caleb Date: Sat, 30 Sep 2017 17:27:07 -0600 Subject: [PATCH 05/19] Re-implement standard slugification with IRIs instead of URIs. --- src/backends/backend.js | 17 +++++++++++++++-- src/lib/urlHelper.js | 16 ++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/backends/backend.js b/src/backends/backend.js index f79bc856..19d02097 100644 --- a/src/backends/backend.js +++ b/src/backends/backend.js @@ -5,7 +5,8 @@ import GitGatewayBackend from "./git-gateway/implementation"; import { resolveFormat } from "../formats/formats"; import { selectListMethod, selectEntrySlug, selectEntryPath, selectAllowNewEntries, selectFolderEntryExtension } from "../reducers/collections"; import { createEntry } from "../valueObjects/Entry"; -import sanitize from 'sanitize-filename'; +import { sanitizeIRI } from "../lib/urlHelper"; +import sanitizeFilename from 'sanitize-filename'; class LocalStorageAuthStore { storageKey = "netlify-cms-user"; @@ -57,7 +58,19 @@ const slugFormatter = (template = "{{slug}}", entryData) => { } }); - return sanitize(slug, {replacement: "-"}).replace(/[.]/g, '-'); + // Convert slug to lower-case; + slug = slug.toLocaleLowerCase(); + + // Replace periods and spaces with dashes. + slug = slug.replace(/[.\s]/g, '-'); + // Sanitize as IRI (i18n URI) and as filename. + slug = sanitizeIRI(slug, {replacement: "-"}); + slug = sanitizeFilename(slug, {replacement: "-"}); + + // Remove any doubled or trailing replacement characters (that were added in the sanitizers). + slug = slug.replace(/-+/g, '-').replace(/-$/, ''); + + return slug; }; class Backend { diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index 62dfb588..c3a90939 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -12,6 +12,22 @@ export function getNewEntryUrl(collectionName, direct) { return getUrl(`/collections/${ collectionName }/entries/new`, direct); } +// Unreserved chars from RFC3987. +const uriChars = /[\w\-.~]/i; +const ucsChars = /[\xA0-\u{D7FF}]|[\u{F900}-\u{FDCF}]|[\u{FDF0}-\u{FFEF}]|[\u{10000}-\u{1FFFD}]|[\u{20000}-\u{2FFFD}]|[\u{30000}-\u{3FFFD}]|[\u{40000}-\u{4FFFD}]|[\u{50000}-\u{5FFFD}]|[\u{60000}-\u{6FFFD}]|[\u{70000}-\u{7FFFD}]|[\u{80000}-\u{8FFFD}]|[\u{90000}-\u{9FFFD}]|[\u{A0000}-\u{AFFFD}]|[\u{B0000}-\u{BFFFD}]|[\u{C0000}-\u{CFFFD}]|[\u{D0000}-\u{DFFFD}]|[\u{E1000}-\u{EFFFD}]/u; +export function sanitizeIRI(str, { replacement }) { + let result = ""; + // We cannot use a `map` function here because `string.split()` splits things like emojis into surrogate pairs. + for (const char of str) { + if (uriChars.test(char) || ucsChars.test(char)) { + result += char; + } else { + result += replacement; + } + } + return result; +} + export function urlize(string) { const sanitized = makePathSanitized(string); const parsedURL = url.parse(sanitized); From 4e5a004010e6f56116358c7bb86221edb3f28de4 Mon Sep 17 00:00:00 2001 From: Caleb Date: Sat, 30 Sep 2017 20:43:29 -0600 Subject: [PATCH 06/19] Make `ucschars` regex smaller. --- src/lib/urlHelper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index c3a90939..63dff30c 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -14,7 +14,7 @@ export function getNewEntryUrl(collectionName, direct) { // Unreserved chars from RFC3987. const uriChars = /[\w\-.~]/i; -const ucsChars = /[\xA0-\u{D7FF}]|[\u{F900}-\u{FDCF}]|[\u{FDF0}-\u{FFEF}]|[\u{10000}-\u{1FFFD}]|[\u{20000}-\u{2FFFD}]|[\u{30000}-\u{3FFFD}]|[\u{40000}-\u{4FFFD}]|[\u{50000}-\u{5FFFD}]|[\u{60000}-\u{6FFFD}]|[\u{70000}-\u{7FFFD}]|[\u{80000}-\u{8FFFD}]|[\u{90000}-\u{9FFFD}]|[\u{A0000}-\u{AFFFD}]|[\u{B0000}-\u{BFFFD}]|[\u{C0000}-\u{CFFFD}]|[\u{D0000}-\u{DFFFD}]|[\u{E1000}-\u{EFFFD}]/u; +const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u; export function sanitizeIRI(str, { replacement }) { let result = ""; // We cannot use a `map` function here because `string.split()` splits things like emojis into surrogate pairs. From 72492749d9507390d44a69ab82a81fe83d59161b Mon Sep 17 00:00:00 2001 From: Caleb Date: Tue, 3 Oct 2017 09:01:06 -0600 Subject: [PATCH 07/19] Move slug sanitizer to a seperate function. --- src/backends/backend.js | 22 +++++++--------------- src/lib/urlHelper.js | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/backends/backend.js b/src/backends/backend.js index 19d02097..96746a0d 100644 --- a/src/backends/backend.js +++ b/src/backends/backend.js @@ -5,8 +5,7 @@ import GitGatewayBackend from "./git-gateway/implementation"; import { resolveFormat } from "../formats/formats"; import { selectListMethod, selectEntrySlug, selectEntryPath, selectAllowNewEntries, selectFolderEntryExtension } from "../reducers/collections"; import { createEntry } from "../valueObjects/Entry"; -import { sanitizeIRI } from "../lib/urlHelper"; -import sanitizeFilename from 'sanitize-filename'; +import { sanitizeSlug } from "../lib/urlHelper"; class LocalStorageAuthStore { storageKey = "netlify-cms-user"; @@ -43,7 +42,7 @@ const slugFormatter = (template = "{{slug}}", entryData) => { return identifier; }; - let slug = template.replace(/\{\{([^\}]+)\}\}/g, (_, field) => { + const slug = template.replace(/\{\{([^\}]+)\}\}/g, (_, field) => { switch (field) { case "year": return date.getFullYear(); @@ -56,21 +55,14 @@ const slugFormatter = (template = "{{slug}}", entryData) => { default: return entryData.get(field, "").trim(); } - }); - - // Convert slug to lower-case; - slug = slug.toLocaleLowerCase(); + }) + // Convert slug to lower-case + .toLocaleLowerCase() // Replace periods and spaces with dashes. - slug = slug.replace(/[.\s]/g, '-'); - // Sanitize as IRI (i18n URI) and as filename. - slug = sanitizeIRI(slug, {replacement: "-"}); - slug = sanitizeFilename(slug, {replacement: "-"}); + .replace(/[.\s]/g, '-'); - // Remove any doubled or trailing replacement characters (that were added in the sanitizers). - slug = slug.replace(/-+/g, '-').replace(/-$/, ''); - - return slug; + return sanitizeSlug(slug); }; class Backend { diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index 63dff30c..da81ec7e 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -1,4 +1,6 @@ import url from 'url'; +import sanitizeFilename from 'sanitize-filename'; +import { isString, escapeRegExp } from 'lodash'; function getUrl(url, direct) { return `${ direct ? '/#' : '' }${ url }`; @@ -28,6 +30,23 @@ export function sanitizeIRI(str, { replacement }) { return result; } +export function sanitizeSlug(str, { replacement = '-' }) { + if (!isString(str)) throw "`sanitizeSlug` only accepts strings as input."; + if (!isString(replacement)) throw "the `sanitizeSlug` replacement character must be a string."; + let slug = str; + + // Sanitize as IRI (i18n URI) and as filename. + slug = sanitizeIRI(slug, {replacement}); + slug = sanitizeFilename(slug, {replacement}); + + // Remove any doubled or trailing replacement characters (that were added in the sanitizers). + const doubleReplacement = new RegExp('(?:' + escapeRegExp(replacement) + ')+', 'g'); + const trailingReplacment = new RegExp(escapeRegExp(replacement) + '$') + slug = slug.replace(doubleReplacement, '-').replace(trailingReplacment, ''); + + return slug; +} + export function urlize(string) { const sanitized = makePathSanitized(string); const parsedURL = url.parse(sanitized); From 8fb326ff2b5d9004e735fb0fdf0eb6a067ce7645 Mon Sep 17 00:00:00 2001 From: Caleb Date: Tue, 3 Oct 2017 09:48:43 -0600 Subject: [PATCH 08/19] Make `sanitizeSlug` immutable. Thanks @erquhart! --- src/lib/urlHelper.js | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index da81ec7e..a0581a4a 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -1,6 +1,6 @@ import url from 'url'; import sanitizeFilename from 'sanitize-filename'; -import { isString, escapeRegExp } from 'lodash'; +import { isString, escapeRegExp, flow, partialRight } from 'lodash'; function getUrl(url, direct) { return `${ direct ? '/#' : '' }${ url }`; @@ -33,18 +33,22 @@ export function sanitizeIRI(str, { replacement }) { export function sanitizeSlug(str, { replacement = '-' }) { if (!isString(str)) throw "`sanitizeSlug` only accepts strings as input."; if (!isString(replacement)) throw "the `sanitizeSlug` replacement character must be a string."; - let slug = str; - + // Sanitize as IRI (i18n URI) and as filename. - slug = sanitizeIRI(slug, {replacement}); - slug = sanitizeFilename(slug, {replacement}); - + const sanitize = flow([ + partialRight(sanitizeIRI, { replacement }), + partialRight(sanitizeFilename, { replacement }), + ]); + const sanitizedSlug = sanitize(str); + // Remove any doubled or trailing replacement characters (that were added in the sanitizers). const doubleReplacement = new RegExp('(?:' + escapeRegExp(replacement) + ')+', 'g'); - const trailingReplacment = new RegExp(escapeRegExp(replacement) + '$') - slug = slug.replace(doubleReplacement, '-').replace(trailingReplacment, ''); + const trailingReplacment = new RegExp(escapeRegExp(replacement) + '$'); + const normalizedSlug = sanitizedSlug + .replace(doubleReplacement, '-') + .replace(trailingReplacment, ''); - return slug; + return normalizedSlug; } export function urlize(string) { From be28f895bcc9762fcd1cdfc3c11264edda18c4ec Mon Sep 17 00:00:00 2001 From: Caleb Date: Tue, 3 Oct 2017 10:07:20 -0600 Subject: [PATCH 09/19] Document slug sanitizing functions. --- src/lib/urlHelper.js | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index a0581a4a..dabccc07 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -14,12 +14,20 @@ export function getNewEntryUrl(collectionName, direct) { return getUrl(`/collections/${ collectionName }/entries/new`, direct); } -// Unreserved chars from RFC3987. +/* See https://www.w3.org/International/articles/idn-and-iri/#path. + * According to the new IRI (Internationalized Resource Identifier) spec, RFC 3987, + * ASCII chars should be kept the same way as in standard URIs (letters digits _ - . ~). + * Non-ASCII chars (unless they are not in the allowed "ucschars" list) should be percent-encoded. + * If the string is not encoded in Unicode, it should be converted to UTF-8 and normalized first, + * but JS stores strings as UTF-16/UCS-2 internally, so we should not normallize or re-encode. + */ const uriChars = /[\w\-.~]/i; const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u; export function sanitizeIRI(str, { replacement }) { let result = ""; - // We cannot use a `map` function here because `string.split()` splits things like emojis into surrogate pairs. + // We cannot use a `map` function here because `string.split()` + // splits things like emojis into UTF-16 surrogate pairs, + // and we want to use UTF-8 (it isn't required, but is nicer). for (const char of str) { if (uriChars.test(char) || ucsChars.test(char)) { result += char; From f40f58b7dd77a9aeaa74f9a33b62a89e72ebe7e3 Mon Sep 17 00:00:00 2001 From: Caleb Date: Tue, 3 Oct 2017 14:11:20 -0600 Subject: [PATCH 10/19] Add default replacment to sanitizeIRI. --- src/lib/urlHelper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index dabccc07..3eb0d646 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -23,7 +23,7 @@ export function getNewEntryUrl(collectionName, direct) { */ const uriChars = /[\w\-.~]/i; const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u; -export function sanitizeIRI(str, { replacement }) { +export function sanitizeIRI(str, { replacement = "" } = {}) { let result = ""; // We cannot use a `map` function here because `string.split()` // splits things like emojis into UTF-16 surrogate pairs, From 476ff2e1abc5337313df53bbfba97f0d0bc96892 Mon Sep 17 00:00:00 2001 From: Caleb Date: Tue, 3 Oct 2017 14:51:33 -0600 Subject: [PATCH 11/19] Add tests for sanitizeIRI. --- src/lib/__tests__/urlHelper.spec.js | 55 +++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 src/lib/__tests__/urlHelper.spec.js diff --git a/src/lib/__tests__/urlHelper.spec.js b/src/lib/__tests__/urlHelper.spec.js new file mode 100644 index 00000000..cc079b78 --- /dev/null +++ b/src/lib/__tests__/urlHelper.spec.js @@ -0,0 +1,55 @@ +import { sanitizeIRI } from '../urlHelper'; + +describe('sanitizeIRI', () => { + // `sanitizeIRI` tests from RFC 3987 + it('should keep valid URI chars (letters digits _ - . ~)', () => { + expect( + sanitizeIRI("This, that-one_or.the~other 123!") + ).toEqual('Thisthat-one_or.the~other123'); + }); + + it('should not remove accents', () => { + expect( + sanitizeIRI("ěščřžý") + ).toEqual('ěščřžý'); + }); + + it('should keep valid non-latin chars (ucschars in RFC 3987)', () => { + expect( + sanitizeIRI("日本語のタイトル") + ).toEqual('日本語のタイトル'); + }); + + it('should not normalize Unicode strings', () => { + expect( + sanitizeIRI('\u017F\u0323\u0307') + ).toEqual('\u017F\u0323\u0307'); + expect( + sanitizeIRI('\u017F\u0323\u0307') + ).not.toEqual('\u1E9B\u0323'); + }); + + it('should allow a custom replacement character', () => { + expect( + sanitizeIRI("duck\\goose.elephant", { replacement: '-' }) + ).toEqual('duck-goose.elephant'); + }); + + it('should not allow an improper replacement character', () => { + expect( + sanitizeIRI("I! like! dollars!", { replacement: '$' }) + ).not.toEqual('I$$like$$dollars$'); + expect( + sanitizeIRI("I! like! dollars!", { replacement: '$' }) + ).toThrow(); + }); + + it('should not actually URI-encode the characters', () => { + expect( + sanitizeIRI("🎉") + ).toEqual('🎉'); + expect( + sanitizeIRI("🎉") + ).not.toEqual("%F0%9F%8E%89"); + }); +}); From 716f55cd8ee942f82b96c9706c8ebc6f84b067d9 Mon Sep 17 00:00:00 2001 From: Caleb Date: Tue, 3 Oct 2017 14:57:03 -0600 Subject: [PATCH 12/19] Make sure `sanitizeIRI` replacement character is safe. --- src/lib/__tests__/urlHelper.spec.js | 9 +++------ src/lib/urlHelper.js | 5 +++++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/lib/__tests__/urlHelper.spec.js b/src/lib/__tests__/urlHelper.spec.js index cc079b78..9116fa41 100644 --- a/src/lib/__tests__/urlHelper.spec.js +++ b/src/lib/__tests__/urlHelper.spec.js @@ -36,12 +36,9 @@ describe('sanitizeIRI', () => { }); it('should not allow an improper replacement character', () => { - expect( - sanitizeIRI("I! like! dollars!", { replacement: '$' }) - ).not.toEqual('I$$like$$dollars$'); - expect( - sanitizeIRI("I! like! dollars!", { replacement: '$' }) - ).toThrow(); + expect(() => { + sanitizeIRI("I! like! dollars!", { replacement: '$' }); + }).toThrow(); }); it('should not actually URI-encode the characters', () => { diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index 3eb0d646..c146618b 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -23,7 +23,12 @@ export function getNewEntryUrl(collectionName, direct) { */ const uriChars = /[\w\-.~]/i; const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u; +// `sanitizeIRI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed. export function sanitizeIRI(str, { replacement = "" } = {}) { + if (replacement !== "") { + const validReplacement = (sanitizeIRI(replacement) === replacement); + if (!validReplacement) throw "The replacement character(s) for `sanitizeIRI` is itself unsafe."; + } let result = ""; // We cannot use a `map` function here because `string.split()` // splits things like emojis into UTF-16 surrogate pairs, From 2b64fbfba6da7d763cbc2bcd870a12f512538bdb Mon Sep 17 00:00:00 2001 From: Caleb Date: Tue, 3 Oct 2017 16:08:23 -0600 Subject: [PATCH 13/19] Allow empty options object for `sanitizeSlug`. --- src/lib/urlHelper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index c146618b..a6bb71cf 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -43,7 +43,7 @@ export function sanitizeIRI(str, { replacement = "" } = {}) { return result; } -export function sanitizeSlug(str, { replacement = '-' }) { +export function sanitizeSlug(str, { replacement = '-' } = {}) { if (!isString(str)) throw "`sanitizeSlug` only accepts strings as input."; if (!isString(replacement)) throw "the `sanitizeSlug` replacement character must be a string."; From ddcf009fc642ad8cc080217d8766533d96f5e488 Mon Sep 17 00:00:00 2001 From: Darrel O'Pry Date: Tue, 3 Oct 2017 19:28:28 -0400 Subject: [PATCH 14/19] add sanitizeSlug tests. --- src/lib/__tests__/urlHelper.spec.js | 40 ++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/src/lib/__tests__/urlHelper.spec.js b/src/lib/__tests__/urlHelper.spec.js index 9116fa41..5258adfc 100644 --- a/src/lib/__tests__/urlHelper.spec.js +++ b/src/lib/__tests__/urlHelper.spec.js @@ -1,4 +1,4 @@ -import { sanitizeIRI } from '../urlHelper'; +import { sanitizeIRI, sanitizeSlug } from '../urlHelper'; describe('sanitizeIRI', () => { // `sanitizeIRI` tests from RFC 3987 @@ -50,3 +50,41 @@ describe('sanitizeIRI', () => { ).not.toEqual("%F0%9F%8E%89"); }); }); + + +describe('sanitizeSlug', ()=> { + + it('throws an error for non-strings', () => { + expect(() => sanitizeSlug({})).toThrowError("`sanitizeSlug` only accepts strings as input."); + expect(() => sanitizeSlug([])).toThrowError("`sanitizeSlug` only accepts strings as input."); + expect(() => sanitizeSlug(false)).toThrowError("`sanitizeSlug` only accepts strings as input."); + expect(() => sanitizeSlug(null)).toThrowError("`sanitizeSlug` only accepts strings as input."); + expect(() => sanitizeSlug(11234)).toThrowError("`sanitizeSlug` only accepts strings as input."); + expect(() => sanitizeSlug(undefined)).toThrowError("`sanitizeSlug` only accepts strings as input."); + expect(() => sanitizeSlug(()=>{})).toThrowError("`sanitizeSlug` only accepts strings as input."); + }); + + it('throws an error for non-string replacements', () => { + expect(() => sanitizeSlug('test', { replacement: {} })).toThrowError("the `sanitizeSlug` replacement character must be a string."); + expect(() => sanitizeSlug('test', { replacement: [] })).toThrowError("the `sanitizeSlug` replacement character must be a string."); + expect(() => sanitizeSlug('test', { replacement: false })).toThrowError("the `sanitizeSlug` replacement character must be a string."); + expect(() => sanitizeSlug('test', { replacement: null } )).toThrowError("the `sanitizeSlug` replacement character must be a string."); + expect(() => sanitizeSlug('test', { replacement: 11232 })).toThrowError("the `sanitizeSlug` replacement character must be a string."); + // do not test undefined for this variant since a default is set in the cosntructor. + //expect(() => sanitizeSlug('test', { replacement: undefined })).toThrowError("the `sanitizeSlug` replacement character must be a string."); + expect(() => sanitizeSlug('test', { replacement: ()=>{} })).toThrowError("the `sanitizeSlug` replacement character must be a string."); + }); + + it('removes double replacements', () => { + expect(sanitizeSlug('test test')).toEqual('test-test'); + }); + + it('removes trailing replacemenets', () => { + expect(sanitizeSlug('test test ')).toEqual('test-test'); + }); + + it('uses alternate replacements', () => { + expect(sanitizeSlug('test test ', { replacement: '_' })).toEqual('test_test'); + }); + +}); \ No newline at end of file From b8006bbcbe800bffc17b2f2ffc31e98c90f8c926 Mon Sep 17 00:00:00 2001 From: Caleb Date: Tue, 3 Oct 2017 18:06:15 -0600 Subject: [PATCH 15/19] Update slug sanitization errors. --- src/lib/__tests__/urlHelper.spec.js | 28 ++++++++++++++-------------- src/lib/urlHelper.js | 8 +++++--- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/lib/__tests__/urlHelper.spec.js b/src/lib/__tests__/urlHelper.spec.js index 5258adfc..78cb5162 100644 --- a/src/lib/__tests__/urlHelper.spec.js +++ b/src/lib/__tests__/urlHelper.spec.js @@ -55,24 +55,24 @@ describe('sanitizeIRI', () => { describe('sanitizeSlug', ()=> { it('throws an error for non-strings', () => { - expect(() => sanitizeSlug({})).toThrowError("`sanitizeSlug` only accepts strings as input."); - expect(() => sanitizeSlug([])).toThrowError("`sanitizeSlug` only accepts strings as input."); - expect(() => sanitizeSlug(false)).toThrowError("`sanitizeSlug` only accepts strings as input."); - expect(() => sanitizeSlug(null)).toThrowError("`sanitizeSlug` only accepts strings as input."); - expect(() => sanitizeSlug(11234)).toThrowError("`sanitizeSlug` only accepts strings as input."); - expect(() => sanitizeSlug(undefined)).toThrowError("`sanitizeSlug` only accepts strings as input."); - expect(() => sanitizeSlug(()=>{})).toThrowError("`sanitizeSlug` only accepts strings as input."); + expect(() => sanitizeSlug({})).toThrowError("The input slug must be a string."); + expect(() => sanitizeSlug([])).toThrowError("The input slug must be a string."); + expect(() => sanitizeSlug(false)).toThrowError("The input slug must be a string."); + expect(() => sanitizeSlug(null)).toThrowError("The input slug must be a string."); + expect(() => sanitizeSlug(11234)).toThrowError("The input slug must be a string."); + expect(() => sanitizeSlug(undefined)).toThrowError("The input slug must be a string."); + expect(() => sanitizeSlug(()=>{})).toThrowError("The input slug must be a string."); }); it('throws an error for non-string replacements', () => { - expect(() => sanitizeSlug('test', { replacement: {} })).toThrowError("the `sanitizeSlug` replacement character must be a string."); - expect(() => sanitizeSlug('test', { replacement: [] })).toThrowError("the `sanitizeSlug` replacement character must be a string."); - expect(() => sanitizeSlug('test', { replacement: false })).toThrowError("the `sanitizeSlug` replacement character must be a string."); - expect(() => sanitizeSlug('test', { replacement: null } )).toThrowError("the `sanitizeSlug` replacement character must be a string."); - expect(() => sanitizeSlug('test', { replacement: 11232 })).toThrowError("the `sanitizeSlug` replacement character must be a string."); + expect(() => sanitizeSlug('test', { replacement: {} })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { replacement: [] })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { replacement: false })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { replacement: null } )).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { replacement: 11232 })).toThrowError("`options.replacement` must be a string."); // do not test undefined for this variant since a default is set in the cosntructor. - //expect(() => sanitizeSlug('test', { replacement: undefined })).toThrowError("the `sanitizeSlug` replacement character must be a string."); - expect(() => sanitizeSlug('test', { replacement: ()=>{} })).toThrowError("the `sanitizeSlug` replacement character must be a string."); + //expect(() => sanitizeSlug('test', { replacement: undefined })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { replacement: ()=>{} })).toThrowError("`options.replacement` must be a string."); }); it('removes double replacements', () => { diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index a6bb71cf..624c6358 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -25,10 +25,12 @@ const uriChars = /[\w\-.~]/i; const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u; // `sanitizeIRI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed. export function sanitizeIRI(str, { replacement = "" } = {}) { + if (!isString(replacement)) throw "`options.replacement` must be a string."; if (replacement !== "") { const validReplacement = (sanitizeIRI(replacement) === replacement); - if (!validReplacement) throw "The replacement character(s) for `sanitizeIRI` is itself unsafe."; + if (!validReplacement) throw "The replacement character(s) (options.replacement) is itself unsafe."; } + let result = ""; // We cannot use a `map` function here because `string.split()` // splits things like emojis into UTF-16 surrogate pairs, @@ -44,8 +46,8 @@ export function sanitizeIRI(str, { replacement = "" } = {}) { } export function sanitizeSlug(str, { replacement = '-' } = {}) { - if (!isString(str)) throw "`sanitizeSlug` only accepts strings as input."; - if (!isString(replacement)) throw "the `sanitizeSlug` replacement character must be a string."; + if (!isString(str)) throw "The input slug must be a string."; + if (!isString(replacement)) throw "`options.replacement` must be a string."; // Sanitize as IRI (i18n URI) and as filename. const sanitize = flow([ From 451c69cb877e27de481ccf65b3c7f1b95255e239 Mon Sep 17 00:00:00 2001 From: Caleb Date: Tue, 3 Oct 2017 18:46:12 -0600 Subject: [PATCH 16/19] Fix alternate replacement not always working in `sanitizeSlug`. --- src/lib/urlHelper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index 624c6358..c664265a 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -60,7 +60,7 @@ export function sanitizeSlug(str, { replacement = '-' } = {}) { const doubleReplacement = new RegExp('(?:' + escapeRegExp(replacement) + ')+', 'g'); const trailingReplacment = new RegExp(escapeRegExp(replacement) + '$'); const normalizedSlug = sanitizedSlug - .replace(doubleReplacement, '-') + .replace(doubleReplacement, replacement) .replace(trailingReplacment, ''); return normalizedSlug; From fdc4054fbd023338bbffcd70c149bc23d44fbe5e Mon Sep 17 00:00:00 2001 From: Caleb Date: Tue, 3 Oct 2017 19:21:58 -0600 Subject: [PATCH 17/19] Remove recursion for `sanitizeIRI` replacement checking. --- src/lib/urlHelper.js | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index c664265a..ac8d830d 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -24,25 +24,34 @@ export function getNewEntryUrl(collectionName, direct) { const uriChars = /[\w\-.~]/i; const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u; // `sanitizeIRI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed. -export function sanitizeIRI(str, { replacement = "" } = {}) { +function sanitizeIRI(str, { replacement = "" } = {}) { + if (!isString(str)) throw "The input slug must be a string."; if (!isString(replacement)) throw "`options.replacement` must be a string."; + + // This is where sanitization is actually done. + const sanitize = (input) => { + let result = ""; + // We cannot use a `map` function here because `string.split()` + // splits things like emojis into UTF-16 surrogate pairs, + // and we want to use UTF-8 (it isn't required, but is nicer). + for (const char of input) { + if (uriChars.test(char) || ucsChars.test(char)) { + result += char; + } else { + result += replacement; + } + } + return result; + } + + // Check and make sure the replacement character is actually a safe char itself. if (replacement !== "") { - const validReplacement = (sanitizeIRI(replacement) === replacement); + const validReplacement = (sanitize(replacement) === replacement); if (!validReplacement) throw "The replacement character(s) (options.replacement) is itself unsafe."; } - let result = ""; - // We cannot use a `map` function here because `string.split()` - // splits things like emojis into UTF-16 surrogate pairs, - // and we want to use UTF-8 (it isn't required, but is nicer). - for (const char of str) { - if (uriChars.test(char) || ucsChars.test(char)) { - result += char; - } else { - result += replacement; - } - } - return result; + // Actually do the sanitization. + return sanitize(str); } export function sanitizeSlug(str, { replacement = '-' } = {}) { From 793afc8b736a95a3729f75547660eccf9f670189 Mon Sep 17 00:00:00 2001 From: Caleb Date: Tue, 3 Oct 2017 20:21:56 -0600 Subject: [PATCH 18/19] Remove mutation from `sanitizeIRI`. --- src/lib/urlHelper.js | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index ac8d830d..0080c02f 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -23,35 +23,18 @@ export function getNewEntryUrl(collectionName, direct) { */ const uriChars = /[\w\-.~]/i; const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u; +const validIRIChar = (char) => (uriChars.test(char) || ucsChars.test(char)); // `sanitizeIRI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed. -function sanitizeIRI(str, { replacement = "" } = {}) { +export function sanitizeIRI(str, { replacement = "" } = {}) { if (!isString(str)) throw "The input slug must be a string."; if (!isString(replacement)) throw "`options.replacement` must be a string."; - // This is where sanitization is actually done. - const sanitize = (input) => { - let result = ""; - // We cannot use a `map` function here because `string.split()` - // splits things like emojis into UTF-16 surrogate pairs, - // and we want to use UTF-8 (it isn't required, but is nicer). - for (const char of input) { - if (uriChars.test(char) || ucsChars.test(char)) { - result += char; - } else { - result += replacement; - } - } - return result; - } - // Check and make sure the replacement character is actually a safe char itself. - if (replacement !== "") { - const validReplacement = (sanitize(replacement) === replacement); - if (!validReplacement) throw "The replacement character(s) (options.replacement) is itself unsafe."; - } + if (!Array.from(replacement).every(validIRIChar)) throw "The replacement character(s) (options.replacement) is itself unsafe."; - // Actually do the sanitization. - return sanitize(str); + // `Array.from` must be used instead of `String.split` because + // `split` converts things like emojis into UTF-16 surrogate pairs. + return Array.from(str).map(char => (validIRIChar(char) ? char : replacement)).join(''); } export function sanitizeSlug(str, { replacement = '-' } = {}) { From 57202376ba10273901058dd656950a27b8eef43b Mon Sep 17 00:00:00 2001 From: Caleb Date: Tue, 3 Oct 2017 20:24:20 -0600 Subject: [PATCH 19/19] Update tests. Thanks @dopry! --- src/lib/__tests__/urlHelper.spec.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/lib/__tests__/urlHelper.spec.js b/src/lib/__tests__/urlHelper.spec.js index 78cb5162..13ebb7dc 100644 --- a/src/lib/__tests__/urlHelper.spec.js +++ b/src/lib/__tests__/urlHelper.spec.js @@ -75,7 +75,14 @@ describe('sanitizeSlug', ()=> { expect(() => sanitizeSlug('test', { replacement: ()=>{} })).toThrowError("`options.replacement` must be a string."); }); + it('should keep valid URI chars (letters digits _ - . ~)', () => { + expect( + sanitizeSlug("This, that-one_or.the~other 123!") + ).toEqual('This-that-one_or.the~other-123'); + }); + it('removes double replacements', () => { + expect(sanitizeSlug('test--test')).toEqual('test-test'); expect(sanitizeSlug('test test')).toEqual('test-test'); });