Merge pull request #640 from netlify/utf8-slugs
Support Unicode characters in slugs
This commit is contained in:
commit
876cb2ca94
@ -149,12 +149,12 @@
|
||||
"remark-parse": "^3.0.1",
|
||||
"remark-rehype": "^2.0.0",
|
||||
"remark-stringify": "^3.0.1",
|
||||
"sanitize-filename": "^1.6.1",
|
||||
"semaphore": "^1.0.5",
|
||||
"slate": "^0.21.0",
|
||||
"slate-edit-list": "^0.7.1",
|
||||
"slate-edit-table": "^0.10.1",
|
||||
"slate-soft-break": "^0.3.0",
|
||||
"slug": "^0.9.1",
|
||||
"toml-j0.4": "^1.1.1",
|
||||
"unified": "^6.1.4",
|
||||
"unist-builder": "^1.0.2",
|
||||
|
@ -5,7 +5,7 @@ import GitGatewayBackend from "./git-gateway/implementation";
|
||||
import { resolveFormat } from "../formats/formats";
|
||||
import { selectListMethod, selectEntrySlug, selectEntryPath, selectAllowNewEntries, selectFolderEntryExtension } from "../reducers/collections";
|
||||
import { createEntry } from "../valueObjects/Entry";
|
||||
import slug from 'slug';
|
||||
import { sanitizeSlug } from "../lib/urlHelper";
|
||||
|
||||
class LocalStorageAuthStore {
|
||||
storageKey = "netlify-cms-user";
|
||||
@ -42,7 +42,7 @@ const slugFormatter = (template = "{{slug}}", entryData) => {
|
||||
return identifier;
|
||||
};
|
||||
|
||||
return template.replace(/\{\{([^\}]+)\}\}/g, (_, field) => {
|
||||
const slug = template.replace(/\{\{([^\}]+)\}\}/g, (_, field) => {
|
||||
switch (field) {
|
||||
case "year":
|
||||
return date.getFullYear();
|
||||
@ -51,11 +51,18 @@ const slugFormatter = (template = "{{slug}}", entryData) => {
|
||||
case "day":
|
||||
return (`0${ date.getDate() }`).slice(-2);
|
||||
case "slug":
|
||||
return slug(getIdentifier(entryData).trim(), {lower: true});
|
||||
return getIdentifier(entryData).trim();
|
||||
default:
|
||||
return slug(entryData.get(field, "").trim(), {lower: true});
|
||||
return entryData.get(field, "").trim();
|
||||
}
|
||||
});
|
||||
})
|
||||
// Convert slug to lower-case
|
||||
.toLocaleLowerCase()
|
||||
|
||||
// Replace periods and spaces with dashes.
|
||||
.replace(/[.\s]/g, '-');
|
||||
|
||||
return sanitizeSlug(slug);
|
||||
};
|
||||
|
||||
class Backend {
|
||||
|
97
src/lib/__tests__/urlHelper.spec.js
Normal file
97
src/lib/__tests__/urlHelper.spec.js
Normal file
@ -0,0 +1,97 @@
|
||||
import { sanitizeIRI, sanitizeSlug } from '../urlHelper';
|
||||
|
||||
describe('sanitizeIRI', () => {
  // `sanitizeIRI` tests from RFC 3987

  // Punctuation and whitespace are dropped (default replacement is the empty
  // string); the unreserved URI characters are left untouched.
  it('should keep valid URI chars (letters digits _ - . ~)', () => {
    const sanitized = sanitizeIRI("This, that-one_or.the~other 123!");
    expect(sanitized).toEqual('Thisthat-one_or.the~other123');
  });

  it('should not remove accents', () => {
    const sanitized = sanitizeIRI("ěščřžý");
    expect(sanitized).toEqual('ěščřžý');
  });

  it('should keep valid non-latin chars (ucschars in RFC 3987)', () => {
    const sanitized = sanitizeIRI("日本語のタイトル");
    expect(sanitized).toEqual('日本語のタイトル');
  });

  // The input must come back code point for code point, not in a composed form.
  it('should not normalize Unicode strings', () => {
    const decomposed = '\u017F\u0323\u0307';
    expect(sanitizeIRI(decomposed)).toEqual('\u017F\u0323\u0307');
    expect(sanitizeIRI(decomposed)).not.toEqual('\u1E9B\u0323');
  });

  it('should allow a custom replacement character', () => {
    const options = { replacement: '-' };
    const sanitized = sanitizeIRI("duck\\goose.elephant", options);
    expect(sanitized).toEqual('duck-goose.elephant');
  });

  // `$` is not itself a valid IRI character, so it is rejected as a replacement.
  it('should not allow an improper replacement character', () => {
    const sanitizeWithDollar = () => {
      sanitizeIRI("I! like! dollars!", { replacement: '$' });
    };
    expect(sanitizeWithDollar).toThrow();
  });

  it('should not actually URI-encode the characters', () => {
    const emoji = "🎉";
    expect(sanitizeIRI(emoji)).toEqual('🎉');
    expect(sanitizeIRI(emoji)).not.toEqual("%F0%9F%8E%89");
  });
});
|
||||
|
||||
|
||||
describe('sanitizeSlug', () => {

  it('throws an error for non-strings', () => {
    const nonStrings = [{}, [], false, null, 11234, undefined, () => {}];

    nonStrings.forEach((value) => {
      expect(() => sanitizeSlug(value)).toThrowError("The input slug must be a string.");
    });
  });

  it('throws an error for non-string replacements', () => {
    // `undefined` is deliberately absent from this list: `sanitizeSlug` declares
    // a default value for `replacement`, so an undefined option never reaches
    // the string check.
    const nonStringReplacements = [{}, [], false, null, 11232, () => {}];

    nonStringReplacements.forEach((replacement) => {
      expect(() => sanitizeSlug('test', { replacement })).toThrowError("`options.replacement` must be a string.");
    });
  });

  it('should keep valid URI chars (letters digits _ - . ~)', () => {
    const slug = sanitizeSlug("This, that-one_or.the~other 123!");
    expect(slug).toEqual('This-that-one_or.the~other-123');
  });

  it('removes double replacements', () => {
    const alreadyDoubled = sanitizeSlug('test--test');
    expect(alreadyDoubled).toEqual('test-test');

    const fromWhitespace = sanitizeSlug('test test');
    expect(fromWhitespace).toEqual('test-test');
  });

  it('removes trailing replacemenets', () => {
    const slug = sanitizeSlug('test test ');
    expect(slug).toEqual('test-test');
  });

  it('uses alternate replacements', () => {
    const options = { replacement: '_' };
    const slug = sanitizeSlug('test test ', options);
    expect(slug).toEqual('test_test');
  });

});
|
@ -1,4 +1,6 @@
|
||||
import url from 'url';
|
||||
import sanitizeFilename from 'sanitize-filename';
|
||||
import { isString, escapeRegExp, flow, partialRight } from 'lodash';
|
||||
|
||||
function getUrl(url, direct) {
|
||||
return `${ direct ? '/#' : '' }${ url }`;
|
||||
@ -12,6 +14,50 @@ export function getNewEntryUrl(collectionName, direct) {
|
||||
return getUrl(`/collections/${ collectionName }/entries/new`, direct);
|
||||
}
|
||||
|
||||
/* See https://www.w3.org/International/articles/idn-and-iri/#path.
 * According to the IRI (Internationalized Resource Identifier) spec, RFC 3987,
 * ASCII chars are kept the same way as in standard URIs (letters digits _ - . ~).
 * Non-ASCII chars are allowed as-is when they are in the "ucschars" list of the RFC;
 * any other char is not allowed unencoded.
 * RFC 3987 also asks for strings that are not yet Unicode to be converted and normalized first.
 * JS strings are already Unicode (UTF-16 code units), so we neither normalize nor re-encode.
 */
// Unreserved URI characters.
const uriChars = /[\w\-.~]/i;
// `ucschar` ranges; the `u` flag is required so that astral code points match as single characters.
const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u;
// True when `char` (one code point) is an unreserved URI char or a `ucschar`.
const validIRIChar = (char) => (uriChars.test(char) || ucsChars.test(char));

/**
 * Replaces every character of `str` that is not a valid IRI character.
 *
 * This does not URI-encode anything (that is the browser's and server's job);
 * it only substitutes `replacement` for each character that is not allowed.
 *
 * @param {string} str - The text to sanitize.
 * @param {Object} [options]
 * @param {string} [options.replacement=""] - Text substituted for each invalid
 *   character. Every character in it must itself be a valid IRI character.
 * @returns {string} `str` with each invalid character replaced.
 * @throws {Error} If `str` or `options.replacement` is not a string, or if
 *   `options.replacement` contains a character that is not IRI-safe.
 */
export function sanitizeIRI(str, { replacement = "" } = {}) {
  if (!isString(str)) {
    throw new Error("The input slug must be a string.");
  }
  if (!isString(replacement)) {
    throw new Error("`options.replacement` must be a string.");
  }

  // Check and make sure the replacement character is actually a safe char itself.
  if (!Array.from(replacement).every(validIRIChar)) {
    throw new Error("The replacement character(s) (options.replacement) is itself unsafe.");
  }

  // `Array.from` must be used instead of `String.split` because
  // `split` breaks things like emojis into separate UTF-16 surrogates.
  return Array.from(str).map(char => (validIRIChar(char) ? char : replacement)).join('');
}
|
||||
|
||||
/**
 * Builds a slug from `str` by sanitizing it as an IRI and then as a filename.
 *
 * Characters rejected by either sanitizer are replaced with `replacement`.
 * Runs of the replacement are then collapsed to a single occurrence and one
 * trailing occurrence is removed. A leading replacement is left in place.
 *
 * @param {string} str - The text to turn into a slug.
 * @param {Object} [options]
 * @param {string} [options.replacement='-'] - Text substituted for rejected characters.
 * @returns {string} The sanitized slug.
 * @throws {Error} If `str` or `options.replacement` is not a string. Errors
 *   thrown by `sanitizeIRI` (e.g. an unsafe replacement) propagate as well.
 */
export function sanitizeSlug(str, { replacement = '-' } = {}) {
  if (!isString(str)) {
    throw new Error("The input slug must be a string.");
  }
  if (!isString(replacement)) {
    throw new Error("`options.replacement` must be a string.");
  }

  // Sanitize as IRI (i18n URI) and as filename.
  const sanitize = flow([
    partialRight(sanitizeIRI, { replacement }),
    partialRight(sanitizeFilename, { replacement }),
  ]);
  const sanitizedSlug = sanitize(str);

  // Remove any doubled or trailing replacement characters (that were added in the sanitizers).
  // The replacement is escaped once because it may contain regex metacharacters such as `.`.
  const escapedReplacement = escapeRegExp(replacement);
  const doubleReplacement = new RegExp(`(?:${ escapedReplacement })+`, 'g');
  const trailingReplacement = new RegExp(`${ escapedReplacement }$`);

  return sanitizedSlug
    .replace(doubleReplacement, replacement)
    .replace(trailingReplacement, '');
}
|
||||
|
||||
export function urlize(string) {
|
||||
const sanitized = makePathSanitized(string);
|
||||
const parsedURL = url.parse(sanitized);
|
||||
|
26
yarn.lock
26
yarn.lock
@ -7622,6 +7622,12 @@ sane@~1.6.0:
|
||||
walker "~1.0.5"
|
||||
watch "~0.10.0"
|
||||
|
||||
sanitize-filename@^1.6.1:
|
||||
version "1.6.1"
|
||||
resolved "https://registry.yarnpkg.com/sanitize-filename/-/sanitize-filename-1.6.1.tgz#612da1c96473fa02dccda92dcd5b4ab164a6772a"
|
||||
dependencies:
|
||||
truncate-utf8-bytes "^1.0.0"
|
||||
|
||||
sax@^1.2.1, sax@~1.2.1:
|
||||
version "1.2.4"
|
||||
resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.4.tgz#2816234e2378bddc4e5354fab5caa895df7100d9"
|
||||
@ -7815,12 +7821,6 @@ slice-ansi@0.0.4:
|
||||
version "0.0.4"
|
||||
resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-0.0.4.tgz#edbf8903f66f7ce2f8eafd6ceed65e264c831b35"
|
||||
|
||||
slug@^0.9.1:
|
||||
version "0.9.1"
|
||||
resolved "https://registry.yarnpkg.com/slug/-/slug-0.9.1.tgz#af08f608a7c11516b61778aa800dce84c518cfda"
|
||||
dependencies:
|
||||
unicode ">= 0.3.1"
|
||||
|
||||
sntp@1.x.x:
|
||||
version "1.0.9"
|
||||
resolved "https://registry.yarnpkg.com/sntp/-/sntp-1.0.9.tgz#6541184cc90aeea6c6e7b35e2659082443c66198"
|
||||
@ -8547,6 +8547,12 @@ trough@^1.0.0:
|
||||
version "1.0.1"
|
||||
resolved "https://registry.yarnpkg.com/trough/-/trough-1.0.1.tgz#a9fd8b0394b0ae8fff82e0633a0a36ccad5b5f86"
|
||||
|
||||
truncate-utf8-bytes@^1.0.0:
|
||||
version "1.0.2"
|
||||
resolved "https://registry.yarnpkg.com/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz#405923909592d56f78a5818434b0b78489ca5f2b"
|
||||
dependencies:
|
||||
utf8-byte-length "^1.0.1"
|
||||
|
||||
tryit@^1.0.1:
|
||||
version "1.0.3"
|
||||
resolved "https://registry.yarnpkg.com/tryit/-/tryit-1.0.3.tgz#393be730a9446fd1ead6da59a014308f36c289cb"
|
||||
@ -8635,10 +8641,6 @@ unherit@^1.0.4:
|
||||
inherits "^2.0.1"
|
||||
xtend "^4.0.1"
|
||||
|
||||
"unicode@>= 0.3.1":
|
||||
version "9.0.1"
|
||||
resolved "https://registry.yarnpkg.com/unicode/-/unicode-9.0.1.tgz#104706272c6464c574801be1b086f7245cf25158"
|
||||
|
||||
unified@^6.1.4:
|
||||
version "6.1.5"
|
||||
resolved "https://registry.yarnpkg.com/unified/-/unified-6.1.5.tgz#716937872621a63135e62ced2f3ac6a063c6fb87"
|
||||
@ -8793,6 +8795,10 @@ user-home@^2.0.0:
|
||||
dependencies:
|
||||
os-homedir "^1.0.0"
|
||||
|
||||
utf8-byte-length@^1.0.1:
|
||||
version "1.0.4"
|
||||
resolved "https://registry.yarnpkg.com/utf8-byte-length/-/utf8-byte-length-1.0.4.tgz#f45f150c4c66eee968186505ab93fcbb8ad6bf61"
|
||||
|
||||
util-deprecate@~1.0.1:
|
||||
version "1.0.2"
|
||||
resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
|
||||
|
Loading…
x
Reference in New Issue
Block a user