Add option to strip Unicode from entry filenames (#1135)
This commit is contained in:
parent
2475af1870
commit
cd10a713d8
@ -130,6 +130,7 @@
|
||||
"dependencies": {
|
||||
"classnames": "^2.2.5",
|
||||
"create-react-class": "^15.6.0",
|
||||
"diacritics": "^1.3.0",
|
||||
"fuzzy": "^0.1.1",
|
||||
"gotrue-js": "^0.9.15",
|
||||
"gray-matter": "^3.0.6",
|
||||
|
@ -1,6 +1,6 @@
|
||||
import yaml from "js-yaml";
|
||||
import { Map, List, fromJS } from "immutable";
|
||||
import { trimStart, flow } from "lodash";
|
||||
import { trimStart, flow, isBoolean } from "lodash";
|
||||
import { authenticateUser } from "Actions/auth";
|
||||
import * as publishModes from "Constants/publishModes";
|
||||
|
||||
@ -43,6 +43,13 @@ export function validateConfig(config) {
|
||||
if (typeof config.get('media_folder') !== 'string') {
|
||||
throw new Error("Error in configuration file: Your `media_folder` must be a string. Check your config.yml file.");
|
||||
}
|
||||
const slug_encoding = config.getIn(['slug', 'encoding'], "unicode");
|
||||
if (slug_encoding !== "unicode" && slug_encoding !== "ascii") {
|
||||
throw new Error("Error in configuration file: Your `slug.encoding` must be either `unicode` or `ascii`. Check your config.yml file.")
|
||||
}
|
||||
if (!isBoolean(config.getIn(['slug', 'clean_accents'], false))) {
|
||||
throw new Error("Error in configuration file: Your `slug.clean_accents` must be a boolean. Check your config.yml file.");
|
||||
}
|
||||
if (!config.get('collections')) {
|
||||
throw new Error("Error in configuration file: A `collections` wasn\'t found. Check your config.yml file.");
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ import { createAssetProxy } from 'ValueObjects/AssetProxy';
|
||||
import { getAsset, selectIntegration } from 'Reducers';
|
||||
import { getIntegrationProvider } from 'Integrations';
|
||||
import { addAsset } from './media';
|
||||
import { sanitizeSlug } from "Lib/urlHelper";
|
||||
|
||||
const { notifSend } = notifActions;
|
||||
|
||||
@ -79,7 +80,8 @@ export function persistMedia(file, opts = {}) {
|
||||
const backend = currentBackend(state.config);
|
||||
const integration = selectIntegration(state, null, 'assetStore');
|
||||
const files = state.mediaLibrary.get('files');
|
||||
const existingFile = files.find(existingFile => existingFile.name.toLowerCase() === file.name.toLowerCase());
|
||||
const fileName = sanitizeSlug(file.name.toLowerCase(), state.config.get('slug'));
|
||||
const existingFile = files.find(existingFile => existingFile.name.toLowerCase() === fileName);
|
||||
|
||||
/**
|
||||
* Check for existing files of the same name before persisting. If no asset
|
||||
@ -98,7 +100,7 @@ export function persistMedia(file, opts = {}) {
|
||||
dispatch(mediaPersisting());
|
||||
|
||||
try {
|
||||
const assetProxy = await createAssetProxy(file.name.toLowerCase(), file, false, privateUpload);
|
||||
const assetProxy = await createAssetProxy(fileName, file, false, privateUpload);
|
||||
dispatch(addAsset(assetProxy));
|
||||
if (!integration) {
|
||||
const asset = await backend.persistMedia(assetProxy);
|
||||
|
@ -41,7 +41,7 @@ class LocalStorageAuthStore {
|
||||
}
|
||||
}
|
||||
|
||||
const slugFormatter = (template = "{{slug}}", entryData) => {
|
||||
const slugFormatter = (template = "{{slug}}", entryData, slugConfig) => {
|
||||
const date = new Date();
|
||||
|
||||
const getIdentifier = (entryData) => {
|
||||
@ -76,10 +76,10 @@ const slugFormatter = (template = "{{slug}}", entryData) => {
|
||||
// Convert slug to lower-case
|
||||
.toLocaleLowerCase()
|
||||
|
||||
// Replace periods and spaces with dashes.
|
||||
.replace(/[.\s]/g, '-');
|
||||
// Replace periods with dashes.
|
||||
.replace(/[.]/g, '-');
|
||||
|
||||
return sanitizeSlug(slug);
|
||||
return sanitizeSlug(slug, slugConfig);
|
||||
};
|
||||
|
||||
class Backend {
|
||||
@ -242,7 +242,7 @@ class Backend {
|
||||
if (!selectAllowNewEntries(collection)) {
|
||||
throw (new Error("Not allowed to create new entries in this collection"));
|
||||
}
|
||||
const slug = slugFormatter(collection.get("slug"), entryDraft.getIn(["entry", "data"]));
|
||||
const slug = slugFormatter(collection.get("slug"), entryDraft.getIn(["entry", "data"]), config.get("slug"));
|
||||
const path = selectEntryPath(collection, slug);
|
||||
entryObj = {
|
||||
path,
|
||||
|
@ -1,52 +1,59 @@
|
||||
import { sanitizeIRI, sanitizeSlug } from '../urlHelper';
|
||||
import { Map } from 'immutable';
|
||||
import { sanitizeURI, sanitizeSlug } from '../urlHelper';
|
||||
|
||||
describe('sanitizeIRI', () => {
|
||||
// `sanitizeIRI` tests from RFC 3987
|
||||
describe('sanitizeURI', () => {
|
||||
// `sanitizeURI` tests from RFC 3987
|
||||
it('should keep valid URI chars (letters digits _ - . ~)', () => {
|
||||
expect(
|
||||
sanitizeIRI("This, that-one_or.the~other 123!")
|
||||
sanitizeURI("This, that-one_or.the~other 123!")
|
||||
).toEqual('Thisthat-one_or.the~other123');
|
||||
});
|
||||
|
||||
it('should not remove accents', () => {
|
||||
expect(
|
||||
sanitizeIRI("ěščřžý")
|
||||
sanitizeURI("ěščřžý")
|
||||
).toEqual('ěščřžý');
|
||||
});
|
||||
|
||||
it('should keep valid non-latin chars (ucschars in RFC 3987)', () => {
|
||||
expect(
|
||||
sanitizeIRI("日本語のタイトル")
|
||||
sanitizeURI("日本語のタイトル")
|
||||
).toEqual('日本語のタイトル');
|
||||
});
|
||||
|
||||
it('should not keep valid non-latin chars (ucschars in RFC 3987) if set to ASCII mode', () => {
|
||||
expect(
|
||||
sanitizeURI("ěščřžý日本語のタイトル", { encoding: 'ascii' })
|
||||
).toEqual('');
|
||||
});
|
||||
|
||||
it('should not normalize Unicode strings', () => {
|
||||
expect(
|
||||
sanitizeIRI('\u017F\u0323\u0307')
|
||||
sanitizeURI('\u017F\u0323\u0307')
|
||||
).toEqual('\u017F\u0323\u0307');
|
||||
expect(
|
||||
sanitizeIRI('\u017F\u0323\u0307')
|
||||
sanitizeURI('\u017F\u0323\u0307')
|
||||
).not.toEqual('\u1E9B\u0323');
|
||||
});
|
||||
|
||||
it('should allow a custom replacement character', () => {
|
||||
expect(
|
||||
sanitizeIRI("duck\\goose.elephant", { replacement: '-' })
|
||||
sanitizeURI("duck\\goose.elephant", { replacement: '-' })
|
||||
).toEqual('duck-goose.elephant');
|
||||
});
|
||||
|
||||
it('should not allow an improper replacement character', () => {
|
||||
expect(() => {
|
||||
sanitizeIRI("I! like! dollars!", { replacement: '$' });
|
||||
sanitizeURI("I! like! dollars!", { replacement: '$' });
|
||||
}).toThrow();
|
||||
});
|
||||
|
||||
it('should not actually URI-encode the characters', () => {
|
||||
expect(
|
||||
sanitizeIRI("🎉")
|
||||
sanitizeURI("🎉")
|
||||
).toEqual('🎉');
|
||||
expect(
|
||||
sanitizeIRI("🎉")
|
||||
sanitizeURI("🎉")
|
||||
).not.toEqual("%F0%9F%8E%89");
|
||||
});
|
||||
});
|
||||
@ -65,14 +72,14 @@ describe('sanitizeSlug', ()=> {
|
||||
});
|
||||
|
||||
it('throws an error for non-string replacements', () => {
|
||||
expect(() => sanitizeSlug('test', { replacement: {} })).toThrowError("`options.replacement` must be a string.");
|
||||
expect(() => sanitizeSlug('test', { replacement: [] })).toThrowError("`options.replacement` must be a string.");
|
||||
expect(() => sanitizeSlug('test', { replacement: false })).toThrowError("`options.replacement` must be a string.");
|
||||
expect(() => sanitizeSlug('test', { replacement: null } )).toThrowError("`options.replacement` must be a string.");
|
||||
expect(() => sanitizeSlug('test', { replacement: 11232 })).toThrowError("`options.replacement` must be a string.");
|
||||
expect(() => sanitizeSlug('test', Map({ sanitize_replacement: {} }))).toThrowError("`options.replacement` must be a string.");
|
||||
expect(() => sanitizeSlug('test', Map({ sanitize_replacement: [] }))).toThrowError("`options.replacement` must be a string.");
|
||||
expect(() => sanitizeSlug('test', Map({ sanitize_replacement: false }))).toThrowError("`options.replacement` must be a string.");
|
||||
expect(() => sanitizeSlug('test', Map({ sanitize_replacement: null } ))).toThrowError("`options.replacement` must be a string.");
|
||||
expect(() => sanitizeSlug('test', Map({ sanitize_replacement: 11232 }))).toThrowError("`options.replacement` must be a string.");
|
||||
// do not test undefined for this variant since a default is set in the constructor.
|
||||
//expect(() => sanitizeSlug('test', { replacement: undefined })).toThrowError("`options.replacement` must be a string.");
|
||||
expect(() => sanitizeSlug('test', { replacement: ()=>{} })).toThrowError("`options.replacement` must be a string.");
|
||||
//expect(() => sanitizeSlug('test', { sanitize_replacement: undefined })).toThrowError("`options.replacement` must be a string.");
|
||||
expect(() => sanitizeSlug('test', Map({ sanitize_replacement: ()=>{} }))).toThrowError("`options.replacement` must be a string.");
|
||||
});
|
||||
|
||||
it('should keep valid URI chars (letters digits _ - . ~)', () => {
|
||||
@ -81,6 +88,24 @@ describe('sanitizeSlug', ()=> {
|
||||
).toEqual('This-that-one_or.the~other-123');
|
||||
});
|
||||
|
||||
it('should remove accents with `clean_accents` set', () => {
|
||||
expect(
|
||||
sanitizeSlug("ěščřžý", Map({ clean_accents: true }))
|
||||
).toEqual('escrzy');
|
||||
});
|
||||
|
||||
it('should remove non-latin chars in "ascii" mode', () => {
|
||||
expect(
|
||||
sanitizeSlug("ěščřžý日本語のタイトル", Map({ encoding: 'ascii' }))
|
||||
).toEqual('');
|
||||
});
|
||||
|
||||
it('should clean accents and strip non-latin chars in "ascii" mode with `clean_accents` set', () => {
|
||||
expect(
|
||||
sanitizeSlug("ěščřžý日本語のタイトル", Map({ encoding: 'ascii', clean_accents: true }))
|
||||
).toEqual('escrzy');
|
||||
});
|
||||
|
||||
it('removes double replacements', () => {
|
||||
expect(sanitizeSlug('test--test')).toEqual('test-test');
|
||||
expect(sanitizeSlug('test test')).toEqual('test-test');
|
||||
@ -91,7 +116,7 @@ describe('sanitizeSlug', ()=> {
|
||||
});
|
||||
|
||||
it('uses alternate replacements', () => {
|
||||
expect(sanitizeSlug('test test ', { replacement: '_' })).toEqual('test_test');
|
||||
expect(sanitizeSlug('test test ', Map({ sanitize_replacement: '_' }))).toEqual('test_test');
|
||||
});
|
||||
|
||||
});
|
||||
});
|
||||
|
@ -1,9 +1,11 @@
|
||||
import url from 'url';
|
||||
import diacritics from 'diacritics';
|
||||
import sanitizeFilename from 'sanitize-filename';
|
||||
import { isString, escapeRegExp, flow, partialRight } from 'lodash';
|
||||
import { Map } from 'immutable';
|
||||
|
||||
function getUrl(url, direct) {
|
||||
return `${ direct ? '/#' : '' }${ url }`;
|
||||
function getUrl(urlString, direct) {
|
||||
return `${ direct ? '/#' : '' }${ urlString }`;
|
||||
}
|
||||
|
||||
export function getCollectionUrl(collectionName, direct) {
|
||||
@ -20,9 +22,9 @@ export function addParams(urlString, params) {
|
||||
return url.format(parsedUrl);
|
||||
}
|
||||
|
||||
export function stripProtocol(url) {
|
||||
const protocolEndIndex = url.indexOf('//');
|
||||
return protocolEndIndex > -1 ? url.slice(protocolEndIndex + 2) : url;
|
||||
export function stripProtocol(urlString) {
|
||||
const protocolEndIndex = urlString.indexOf('//');
|
||||
return protocolEndIndex > -1 ? urlString.slice(protocolEndIndex + 2) : urlString;
|
||||
}
|
||||
|
||||
/* See https://www.w3.org/International/articles/idn-and-iri/#path.
|
||||
@ -34,34 +36,52 @@ export function stripProtocol(url) {
|
||||
*/
|
||||
const uriChars = /[\w\-.~]/i;
|
||||
const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u;
|
||||
const validIRIChar = (char) => (uriChars.test(char) || ucsChars.test(char));
|
||||
// `sanitizeIRI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed.
|
||||
export function sanitizeIRI(str, { replacement = "" } = {}) {
|
||||
if (!isString(str)) throw "The input slug must be a string.";
|
||||
if (!isString(replacement)) throw "`options.replacement` must be a string.";
|
||||
const validURIChar = char => uriChars.test(char);
|
||||
const validIRIChar = char => uriChars.test(char) || ucsChars.test(char);
|
||||
// `sanitizeURI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed.
|
||||
export function sanitizeURI(str, { replacement = "", encoding = "unicode" } = {}) {
|
||||
if (!isString(str)) {
|
||||
throw new Error("The input slug must be a string.");
|
||||
}
|
||||
if (!isString(replacement)) {
|
||||
throw new Error("`options.replacement` must be a string.");
|
||||
}
|
||||
|
||||
let validChar;
|
||||
if (encoding === "unicode") {
|
||||
validChar = validIRIChar;
|
||||
} else if (encoding === "ascii") {
|
||||
validChar = validURIChar;
|
||||
} else {
|
||||
throw new Error('`options.encoding` must be "unicode" or "ascii".');
|
||||
}
|
||||
|
||||
// Check and make sure the replacement character is actually a safe char itself.
|
||||
if (!Array.from(replacement).every(validIRIChar)) throw "The replacement character(s) (options.replacement) is itself unsafe.";
|
||||
if (!Array.from(replacement).every(validChar)) {
|
||||
throw new Error("The replacement character(s) (options.replacement) is itself unsafe.");
|
||||
}
|
||||
|
||||
// `Array.from` must be used instead of `String.split` because
|
||||
// `split` converts things like emojis into UTF-16 surrogate pairs.
|
||||
return Array.from(str).map(char => (validIRIChar(char) ? char : replacement)).join('');
|
||||
return Array.from(str).map(char => (validChar(char) ? char : replacement)).join('');
|
||||
}
|
||||
|
||||
export function sanitizeSlug(str, { replacement = '-' } = {}) {
|
||||
if (!isString(str)) throw "The input slug must be a string.";
|
||||
if (!isString(replacement)) throw "`options.replacement` must be a string.";
|
||||
export function sanitizeSlug(str, options = Map()) {
|
||||
const encoding = options.get('encoding', 'unicode');
|
||||
const stripDiacritics = options.get('clean_accents', false);
|
||||
const replacement = options.get('sanitize_replacement', '-');
|
||||
|
||||
if (!isString(str)) { throw new Error("The input slug must be a string."); }
|
||||
|
||||
// Sanitize as IRI (i18n URI) and as filename.
|
||||
const sanitize = flow([
|
||||
partialRight(sanitizeIRI, { replacement }),
|
||||
const sanitizedSlug = flow([
|
||||
...(stripDiacritics ? [diacritics.remove] : []),
|
||||
partialRight(sanitizeURI, { replacement, encoding }),
|
||||
partialRight(sanitizeFilename, { replacement }),
|
||||
]);
|
||||
const sanitizedSlug = sanitize(str);
|
||||
|
||||
])(str);
|
||||
|
||||
// Remove any doubled or trailing replacement characters (that were added in the sanitizers).
|
||||
const doubleReplacement = new RegExp('(?:' + escapeRegExp(replacement) + ')+', 'g');
|
||||
const trailingReplacment = new RegExp(escapeRegExp(replacement) + '$');
|
||||
const doubleReplacement = new RegExp(`(?:${ escapeRegExp(replacement) })+`, 'g');
|
||||
const trailingReplacment = new RegExp(`${ escapeRegExp(replacement) }$`);
|
||||
const normalizedSlug = sanitizedSlug
|
||||
.replace(doubleReplacement, replacement)
|
||||
.replace(trailingReplacment, '');
|
||||
|
@ -60,6 +60,24 @@ public_folder: "/images/uploads"
|
||||
|
||||
Based on the settings above, if a user used an image widget field called `avatar` to upload and select an image called `philosoraptor.png`, the image would be saved to the repository at `/static/images/uploads/philosoraptor.png`, and the `avatar` field for the file would be set to `/images/uploads/philosoraptor.png`.
|
||||
|
||||
## Slug Type
|
||||
|
||||
The `slug` option allows you to change how filenames for entries are created and sanitized. For modifying the actual data in a slug, see the per-collection option below.
|
||||
|
||||
`slug` accepts multiple options:
|
||||
|
||||
- `encoding`
|
||||
- `unicode` (default): Sanitize filenames (slugs) according to [RFC3987](https://tools.ietf.org/html/rfc3987) and the [WHATWG URL spec](https://url.spec.whatwg.org/). These specs allow non-ASCII (or non-Latin) characters to exist in URLs.
|
||||
- `ascii`: Sanitize filenames (slugs) according to [RFC3986](https://tools.ietf.org/html/rfc3986). The only allowed characters are (0-9, a-z, A-Z, `_`, `-`, `.`, `~`).
|
||||
- `clean_accents`: Set to `true` to remove diacritics from slug characters before sanitizing. This is often helpful when using `ascii` encoding.
|
||||
|
||||
**Example**
|
||||
|
||||
``` yaml
|
||||
slug:
|
||||
encoding: "ascii"
|
||||
clean_accents: true
|
||||
```
|
||||
|
||||
## Collections
|
||||
|
||||
|
@ -2333,6 +2333,10 @@ detect-node@^2.0.3:
|
||||
version "2.0.3"
|
||||
resolved "https://registry.yarnpkg.com/detect-node/-/detect-node-2.0.3.tgz#a2033c09cc8e158d37748fbde7507832bd6ce127"
|
||||
|
||||
diacritics@^1.3.0:
|
||||
version "1.3.0"
|
||||
resolved "https://registry.yarnpkg.com/diacritics/-/diacritics-1.3.0.tgz#3efa87323ebb863e6696cebb0082d48ff3d6f7a1"
|
||||
|
||||
diff@^3.2.0:
|
||||
version "3.4.0"
|
||||
resolved "https://registry.yarnpkg.com/diff/-/diff-3.4.0.tgz#b1d85507daf3964828de54b37d0d73ba67dda56c"
|
||||
|
Loading…
x
Reference in New Issue
Block a user