static-cms/src/lib/urlHelper.js

import url from 'url';
import sanitizeFilename from 'sanitize-filename';
import { isString, escapeRegExp, flow, partialRight } from 'lodash';

/**
 * Builds an in-app URL, optionally prefixed with the `/#` hash marker.
 *
 * @param {string} url - Route to link to.
 * @param {boolean} [direct] - When truthy, the result is prefixed with `/#`.
 * @returns {string} The route, with or without the `/#` prefix.
 */
function getUrl(url, direct) {
  if (direct) {
    return `/#${ url }`;
  }
  return `${ url }`;
}

/**
 * Returns the URL of a collection's listing page.
 *
 * @param {string} collectionName - Name of the collection.
 * @param {boolean} [direct] - Forwarded to `getUrl`.
 * @returns {string} `/collections/<collectionName>` as processed by `getUrl`.
 */
export function getCollectionUrl(collectionName, direct) {
  const collectionPath = `/collections/${ collectionName }`;
  return getUrl(collectionPath, direct);
}

/**
 * Returns the URL of the "new entry" page for a collection.
 *
 * @param {string} collectionName - Name of the collection.
 * @param {boolean} [direct] - Forwarded to `getUrl`.
 * @returns {string} `/collections/<collectionName>/new` as processed by `getUrl`.
 */
export function getNewEntryUrl(collectionName, direct) {
  const newEntryPath = `/collections/${ collectionName }/new`;
  return getUrl(newEntryPath, direct);
}

/**
 * Merges extra query parameters into a URL string.
 *
 * @param {string} urlString - Absolute or relative URL to extend.
 * @param {Object} params - Query parameters to add. A key that already exists
 *   in `urlString` is overridden by the value given here.
 * @returns {string} The URL with the merged query string.
 */
export function addParams(urlString, params) {
  const parsedUrl = url.parse(urlString, true);
  parsedUrl.query = { ...parsedUrl.query, ...params };
  // `url.format` prefers `search` over `query`. Without clearing the stale
  // `search` string, the merged params are silently dropped whenever
  // `urlString` already carried a query string.
  parsedUrl.search = null;
  return url.format(parsedUrl);
}

/* See https://www.w3.org/International/articles/idn-and-iri/#path.
 * According to the new IRI (Internationalized Resource Identifier) spec, RFC 3987,
 *   ASCII chars should be kept the same way as in standard URIs (letters digits _ - . ~).
 * Non-ASCII chars (unless they are not in the allowed "ucschars" list) should be percent-encoded.
 * If the string is not encoded in Unicode, it should be converted to UTF-8 and normalized first,
 *   but JS stores strings as UTF-16/UCS-2 internally, so we should not normalize or re-encode.
 */
// Characters that plain URIs already allow unescaped: word characters, `-`, `.` and `~`.
const uriChars = /[\w\-.~]/i;
// Non-ASCII code point ranges accepted as-is (the RFC 3987 "ucschars" list).
// The `u` flag is required so the `\u{...}` escapes match whole code points.
const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u;
// Returns true when `char` matches either of the two character classes above.
const validIRIChar = (char) => {
  if (uriChars.test(char)) {
    return true;
  }
  return ucsChars.test(char);
};
// `sanitizeIRI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed.
/**
 * Replaces every character of `str` that `validIRIChar` rejects.
 *
 * @param {string} str - Text to sanitize.
 * @param {Object} [options]
 * @param {string} [options.replacement=""] - Substituted for each rejected
 *   character; must itself consist only of accepted characters.
 * @returns {string} `str` with each rejected character swapped for `replacement`.
 * @throws {Error} If `str` or `options.replacement` is not a string, or if
 *   `options.replacement` contains a rejected character.
 */
export function sanitizeIRI(str, { replacement = "" } = {}) {
  // Throw real Error objects (not bare strings) so callers get a stack trace
  // and `instanceof Error` checks work.
  if (!isString(str)) {
    throw new Error("The input slug must be a string.");
  }
  if (!isString(replacement)) {
    throw new Error("`options.replacement` must be a string.");
  }

  // Check and make sure the replacement character is actually a safe char itself.
  if (!Array.from(replacement).every(validIRIChar)) {
    throw new Error("The replacement character(s) (options.replacement) is itself unsafe.");
  }

  // `Array.from` must be used instead of `String.split` because
  //   `split` converts things like emojis into UTF-16 surrogate pairs.
  return Array.from(str, char => (validIRIChar(char) ? char : replacement)).join('');
}

/**
 * Sanitizes text into a slug by running it through `sanitizeIRI` and then
 * `sanitizeFilename`, and tidying up the replacement characters they insert.
 *
 * @param {string} str - Text to turn into a slug.
 * @param {Object} [options]
 * @param {string} [options.replacement='-'] - Passed to both sanitizers as
 *   their `replacement` option.
 * @returns {string} The sanitized slug, with runs of `replacement` collapsed
 *   to a single occurrence and one trailing `replacement` removed.
 * @throws {Error} If `str` or `options.replacement` is not a string. Errors
 *   raised by the underlying sanitizers propagate unchanged.
 */
export function sanitizeSlug(str, { replacement = '-' } = {}) {
  // Throw real Error objects (not bare strings) so callers get a stack trace
  // and `instanceof Error` checks work.
  if (!isString(str)) {
    throw new Error("The input slug must be a string.");
  }
  if (!isString(replacement)) {
    throw new Error("`options.replacement` must be a string.");
  }

  // Sanitize as IRI (i18n URI) and as filename.
  const sanitize = flow([
    partialRight(sanitizeIRI, { replacement }),
    partialRight(sanitizeFilename, { replacement }),
  ]);
  const sanitizedSlug = sanitize(str);

  // Remove any doubled or trailing replacement characters (that were added in the sanitizers).
  const escapedReplacement = escapeRegExp(replacement);
  const doubleReplacement = new RegExp(`(?:${ escapedReplacement })+`, 'g');
  const trailingReplacement = new RegExp(`${ escapedReplacement }$`);

  return sanitizedSlug
    .replace(doubleReplacement, replacement)
    .replace(trailingReplacement, '');
}
add media library * rebase editorial workflow pull requests when behind * fix async/await transpilation * add media library pagination * switch media library to grid layout * ensure that only cms branches can be force updated 2017-08-14 09:00:47 -04:00			`import url from 'url';`
Move slug sanitizer to a seperate function. 2017-10-03 09:01:06 -06:00			`import sanitizeFilename from 'sanitize-filename';`
Make `sanitizeSlug` immutable. Thanks @erquhart! 2017-10-03 09:48:43 -06:00			`import { isString, escapeRegExp, flow, partialRight } from 'lodash';`
Update old netlify-auth-js to gotrue-js 2017-09-05 13:24:16 -07:00
update sidebar styling, add new entry links (#276) * udpate sidebar styling, add new entry links * make sidebar new entry links always visible * simplify app bar implementation, findbar 2017-03-15 14:32:41 -04:00			`function getUrl(url, direct) {`
			return `${ direct ? '/#' : '' }${ url }`;
			`}`

			`export function getCollectionUrl(collectionName, direct) {`
			return getUrl(`/collections/${ collectionName }`, direct);
			`}`

			`export function getNewEntryUrl(collectionName, direct) {`
Change "new entry" path to avoid slug conflicts. 2017-10-12 19:51:28 -06:00			return getUrl(`/collections/${ collectionName }/new`, direct);
update sidebar styling, add new entry links (#276) * udpate sidebar styling, add new entry links * make sidebar new entry links always visible * simplify app bar implementation, findbar 2017-03-15 14:32:41 -04:00			`}`
Update old netlify-auth-js to gotrue-js 2017-09-05 13:24:16 -07:00
add media library * rebase editorial workflow pull requests when behind * fix async/await transpilation * add media library pagination * switch media library to grid layout * ensure that only cms branches can be force updated 2017-08-14 09:00:47 -04:00			`export function addParams(urlString, params) {`
			`const parsedUrl = url.parse(urlString, true);`
			`parsedUrl.query = { ...parsedUrl.query, ...params };`
			`return url.format(parsedUrl);`
			`}`

Document slug sanitizing functions. 2017-10-03 10:07:20 -06:00			`/* See https://www.w3.org/International/articles/idn-and-iri/#path.`
			`* According to the new IRI (Internationalized Resource Identifier) spec, RFC 3987,`
			`* ASCII chars should be kept the same way as in standard URIs (letters digits _ - . ~).`
			`* Non-ASCII chars (unless they are not in the allowed "ucschars" list) should be percent-encoded.`
			`* If the string is not encoded in Unicode, it should be converted to UTF-8 and normalized first,`
			`* but JS stores strings as UTF-16/UCS-2 internally, so we should not normallize or re-encode.`
			`*/`
Re-implement standard slugification with IRIs instead of URIs. 2017-09-30 17:27:07 -06:00			`const uriChars = /[\w\-.~]/i;`
Make `ucschars` regex smaller. 2017-09-30 20:43:29 -06:00			`const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u;`
Remove mutation from `sanitizeIRI`. 2017-10-03 20:21:56 -06:00			`const validIRIChar = (char) => (uriChars.test(char) \|\| ucsChars.test(char));`
Make sure `sanitizeIRI` replacement character is safe. 2017-10-03 14:57:03 -06:00			// `sanitizeIRI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed.
Remove mutation from `sanitizeIRI`. 2017-10-03 20:21:56 -06:00			`export function sanitizeIRI(str, { replacement = "" } = {}) {`
Remove recursion for `sanitizeIRI` replacement checking. 2017-10-03 19:21:58 -06:00			`if (!isString(str)) throw "The input slug must be a string.";`
Update slug sanitization errors. 2017-10-03 18:06:15 -06:00			if (!isString(replacement)) throw "`options.replacement` must be a string.";
Remove recursion for `sanitizeIRI` replacement checking. 2017-10-03 19:21:58 -06:00
			`// Check and make sure the replacement character is actually a safe char itself.`
Remove mutation from `sanitizeIRI`. 2017-10-03 20:21:56 -06:00			`if (!Array.from(replacement).every(validIRIChar)) throw "The replacement character(s) (options.replacement) is itself unsafe.";`
Update slug sanitization errors. 2017-10-03 18:06:15 -06:00
Remove mutation from `sanitizeIRI`. 2017-10-03 20:21:56 -06:00			// `Array.from` must be used instead of `String.split` because
			// `split` converts things like emojis into UTF-16 surrogate pairs.
			`return Array.from(str).map(char => (validIRIChar(char) ? char : replacement)).join('');`
Re-implement standard slugification with IRIs instead of URIs. 2017-09-30 17:27:07 -06:00			`}`

Allow empty options object for `sanitizeSlug`. 2017-10-03 16:08:23 -06:00			`export function sanitizeSlug(str, { replacement = '-' } = {}) {`
Update slug sanitization errors. 2017-10-03 18:06:15 -06:00			`if (!isString(str)) throw "The input slug must be a string.";`
			if (!isString(replacement)) throw "`options.replacement` must be a string.";
Make `sanitizeSlug` immutable. Thanks @erquhart! 2017-10-03 09:48:43 -06:00
Move slug sanitizer to a seperate function. 2017-10-03 09:01:06 -06:00			`// Sanitize as IRI (i18n URI) and as filename.`
Make `sanitizeSlug` immutable. Thanks @erquhart! 2017-10-03 09:48:43 -06:00			`const sanitize = flow([`
			`partialRight(sanitizeIRI, { replacement }),`
			`partialRight(sanitizeFilename, { replacement }),`
			`]);`
			`const sanitizedSlug = sanitize(str);`

Move slug sanitizer to a seperate function. 2017-10-03 09:01:06 -06:00			`// Remove any doubled or trailing replacement characters (that were added in the sanitizers).`
			`const doubleReplacement = new RegExp('(?:' + escapeRegExp(replacement) + ')+', 'g');`
Make `sanitizeSlug` immutable. Thanks @erquhart! 2017-10-03 09:48:43 -06:00			`const trailingReplacment = new RegExp(escapeRegExp(replacement) + '$');`
			`const normalizedSlug = sanitizedSlug`
Fix alternate replacement not always working in `sanitizeSlug`. 2017-10-03 18:46:12 -06:00			`.replace(doubleReplacement, replacement)`
Make `sanitizeSlug` immutable. Thanks @erquhart! 2017-10-03 09:48:43 -06:00			`.replace(trailingReplacment, '');`
Move slug sanitizer to a seperate function. 2017-10-03 09:01:06 -06:00
Make `sanitizeSlug` immutable. Thanks @erquhart! 2017-10-03 09:48:43 -06:00			`return normalizedSlug;`
Move slug sanitizer to a seperate function. 2017-10-03 09:01:06 -06:00			`}`