Remove mutation from sanitizeIRI
.
This commit is contained in:
@ -23,35 +23,18 @@ export function getNewEntryUrl(collectionName, direct) {
|
|||||||
*/
|
*/
|
||||||
const uriChars = /[\w\-.~]/i;
|
const uriChars = /[\w\-.~]/i;
|
||||||
const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u;
|
const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u;
|
||||||
|
/**
 * Tells whether a single character may be kept as-is in a sanitized IRI.
 *
 * @param {string} char - The character to check.
 * @returns {boolean} `true` when `char` matches the `uriChars` pattern or,
 *   failing that, the `ucsChars` pattern; `false` otherwise.
 */
const validIRIChar = (char) => {
  // Try the `uriChars` pattern first; `ucsChars` is only consulted on a miss.
  if (uriChars.test(char)) {
    return true;
  }
  return ucsChars.test(char);
};
|
||||||
// `sanitizeIRI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed.
|
// `sanitizeIRI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed.
|
||||||
function sanitizeIRI(str, { replacement = "" } = {}) {
|
export function sanitizeIRI(str, { replacement = "" } = {}) {
|
||||||
if (!isString(str)) throw "The input slug must be a string.";
|
if (!isString(str)) throw "The input slug must be a string.";
|
||||||
if (!isString(replacement)) throw "`options.replacement` must be a string.";
|
if (!isString(replacement)) throw "`options.replacement` must be a string.";
|
||||||
|
|
||||||
// This is where sanitization is actually done.
|
|
||||||
const sanitize = (input) => {
|
|
||||||
let result = "";
|
|
||||||
// We cannot use a `map` function here because `string.split()`
|
|
||||||
// splits things like emojis into separate, lone UTF-16 surrogate halves,
|
|
||||||
// and we want to iterate over whole Unicode code points (it isn't required, but is nicer).
|
|
||||||
for (const char of input) {
|
|
||||||
if (uriChars.test(char) || ucsChars.test(char)) {
|
|
||||||
result += char;
|
|
||||||
} else {
|
|
||||||
result += replacement;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check and make sure the replacement character is actually a safe char itself.
|
// Check and make sure the replacement character is actually a safe char itself.
|
||||||
if (replacement !== "") {
|
if (!Array.from(replacement).every(validIRIChar)) throw "The replacement character(s) (options.replacement) is itself unsafe.";
|
||||||
const validReplacement = (sanitize(replacement) === replacement);
|
|
||||||
if (!validReplacement) throw "The replacement character(s) (options.replacement) is itself unsafe.";
|
|
||||||
}
|
|
||||||
|
|
||||||
// Actually do the sanitization.
|
// `Array.from` must be used instead of `String.split` because
|
||||||
return sanitize(str);
|
// `split` breaks things like emojis apart into lone UTF-16 surrogate halves.
|
||||||
|
return Array.from(str).map(char => (validIRIChar(char) ? char : replacement)).join('');
|
||||||
}
|
}
|
||||||
|
|
||||||
export function sanitizeSlug(str, { replacement = '-' } = {}) {
|
export function sanitizeSlug(str, { replacement = '-' } = {}) {
|
||||||
|
Reference in New Issue
Block a user