improve markdown entity escaping perf
This commit is contained in:
parent
516a5e4c7f
commit
e2232e1067
@ -1,4 +1,4 @@
|
|||||||
import { flow, partial, flatMap, flatten } from 'lodash';
|
import { flow, partial, flatMap, flatten, map } from 'lodash';
|
||||||
import { joinPatternSegments, combinePatterns, replaceWhen } from '../../../../lib/regexHelper';
|
import { joinPatternSegments, combinePatterns, replaceWhen } from '../../../../lib/regexHelper';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -138,6 +138,100 @@ const escapePatterns = [
|
|||||||
];
|
];
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Build the non-escape pattern once, when the module is evaluated. Every entry
 * of `nonEscapePatterns` has its segments joined into one expression, and the
 * resulting expressions are combined. The non-escape pattern matches
 * substrings whose contents should not be processed for escaping.
 */
const joinedNonEscapePatterns = map(
  nonEscapePatterns,
  segments => new RegExp(joinPatternSegments(segments))
);
const nonEscapePattern = combinePatterns(joinedNonEscapePatterns);

/**
 * Bind `escapeDelimiters` to each escape pattern, then compose the bound
 * functions so that they run one after another on the same text.
 */
const escapeFunctions = map(escapePatterns, pattern => partial(escapeDelimiters, pattern));
const escapeAll = flow(escapeFunctions);
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Escapes a string in two passes: `escapeCommonChars` runs first, and its
 * result is then passed through `escapeLeadingChars`.
 */
function escapeAllChars(text) {
  return escapeLeadingChars(escapeCommonChars(text));
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * escapeLeadingChars
 *
 * Handles escaping for characters that must be positioned at the beginning of
 * the string, such as headers and list items.
 *
 * Escapes '#', '*', '-', '>', '=', '|', and sequences of 3+ backticks or 4+
 * spaces when found at the beginning of the string, preceded by zero or more
 * whitespace characters. That preceding whitespace is part of the match and
 * is dropped from the result, e.g. '  # a' becomes '\# a'.
 *
 * @param {string} text - The string to escape.
 * @returns {string} The string with its leading entity (if any) escaped.
 */
function escapeLeadingChars(text) {
  // The expression is anchored to the start of the string (no `m` flag), so
  // nothing can precede the match. Only the captured entity is written back,
  // prefixed with a single backslash.
  return text.replace(/^\s*([-#*>=|]| {4,}|`{3,})/, '\\$1');
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * escapeCommonChars
 *
 * Escapes active markdown entities by running `escapeAll` through
 * `replaceWhen`, so that only substrings eligible for escaping are touched.
 * See escape pattern groups for details on which entities are replaced.
 */
function escapeCommonChars(text) {
  /**
   * The non-escape expression is instantiated on every call, never shared:
   * it is used with `RegExp.exec`, which tracks its own state internally.
   */
  return replaceWhen(new RegExp(nonEscapePattern, 'gm'), escapeAll, text, true);
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * escapeDelimiters
 *
 * Runs `String.replace` with the given pattern, escaping only the text of the
 * first two capture groups: the opening delimiter and, when present, the
 * closing delimiter. Whatever lies between them in the match is passed
 * through untouched.
 */
function escapeDelimiters(pattern, text) {
  return text.replace(pattern, (match, start, end) => {
    // Without a second captured string there is no closing delimiter, so
    // everything after the opening delimiter is content.
    if (typeof end !== 'string') {
      return `${escape(start)}${match.slice(start.length)}`;
    }

    const inner = match.slice(start.length, match.length - end.length);
    return `${escape(start)}${inner}${escape(end)}`;
  });
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * escape
 *
 * Replacement helper for escaping markdown entities: returns the received
 * string with a backslash inserted in front of each of its characters.
 */
function escape(delim) {
  return [...delim].map(char => `\\${char}`).join('');
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A Remark plugin for escaping markdown entities.
|
* A Remark plugin for escaping markdown entities.
|
||||||
*
|
*
|
||||||
@ -178,90 +272,3 @@ export default function remarkEscapeMarkdownEntities() {
|
|||||||
|
|
||||||
return transform;
|
return transform;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Executes both the `escapeCommonChars` and `escapeLeadingChars` functions.
|
|
||||||
*/
|
|
||||||
function escapeAllChars(text) {
|
|
||||||
const partiallyEscapedMarkdown = escapeCommonChars(text);
|
|
||||||
return escapeLeadingChars(partiallyEscapedMarkdown);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* escapeLeadingChars
|
|
||||||
*
|
|
||||||
* Handles escaping for characters that must be positioned at the beginning of
|
|
||||||
* the string, such as headers and list items.
|
|
||||||
*
|
|
||||||
* Escapes '#', '*', '-', '>', '=', '|', and sequences of 3+ backticks or 4+
|
|
||||||
* spaces when found at the beginning of a string, preceded by zero or more
|
|
||||||
* whitespace characters.
|
|
||||||
*/
|
|
||||||
function escapeLeadingChars(text) {
|
|
||||||
return text.replace(/^\s*([-#*>=|]| {4,}|`{3,})/, '$`\\$1');
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* escapeCommonChars
|
|
||||||
*
|
|
||||||
* Escapes active markdown entities. See escape pattern groups for details on
|
|
||||||
* which entities are replaced.
|
|
||||||
*/
|
|
||||||
function escapeCommonChars(text) {
|
|
||||||
/**
|
|
||||||
* Generate new non-escape expression (must happen at execution time because
|
|
||||||
* we use `RegExp.exec`, which tracks its own state internally). The
|
|
||||||
* non-escape expression matches substrings whose contents should not be
|
|
||||||
* processed for escaping.
|
|
||||||
*/
|
|
||||||
const { htmlTags, preformattedHtmlBlocks } = nonEscapePatterns;
|
|
||||||
const joinedNonEscapePatterns = [ htmlTags, preformattedHtmlBlocks ].map(p => joinPatternSegments(p));
|
|
||||||
const nonEscapePattern = combinePatterns(joinedNonEscapePatterns, 'gm');
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create chain of successive escape functions for various markdown entities.
|
|
||||||
*/
|
|
||||||
const escapeFunctions = escapePatterns.map(pattern => partial(escapeDelimiters, pattern));
|
|
||||||
const escapeAll = flow(escapeFunctions);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Use `replaceWhen` to escape markdown entities only within substrings that
|
|
||||||
* are eligible for escaping.
|
|
||||||
*/
|
|
||||||
return replaceWhen(nonEscapePattern, escapeAll, text, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* escapeDelimiters
|
|
||||||
*
|
|
||||||
* Executes `String.replace` for a given pattern, but only on the first two
|
|
||||||
* capture groups. Specifically intended for escaping opening (and optionally
|
|
||||||
* closing) markdown entities without escaping the content in between.
|
|
||||||
*/
|
|
||||||
function escapeDelimiters(pattern, text) {
|
|
||||||
return text.replace(pattern, (match, start, end) => {
|
|
||||||
const hasEnd = typeof end === 'string';
|
|
||||||
const matchSliceEnd = hasEnd ? match.length - end.length : match.length;
|
|
||||||
const content = match.slice(start.length, matchSliceEnd);
|
|
||||||
return `${escape(start)}${content}${hasEnd ? escape(end) : ''}`;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* escape
|
|
||||||
*
|
|
||||||
* Simple replacement function for escaping markdown entities. Prepends every
|
|
||||||
* character in the received string with a backslash.
|
|
||||||
*/
|
|
||||||
function escape(delim) {
|
|
||||||
let result = '';
|
|
||||||
for (const char of delim) {
|
|
||||||
result += `\\${char}`;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
@ -2,24 +2,20 @@ import { last } from 'lodash';
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Joins an array of regular expressions into a single expression, without
|
* Joins an array of regular expressions into a single pattern string, without
|
||||||
* altering the received expressions. Only flags passed as an argument will
|
* altering the received expressions.
|
||||||
* apply to the resulting regular expression.
|
|
||||||
*/
|
*/
|
||||||
export function joinPatternSegments(patterns, flags = '') {
|
export function joinPatternSegments(patterns) {
|
||||||
const pattern = patterns.map(p => p.source).join('');
|
return patterns.map(p => p.source).join('');
|
||||||
return new RegExp(pattern, flags);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Combines an array of regular expressions into a single expression, wrapping
|
* Combines an array of regular expressions into a single pattern string, wrapping
|
||||||
* each in a non-capturing group and interposing alternation characters (|) so
|
* each in a non-capturing group and interposing alternation characters (|) so
|
||||||
* that each expression is executed separately. Only flags passed as an argument
|
* that each expression is executed separately.
|
||||||
* will apply to the resulting regular expression.
|
|
||||||
*/
|
*/
|
||||||
export function combinePatterns(patterns, flags = '') {
|
export function combinePatterns(patterns, flags = '') {
|
||||||
const pattern = patterns.map(p => `(?:${p.source})`).join('|');
|
return patterns.map(p => `(?:${p.source})`).join('|');
|
||||||
return new RegExp(pattern, flags);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user