improve markdown entity escaping perf

2017-09-26 16:35:01 -04:00 · 2017-09-26 16:35:01 -04:00 · e2232e1067
commit e2232e1067
parent 516a5e4c7f
2 changed files with 100 additions and 97 deletions
--- a/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js
+++ b/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js
@ -1,4 +1,4 @@
-import { flow, partial, flatMap, flatten } from 'lodash';
+import { flow, partial, flatMap, flatten, map } from 'lodash';
 import { joinPatternSegments, combinePatterns, replaceWhen } from '../../../../lib/regexHelper';
 /**
@ -138,6 +138,100 @@ const escapePatterns = [
 ];
 /**
 * Build the non-escape pattern a single time, when the module is evaluated.
 * The resulting pattern matches substrings whose contents should not be
 * processed for escaping.
 */
 const joinedNonEscapePatterns = map(
  nonEscapePatterns,
  segments => new RegExp(joinPatternSegments(segments))
 );
 const nonEscapePattern = combinePatterns(joinedNonEscapePatterns);
 /**
 * Bind `escapeDelimiters` to each escape pattern, then compose the bound
 * functions into one function that applies them in succession.
 */
 const escapeFunctions = escapePatterns.map(entityPattern => {
  return partial(escapeDelimiters, entityPattern);
 });
 const escapeAll = flow(escapeFunctions);
 /**
 * Runs the complete escaping pass over `text`: `escapeCommonChars` first,
 * then `escapeLeadingChars` on its output.
 */
 function escapeAllChars(text) {
  return escapeLeadingChars(escapeCommonChars(text));
 }
 /**
 * escapeLeadingChars
 *
 * Handles escaping for markdown syntax that is only active at the beginning
 * of a string, such as headers and list items.
 *
 * Looks for '#', '*', '-', '>', '=', '|', a run of 4+ spaces, or a run of 3+
 * backticks at the start of `text`, optionally preceded by whitespace, and
 * replaces the whole match with a backslash followed by the captured entity.
 * Whitespace in front of the entity is part of the match and is therefore
 * not kept in the result.
 *
 * @param {string} text - Text to escape.
 * @returns {string} `text` with its leading entity (if any) escaped.
 */
 function escapeLeadingChars(text) {
  /**
   * The expression is anchored with `^` and carries no `g`/`m` flag, so a
   * match can only begin at index 0. A "$`" (text before the match) token in
   * the replacement would always expand to an empty string, so it is omitted.
   */
  return text.replace(/^\s*([-#*>=|]| {4,}|`{3,})/, '\\$1');
 }
 /**
 * escapeCommonChars
 *
 * Escapes active markdown entities in `text`. The escape pattern groups
 * determine which entities are replaced.
 */
 function escapeCommonChars(text) {
  /**
   * A fresh expression is compiled on every call because it is used with
   * `RegExp.exec`, which tracks its own state internally.
   */
  const skipExpression = new RegExp(nonEscapePattern, 'gm');
  /**
   * `replaceWhen` applies `escapeAll` only to the substrings that are
   * eligible for escaping.
   */
  const escapedText = replaceWhen(skipExpression, escapeAll, text, true);
  return escapedText;
 }
 /**
 * escapeDelimiters
 *
 * Runs `String.replace` with `pattern`, escaping only what the first two
 * capture groups matched: the opening delimiter and, when present, the
 * closing delimiter. The content between them is passed through untouched.
 */
 function escapeDelimiters(pattern, text) {
  return text.replace(pattern, (whole, opener, closer) => {
    // The third argument is only a string when a second capture group took
    // part in the match; otherwise there is no closing delimiter to escape.
    const closes = typeof closer === 'string';
    const innerEnd = whole.length - (closes ? closer.length : 0);
    const inner = whole.slice(opener.length, innerEnd);
    const opening = escape(opener);
    const closing = closes ? escape(closer) : '';
    return `${opening}${inner}${closing}`;
  });
 }
 /**
 * escape
 *
 * Minimal escaping helper for markdown entities: returns the received string
 * with a backslash placed in front of each of its characters.
 */
 function escape(delim) {
  return [...delim].map(char => `\\${char}`).join('');
 }
 /**
 * A Remark plugin for escaping markdown entities.
 *
@ -178,90 +272,3 @@ export default function remarkEscapeMarkdownEntities() {
  return transform;
 }
 /**
 * Applies `escapeCommonChars` to `text` and feeds the result through
 * `escapeLeadingChars`.
 */
 function escapeAllChars(text) {
  return escapeLeadingChars(escapeCommonChars(text));
 }
 /**
 * escapeLeadingChars
 *
 * Handles escaping for characters that must be positioned at the beginning of
 * the string, such as headers and list items.
 *
 * Escapes '#', '*', '-', '>', '=', '|', and sequences of 3+ backticks or 4+
 * spaces when found at the beginning of a string, preceded by zero or more
 * whitespace characters.
 */
 function escapeLeadingChars(text) {
  return text.replace(/^\s*([-#*>=|]| {4,}|`{3,})/, '$`\\$1');
 }
 /**
 * escapeCommonChars
 *
 * Escapes active markdown entities in `text`. The escape pattern groups
 * determine which entities are replaced.
 */
 function escapeCommonChars(text) {
  /**
   * The non-escape expression is built at execution time because it is used
   * with `RegExp.exec`, which tracks its own state internally. It matches
   * substrings whose contents should not be processed for escaping.
   */
  const segmentGroups = [
    nonEscapePatterns.htmlTags,
    nonEscapePatterns.preformattedHtmlBlocks,
  ];
  // Call through an arrow so only the segment list is forwarded; `map` would
  // otherwise also pass the index and the array as extra arguments.
  const joinedNonEscapePatterns = segmentGroups.map(group => joinPatternSegments(group));
  const nonEscapePattern = combinePatterns(joinedNonEscapePatterns, 'gm');
  /**
   * Bind `escapeDelimiters` to each escape pattern and chain the results
   * into a single function that applies them in succession.
   */
  const escapeAll = flow(
    escapePatterns.map(entityPattern => partial(escapeDelimiters, entityPattern))
  );
  /**
   * `replaceWhen` applies `escapeAll` only to the substrings that are
   * eligible for escaping.
   */
  return replaceWhen(nonEscapePattern, escapeAll, text, true);
 }
 /**
 * escapeDelimiters
 *
 * Performs `String.replace` for `pattern`, escaping only the text matched by
 * the first two capture groups (the opening and, optionally, the closing
 * markdown delimiter) and leaving the content between them as it was.
 */
 function escapeDelimiters(pattern, text) {
  const escapeMatch = (whole, opener, closer) => {
    // A non-string third argument means no closing delimiter was captured.
    const closes = typeof closer === 'string';
    const innerEnd = closes ? whole.length - closer.length : whole.length;
    const inner = whole.slice(opener.length, innerEnd);
    const opening = escape(opener);
    if (!closes) {
      return `${opening}${inner}`;
    }
    return `${opening}${inner}${escape(closer)}`;
  };
  return text.replace(pattern, escapeMatch);
 }
 /**
 * escape
 *
 * Small replacement helper for escaping markdown entities: every character
 * of the received string is prefixed with a backslash.
 */
 function escape(delim) {
  const escapedChars = [...delim].map(char => `\\${char}`);
  return escapedChars.join('');
 }
--- a/src/lib/regexHelper.js
+++ b/src/lib/regexHelper.js
@ -2,24 +2,20 @@ import { last } from 'lodash';
 /**
 * Joins an array of regular expressions into a single expression, without
- * altering the received expressions. Only flags passed as an argument will
+ * altering the received expressions.
 * apply to the resulting regular expression.
 */
-export function joinPatternSegments(patterns, flags = '') {
+export function joinPatternSegments(patterns) {
-  const pattern = patterns.map(p => p.source).join('');
+  return patterns.map(p => p.source).join('');
  return new RegExp(pattern, flags);
 }
 /**
 * Combines an array of regular expressions into a single expression, wrapping
 * each in a non-capturing group and interposing alternation characters (|) so
- * that each expression is executed separately. Only flags passed as an argument
+ * that each expression is executed separately.
 * will apply to the resulting regular expression.
 */
 export function combinePatterns(patterns, flags = '') {
-  const pattern = patterns.map(p => `(?:${p.source})`).join('|');
+  return patterns.map(p => `(?:${p.source})`).join('|');
  return new RegExp(pattern, flags);
 }