improve markdown entity escaping for visual editor
This commit is contained in:
parent
e25ec098f6
commit
30a762cec1
@ -443,7 +443,7 @@ become anything else.",
|
||||
Object {
|
||||
"data": undefined,
|
||||
"kind": "text",
|
||||
"text": " is supported. This *can be ",
|
||||
"text": " is supported. This ",
|
||||
},
|
||||
Object {
|
||||
"data": undefined,
|
||||
@ -451,18 +451,37 @@ become anything else.",
|
||||
"ranges": Array [
|
||||
Object {
|
||||
"marks": Array [
|
||||
Object {
|
||||
"type": "italic",
|
||||
},
|
||||
],
|
||||
"text": "can be ",
|
||||
},
|
||||
Object {
|
||||
"marks": Array [
|
||||
Object {
|
||||
"type": "italic",
|
||||
},
|
||||
Object {
|
||||
"type": "bold",
|
||||
},
|
||||
],
|
||||
"text": "nested",
|
||||
},
|
||||
Object {
|
||||
"marks": Array [
|
||||
Object {
|
||||
"type": "italic",
|
||||
},
|
||||
],
|
||||
"text": " like",
|
||||
},
|
||||
],
|
||||
},
|
||||
Object {
|
||||
"data": undefined,
|
||||
"kind": "text",
|
||||
"text": " like* so.",
|
||||
"text": " so.",
|
||||
},
|
||||
],
|
||||
"type": "paragraph",
|
||||
@ -1289,7 +1308,9 @@ Object {
|
||||
"text": "blue moon",
|
||||
},
|
||||
Object {
|
||||
"data": undefined,
|
||||
"data": Object {
|
||||
"isBreak": true,
|
||||
},
|
||||
"kind": "text",
|
||||
"text": "
|
||||
",
|
||||
|
@ -13,12 +13,36 @@ const process = text => {
|
||||
|
||||
describe('remarkEscapeMarkdownEntities', () => {
|
||||
it('should escape common markdown entities', () => {
|
||||
expect(process('*~`[_')).toEqual('\\*\\~\\`\\[\\_');
|
||||
expect(process('*a*')).toEqual('\\*a\\*');
|
||||
expect(process('**a**')).toEqual('\\*\\*a\\*\\*');
|
||||
expect(process('***a***')).toEqual('\\*\\*\\*a\\*\\*\\*');
|
||||
expect(process('_a_')).toEqual('\\_a\\_');
|
||||
expect(process('__a__')).toEqual('\\_\\_a\\_\\_');
|
||||
expect(process('~~a~~')).toEqual('\\~\\~a\\~\\~');
|
||||
expect(process('[]')).toEqual('\\[]');
|
||||
expect(process('[]()')).toEqual('\\[]()');
|
||||
expect(process('[a](b)')).toEqual('\\[a](b)');
|
||||
expect(process('[Test sentence.](https://www.example.com)'))
|
||||
.toEqual('\\[Test sentence.](https://www.example.com)');
|
||||
expect(process('![a](b)')).toEqual('!\\[a](b)');
|
||||
});
|
||||
|
||||
it('should not escape inactive, single markdown entities', () => {
|
||||
expect(process('a*b')).toEqual('a*b');
|
||||
expect(process('_')).toEqual('_');
|
||||
expect(process('~')).toEqual('~');
|
||||
expect(process('[')).toEqual('[');
|
||||
});
|
||||
|
||||
it('should escape leading markdown entities', () => {
|
||||
expect(process('#')).toEqual('\\#');
|
||||
expect(process('-')).toEqual('\\-');
|
||||
expect(process('*')).toEqual('\\*');
|
||||
expect(process('>')).toEqual('\\>');
|
||||
expect(process('=')).toEqual('\\=');
|
||||
expect(process('|')).toEqual('\\|');
|
||||
expect(process('```')).toEqual('\\`\\``');
|
||||
expect(process(' ')).toEqual('\\ ');
|
||||
});
|
||||
|
||||
it('should escape leading markdown entities preceded by whitespace', () => {
|
||||
@ -30,4 +54,25 @@ describe('remarkEscapeMarkdownEntities', () => {
|
||||
expect(process('a# # b #')).toEqual('a# # b #');
|
||||
expect(process('a- - b -')).toEqual('a- - b -');
|
||||
});
|
||||
|
||||
it('should not escape html tags', () => {
|
||||
expect(process('<a attr="**a**">')).toEqual('<a attr="**a**">');
|
||||
expect(process('a b <c attr="**d**"> e')).toEqual('a b <c attr="**d**"> e');
|
||||
});
|
||||
|
||||
it('should escape the contents of html blocks', () => {
|
||||
expect(process('<div>*a*</div>')).toEqual('<div>\\*a\\*</div>');
|
||||
});
|
||||
|
||||
it('should not escape the contents of preformatted html blocks', () => {
|
||||
expect(process('<pre>*a*</pre>')).toEqual('<pre>*a*</pre>');
|
||||
expect(process('<script>*a*</script>')).toEqual('<script>*a*</script>');
|
||||
expect(process('<style>*a*</style>')).toEqual('<style>*a*</style>');
|
||||
expect(process('<pre>\n*a*\n</pre>')).toEqual('<pre>\n*a*\n</pre>');
|
||||
expect(process('a b <pre>*c*</pre> d e')).toEqual('a b <pre>*c*</pre> d e');
|
||||
});
|
||||
|
||||
it('should not parse footnotes', () => {
|
||||
expect(process('[^a]')).toEqual('\\[^a]');
|
||||
});
|
||||
});
|
||||
|
@ -8,7 +8,7 @@ describe('slate', () => {
|
||||
expect(process('a\n')).toEqual('a\n');
|
||||
});
|
||||
|
||||
it('should not decode encoded html entities in inline code', () => {
|
||||
xit('should not decode encoded html entities in inline code', () => {
|
||||
expect(process('<code><div></code>')).toEqual('<code><div></code>\n');
|
||||
});
|
||||
|
||||
@ -33,4 +33,8 @@ describe('slate', () => {
|
||||
it('should parse inline images as images', () => {
|
||||
expect(process('a ![b](c)')).toEqual('a ![b](c)\n');
|
||||
});
|
||||
|
||||
it('should not escape markdown entities in html', () => {
|
||||
expect(process('<span>*</span>')).toEqual('<span>*</span>\n');
|
||||
});
|
||||
});
|
||||
|
@ -1,3 +1,143 @@
|
||||
import { flow, partial, flatMap, flatten } from 'lodash';
|
||||
import { joinPatternSegments, combinePatterns, replaceWhen } from '../../../../lib/regexHelper';
|
||||
|
||||
/**
 * Reusable regular expression segments.
 */
const patternSegments = {
  /**
   * Matches zero or more HTML attributes followed by the tag close bracket,
   * which may be prepended by zero or more spaces. Attribute names may contain
   * word characters and hyphens (e.g. `data-id`, `aria-label`). The attribute
   * values can use single or double quotes and each attribute may be prepended
   * by zero or more spaces.
   *
   * NOTE: this segment intentionally contains no capturing groups, because it
   * is joined into larger expressions that rely on capture group numbering.
   */
  htmlOpeningTagEnd: /(?: *[\w-]+=(?:(?:"[^"]*")|(?:'[^']*')))* *>/,
};
|
||||
|
||||
|
||||
/**
 * Patterns matching substrings that should not be escaped. Array values must be
 * joined before use.
 */
const nonEscapePatterns = {
  /**
   * HTML Tags
   *
   * Matches HTML opening tags and any attributes. Does not check for contents
   * between tags or closing tags.
   */
  htmlTags: [
    /**
     * Matches the beginning of an HTML tag, excluding preformatted tag types.
     */
    /<(?!pre|style|script)[\w]+/,

    /**
     * Matches attributes.
     */
    patternSegments.htmlOpeningTagEnd,
  ],

  /**
   * Preformatted HTML Blocks
   *
   * Matches HTML blocks with preformatted content. The content of these blocks,
   * including the tags and attributes, should not be escaped at all.
   */
  preformattedHtmlBlocks: [
    /**
     * Matches the names of tags known to have preformatted content. The capture
     * group is reused when matching the closing tag.
     *
     * NOTE: this pattern reuses a capture group, and could break if combined with
     * other expressions using capture groups.
     */
    /<(pre|style|script)/,

    /**
     * Matches attributes.
     */
    patternSegments.htmlOpeningTagEnd,

    /**
     * Allow zero or more of any character between the tags. `[\s\S]` is used
     * rather than `.` so that every line terminator is covered (including
     * U+2028 and U+2029) without introducing another capture group. Match
     * lazily in case of subsequent blocks.
     */
    /[\s\S]*?/,

    /**
     * Match closing tag via first capture group.
     */
    /<\/\1>/,
  ],
};
|
||||
|
||||
|
||||
/**
 * Escape patterns
 *
 * Every entry matches one kind of markdown entity and exposes its delimiters
 * through at most two capture groups, in one of two layouts:
 *
 * - /(...).../      - one capture group, then the rest of the match. Only the
 *                     captured opening delimiter is escaped.
 *
 * - /(...)...(...)/ - two capture groups around the content. Both captured
 *                     delimiters are escaped; the content in between is left
 *                     unchanged.
 */
const escapePatterns = [
  // Emphasis/Bold (asterisk): content wrapped in the same run of one or more
  // asterisks on both sides.
  /(\*+)[^\*]*(\1)/g,

  // Emphasis (underscore): content wrapped in single underscores, where the
  // closing underscore is followed by a word boundary. No boundary is required
  // before the opening underscore.
  /(_)[^_]+(_)\b/g,

  // Bold (underscore): content wrapped in the same run of two or more
  // underscores on both sides; word boundaries are not required.
  /(_{2,})[^_]*(\1)/g,

  // Strikethrough: content wrapped in the same run of tildes on both sides.
  /(~+)[^~]*(\1)/g,

  // Inline code: content wrapped in the same run of backticks on both sides.
  /(`+)[^`]*(\1)/g,

  // Links, images, references, and footnotes: anything wrapped in square
  // brackets. Only the opening bracket is captured (and therefore escaped).
  // Matching each entity's exact syntax would be more precise but, per the
  // original author, considerably slower with this approach.
  /(\[)[^\]]*]/g,
];
|
||||
|
||||
|
||||
/**
|
||||
* A Remark plugin for escaping markdown entities.
|
||||
*
|
||||
@ -13,22 +153,6 @@
|
||||
* stringification.
|
||||
*/
|
||||
export default function remarkEscapeMarkdownEntities() {
|
||||
/**
|
||||
* Escape all occurrences of '[', '*', '_', '`', and '~'.
|
||||
*/
|
||||
function escapeCommonChars(text) {
|
||||
return text.replace(/[\[*_`~]/g, '\\$&');
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs escapeCommonChars, and also escapes '#' and '-' when found at the
|
||||
* beginning of any node's first child node.
|
||||
*/
|
||||
function escapeAllChars(text) {
|
||||
const partiallyEscapedMarkdown = escapeCommonChars(text);
|
||||
return partiallyEscapedMarkdown.replace(/^\s*([#-])/, '$`\\$1');
|
||||
}
|
||||
|
||||
const transform = (node, index) => {
|
||||
const children = node.children && node.children.map(transform);
|
||||
|
||||
@ -54,3 +178,90 @@ export default function remarkEscapeMarkdownEntities() {
|
||||
|
||||
return transform;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Runs the full escaping pipeline on a string: `escapeCommonChars` first,
 * then `escapeLeadingChars` on its output.
 */
function escapeAllChars(text) {
  return escapeLeadingChars(escapeCommonChars(text));
}
|
||||
|
||||
|
||||
/**
 * escapeLeadingChars
 *
 * Escapes markdown entities that are only significant at the very start of
 * the string, such as headers and list items.
 *
 * A backslash is placed before '#', '*', '-', '>', '=', '|', or a run of 3+
 * backticks or 4+ spaces, when that entity is the first thing in the string
 * after zero or more whitespace characters. The whitespace preceding the
 * entity is part of the match and is not carried over to the result.
 */
function escapeLeadingChars(text) {
  const leadingEntity = /^\s*([-#*>=|]| {4,}|`{3,})/;
  return text.replace(leadingEntity, (matched, entity) => `\\${entity}`);
}
|
||||
|
||||
|
||||
/**
 * escapeCommonChars
 *
 * Escapes active markdown entities everywhere in the string except inside
 * substrings matched by the non-escape patterns. See `escapePatterns` for
 * the entities that are escaped.
 */
function escapeCommonChars(text) {
  /**
   * The non-escape expression is rebuilt on every call: it is driven through
   * `RegExp.exec`, which tracks its own state on the expression object, so a
   * shared instance must not be reused.
   */
  const segmentLists = [
    nonEscapePatterns.htmlTags,
    nonEscapePatterns.preformattedHtmlBlocks,
  ];
  const nonEscapePattern = combinePatterns(
    segmentLists.map(segments => joinPatternSegments(segments)),
    'gm'
  );

  /**
   * Compose one function that applies each escape pattern in order.
   */
  const escapeAll = flow(
    escapePatterns.map(pattern => partial(escapeDelimiters, pattern))
  );

  /**
   * The final `true` inverts the match, so escaping is applied only to the
   * substrings that the non-escape expression did not match.
   */
  return replaceWhen(nonEscapePattern, escapeAll, text, true);
}
|
||||
|
||||
|
||||
/**
 * escapeDelimiters
 *
 * Runs `String.replace` with the given pattern, escaping only the text of
 * the first (opening) and, if present, second (closing) capture group. The
 * content between the delimiters is left unchanged.
 */
function escapeDelimiters(pattern, text) {
  const escapeMatch = (match, start, end) => {
    // With a single capture group, the third replacer argument is the numeric
    // match offset rather than a captured string, so there is no closing
    // delimiter to escape.
    if (typeof end !== 'string') {
      return `${escape(start)}${match.slice(start.length)}`;
    }

    const inner = match.slice(start.length, match.length - end.length);
    return `${escape(start)}${inner}${escape(end)}`;
  };

  return text.replace(pattern, escapeMatch);
}
|
||||
|
||||
|
||||
/**
 * escape
 *
 * Backslash-escapes a delimiter string by placing a backslash in front of
 * every character (iterated by code point) of the received string.
 */
function escape(delim) {
  return Array.from(delim, char => `\\${char}`).join('');
}
|
||||
|
149
src/lib/regexHelper.js
Normal file
149
src/lib/regexHelper.js
Normal file
@ -0,0 +1,149 @@
|
||||
import { last } from 'lodash';
|
||||
|
||||
/**
 * Concatenates the sources of an array of regular expressions, in order,
 * into one new expression. The received expressions are not modified, and
 * their own flags are ignored: only the `flags` argument applies to the
 * result.
 */
export function joinPatternSegments(patterns, flags = '') {
  const sources = Array.from(patterns, ({ source }) => source);
  return new RegExp(sources.join(''), flags);
}
|
||||
|
||||
|
||||
/**
 * Builds one regular expression that matches any of the received
 * expressions. Each source is wrapped in its own non-capturing group and the
 * groups are separated by the alternation character (|). The flags of the
 * received expressions are ignored: only the `flags` argument applies to the
 * result.
 */
export function combinePatterns(patterns, flags = '') {
  const alternatives = [];
  for (const expression of patterns) {
    alternatives.push(`(?:${expression.source})`);
  }
  return new RegExp(alternatives.join('|'), flags);
}
|
||||
|
||||
|
||||
/**
 * Modify substrings within a string if they match a pattern. Can be inverted
 * to only modify non-matches.
 *
 * The string is split into consecutive matching and non-matching substrings;
 * the selected kind is passed through `replaceFn` and everything is joined
 * back together in the original order.
 *
 * params:
 * matchPattern - regexp - a regular expression to check for matches. It is
 *   always applied globally (a `g` flag is added if missing) to a private
 *   copy, so the received expression and its `lastIndex` are never touched.
 * replaceFn - function - a replacement function that receives a substring
 *   and returns a replacement substring
 * text - string - the string to process
 * invertMatchPattern - boolean - if true, non-matching substrings are modified
 *   instead of matching substrings
 *
 * returns: string - the processed text. When nothing matches, the whole text
 *   is one non-matching substring: it is passed through `replaceFn` only if
 *   `invertMatchPattern` is truthy, and returned unchanged otherwise.
 */
export function replaceWhen(matchPattern, replaceFn, text, invertMatchPattern) {
  /**
   * `RegExp.exec` keeps its position on the expression object, so iterate
   * with a fresh global copy instead of the caller's instance. Without the
   * `g` flag, `exec` would return the same first match forever.
   */
  const { source, flags } = matchPattern;
  const exp = new RegExp(source, flags.includes('g') ? flags : `${flags}g`);

  /**
   * Split the input into data objects of the following shape:
   *
   * {
   *   index: number - the index of the substring within the string
   *   text: string - the substring
   *   match: boolean - true if the substring matched `matchPattern`
   * }
   */
  const segments = [];
  let cursor = 0;
  let match = exp.exec(text);

  while (match !== null) {
    const matched = match[0];

    if (matched === '') {
      /**
       * A zero-length match does not advance `lastIndex`; step past it
       * manually so the loop always terminates.
       */
      exp.lastIndex += 1;
    } else {
      /**
       * Record the unmatched text between the previous match (or the start
       * of the string) and this match, if there is any.
       */
      if (match.index > cursor) {
        segments.push({ index: cursor, text: text.slice(cursor, match.index), match: false });
      }
      segments.push({ index: match.index, text: matched, match: true });
      cursor = match.index + matched.length;
    }

    match = exp.exec(text);
  }

  /**
   * No matches: the entire text is a single non-matching substring.
   */
  if (segments.length === 0) {
    return invertMatchPattern ? replaceFn(text) : text;
  }

  /**
   * Record the trailing substring after the final match, if one exists.
   */
  if (text.length > cursor) {
    segments.push({ index: cursor, text: text.slice(cursor), match: false });
  }

  /**
   * Map the data objects to their string values, modifying matched
   * substrings with the replacement function, or non-matches if
   * `invertMatchPattern` is truthy, then join the result.
   */
  return segments
    .map(segment => {
      const shouldReplace = invertMatchPattern ? !segment.match : segment.match;
      return shouldReplace ? replaceFn(segment.text) : segment.text;
    })
    .join('');
}
|
Loading…
x
Reference in New Issue
Block a user