diff --git a/src/components/Widgets/Markdown/MarkdownControl/RawEditor/index.js b/src/components/Widgets/Markdown/MarkdownControl/RawEditor/index.js index d3ccda2d..80b4295a 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/RawEditor/index.js +++ b/src/components/Widgets/Markdown/MarkdownControl/RawEditor/index.js @@ -22,16 +22,14 @@ export default class RawEditor extends React.Component { this.setState({ editorState }); } - onChange = debounce(this.props.onChange, 250); - /** * When the document value changes, serialize from Slate's AST back to plain * text (which is Markdown) and pass that up as the new value. */ - handleDocumentChange = (doc, editorState) => { + handleDocumentChange = debounce((doc, editorState) => { const value = Plain.serialize(editorState); - this.onChange(value); - }; + this.props.onChange(value); + }, 150); /** * If a paste contains plain text, deserialize it to Slate's AST and insert diff --git a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap index 341b42b3..81948733 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap +++ b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap @@ -443,7 +443,7 @@ become anything else.", Object { "data": undefined, "kind": "text", - "text": " is supported. This *can be ", + "text": " is supported. 
This ", }, Object { "data": undefined, @@ -451,18 +451,37 @@ become anything else.", "ranges": Array [ Object { "marks": Array [ + Object { + "type": "italic", + }, + ], + "text": "can be ", + }, + Object { + "marks": Array [ + Object { + "type": "italic", + }, Object { "type": "bold", }, ], "text": "nested", }, + Object { + "marks": Array [ + Object { + "type": "italic", + }, + ], + "text": " like", + }, ], }, Object { "data": undefined, "kind": "text", - "text": " like* so.", + "text": " so.", }, ], "type": "paragraph", @@ -1289,10 +1308,16 @@ Object { "text": "blue moon", }, Object { - "data": undefined, - "kind": "text", - "text": " + "kind": "inline", + "nodes": Array [ + Object { + "data": undefined, + "kind": "text", + "text": " ", + }, + ], + "type": "break", }, Object { "data": undefined, diff --git a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js index 2f19dcc7..21c6421e 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js +++ b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js @@ -1,4 +1,5 @@ import React from 'react'; +import { List } from 'immutable'; import cn from 'classnames'; import styles from './index.css'; @@ -35,9 +36,28 @@ export const NODE_COMPONENTS = {
    {props.children}
, 'link': props => { const data = props.node.get('data'); + const marks = data.get('marks'); const url = data.get('url'); const title = data.get('title'); - return {props.children}; + const link = {props.children}; + const result = !marks ? link : marks.reduce((acc, mark) => { + const MarkComponent = MARK_COMPONENTS[mark.type]; + return {acc}; + }, link); + return result; + }, + 'image': props => { + const data = props.node.get('data'); + const marks = data.get('marks'); + const url = data.get('url'); + const title = data.get('title'); + const alt = data.get('alt'); + const image = {alt}; + const result = !marks ? image : marks.reduce((acc, mark) => { + const MarkComponent = MARK_COMPONENTS[mark.type]; + return {acc}; + }, image); + return result; }, 'shortcode': props => { const { attributes, node, state: editorState } = props; diff --git a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/index.js b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/index.js index ee18459d..34d2ceab 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/index.js +++ b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/index.js @@ -44,14 +44,12 @@ export default class Editor extends Component { return state.transform().insertFragment(doc).apply(); } - onChange = debounce(this.props.onChange, 250); - - handleDocumentChange = (doc, editorState) => { + handleDocumentChange = debounce((doc, editorState) => { const raw = Raw.serialize(editorState, { terse: true }); const plugins = this.state.shortcodePlugins; const markdown = slateToMarkdown(raw, plugins); - this.onChange(markdown); - }; + this.props.onChange(markdown); + }, 150); hasMark = type => this.state.editorState.marks.some(mark => mark.type === type); hasBlock = type => this.state.editorState.blocks.some(node => node.type === type); diff --git a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/plugins.js 
b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/plugins.js index efb7e029..6418e0d1 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/plugins.js +++ b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/plugins.js @@ -1,3 +1,4 @@ +import { Text, Inline } from 'slate'; import SlateSoftBreak from 'slate-soft-break'; import EditList from 'slate-edit-list'; import EditTable from 'slate-edit-table'; @@ -21,7 +22,13 @@ const SoftBreak = (options = {}) => ({ return unwrapped.insertBlock(defaultBlock).apply(); } - return state.transform().insertText('\n').apply(); + const textNode = Text.createFromString('\n'); + const breakNode = Inline.create({ type: 'break', nodes: [ textNode ] }); + return state.transform() + .insertInline(breakNode) + .insertText('') + .collapseToStartOfNextText() + .apply(); } }); @@ -32,7 +39,7 @@ const SoftBreakOpts = { export const SoftBreakConfigured = SoftBreak(SoftBreakOpts); -export const ParagraphSoftBreakConfigured = SlateSoftBreak({ onlyIn: ['paragraph'], shift: true }); +export const ParagraphSoftBreakConfigured = SoftBreak({ onlyIn: ['paragraph'], shift: true }); const BreakToDefaultBlock = ({ onlyIn = [], defaultBlock = 'paragraph' }) => ({ onKeyDown(e, data, state) { diff --git a/src/components/Widgets/Markdown/serializers/__tests__/remarkAllowHtmlEntities.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/remarkAllowHtmlEntities.spec.js new file mode 100644 index 00000000..a7eed4c4 --- /dev/null +++ b/src/components/Widgets/Markdown/serializers/__tests__/remarkAllowHtmlEntities.spec.js @@ -0,0 +1,24 @@ +import unified from 'unified'; +import markdownToRemark from 'remark-parse'; +import remarkAllowHtmlEntities from '../remarkAllowHtmlEntities'; + +const process = markdown => { + const mdast = unified().use(markdownToRemark).use(remarkAllowHtmlEntities).parse(markdown); + + /** + * The MDAST will look like: + * + * { type: 'root', children: [ + * { type: 'paragraph', children: 
[ + * // results here + * ]} + * ]} + */ + return mdast.children[0].children[0].value; +}; + +describe('remarkAllowHtmlEntities', () => { + it('should not decode HTML entities', () => { + expect(process('<div>')).toEqual('<div>'); + }); +}); diff --git a/src/components/Widgets/Markdown/serializers/__tests__/remarkEscapeMarkdownEntities.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/remarkEscapeMarkdownEntities.spec.js index 515ae75f..09af85db 100644 --- a/src/components/Widgets/Markdown/serializers/__tests__/remarkEscapeMarkdownEntities.spec.js +++ b/src/components/Widgets/Markdown/serializers/__tests__/remarkEscapeMarkdownEntities.spec.js @@ -13,12 +13,36 @@ const process = text => { describe('remarkEscapeMarkdownEntities', () => { it('should escape common markdown entities', () => { - expect(process('*~`[_')).toEqual('\\*\\~\\`\\[\\_'); + expect(process('*a*')).toEqual('\\*a\\*'); + expect(process('**a**')).toEqual('\\*\\*a\\*\\*'); + expect(process('***a***')).toEqual('\\*\\*\\*a\\*\\*\\*'); + expect(process('_a_')).toEqual('\\_a\\_'); + expect(process('__a__')).toEqual('\\_\\_a\\_\\_'); + expect(process('~~a~~')).toEqual('\\~\\~a\\~\\~'); + expect(process('[]')).toEqual('\\[]'); + expect(process('[]()')).toEqual('\\[]()'); + expect(process('[a](b)')).toEqual('\\[a](b)'); + expect(process('[Test sentence.](https://www.example.com)')) + .toEqual('\\[Test sentence.](https://www.example.com)'); + expect(process('![a](b)')).toEqual('!\\[a](b)'); + }); + + it('should not escape inactive, single markdown entities', () => { + expect(process('a*b')).toEqual('a*b'); + expect(process('_')).toEqual('_'); + expect(process('~')).toEqual('~'); + expect(process('[')).toEqual('['); }); it('should escape leading markdown entities', () => { expect(process('#')).toEqual('\\#'); expect(process('-')).toEqual('\\-'); + expect(process('*')).toEqual('\\*'); + expect(process('>')).toEqual('\\>'); + expect(process('=')).toEqual('\\='); + 
expect(process('|')).toEqual('\\|'); + expect(process('```')).toEqual('\\`\\``'); + expect(process(' ')).toEqual('\\ '); }); it('should escape leading markdown entities preceded by whitespace', () => { @@ -30,4 +54,25 @@ describe('remarkEscapeMarkdownEntities', () => { expect(process('a# # b #')).toEqual('a# # b #'); expect(process('a- - b -')).toEqual('a- - b -'); }); + + it('should not escape html tags', () => { + expect(process('')).toEqual(''); + expect(process('a b e')).toEqual('a b e'); + }); + + it('should escape the contents of html blocks', () => { + expect(process('
*a*
')).toEqual('
\\*a\\*
'); + }); + + it('should not escape the contents of preformatted html blocks', () => { + expect(process('
*a*
')).toEqual('
*a*
'); + expect(process('')).toEqual(''); + expect(process('')).toEqual(''); + expect(process('
\n*a*\n
')).toEqual('
\n*a*\n
'); + expect(process('a b
*c*
d e')).toEqual('a b
*c*
d e'); + }); + + it('should not parse footnotes', () => { + expect(process('[^a]')).toEqual('\\[^a]'); + }); }); diff --git a/src/components/Widgets/Markdown/serializers/__tests__/remarkStripTrailingBreaks.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/remarkStripTrailingBreaks.spec.js new file mode 100644 index 00000000..a7b0500f --- /dev/null +++ b/src/components/Widgets/Markdown/serializers/__tests__/remarkStripTrailingBreaks.spec.js @@ -0,0 +1,24 @@ +import unified from 'unified'; +import u from 'unist-builder'; +import remarkStripTrailingBreaks from '../remarkStripTrailingBreaks'; + +const process = children => { + const tree = u('root', children); + const strippedMdast = unified() + .use(remarkStripTrailingBreaks) + .runSync(tree); + + return strippedMdast.children; +}; + +describe('remarkStripTrailingBreaks', () => { + it('should remove trailing breaks at the end of a block', () => { + expect(process([u('break')])).toEqual([]); + expect(process([u('break'), u('text', '\n \n')])).toEqual([u('text', '\n \n')]); + expect(process([u('text', 'a'), u('break')])).toEqual([u('text', 'a')]); + }); + + it('should not remove trailing breaks that are not at the end of a block', () => { + expect(process([u('break'), u('text', 'a')])).toEqual([u('break'), u('text', 'a')]); + }); +}); diff --git a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js new file mode 100644 index 00000000..fd3bda41 --- /dev/null +++ b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js @@ -0,0 +1,40 @@ +import { flow } from 'lodash'; +import { markdownToSlate, slateToMarkdown } from '../index'; + +const process = flow([markdownToSlate, slateToMarkdown]); + +describe('slate', () => { + it('should distinguish between newlines and hard breaks', () => { + expect(process('a\n')).toEqual('a\n'); + }); + + it('should not decode encoded html entities in inline code', () => { + 
expect(process('<div>')).toEqual('<div>\n'); + }); + + it('should parse non-text children of mark nodes', () => { + expect(process('**a[b](c)d**')).toEqual('**a[b](c)d**\n'); + expect(process('**[a](b)**')).toEqual('**[a](b)**\n'); + expect(process('**![a](b)**')).toEqual('**![a](b)**\n'); + expect(process('_`a`_')).toEqual('_`a`_\n'); + expect(process('_`a`b_')).toEqual('_`a`b_\n'); + }); + + it('should condense adjacent, identically styled text and inline nodes', () => { + expect(process('**a ~~b~~~~c~~**')).toEqual('**a ~~bc~~**\n'); + expect(process('**a ~~b~~~~[c](d)~~**')).toEqual('**a ~~b[c](d)~~**\n'); + }); + + it('should handle nested markdown entities', () => { + expect(process('**a**b**c**')).toEqual('**a**b**c**\n'); + expect(process('**a _b_ c**')).toEqual('**a _b_ c**\n'); + }); + + it('should parse inline images as images', () => { + expect(process('a ![b](c)')).toEqual('a ![b](c)\n'); + }); + + it('should not escape markdown entities in html', () => { + expect(process('*')).toEqual('*\n'); + }); +}); diff --git a/src/components/Widgets/Markdown/serializers/index.js b/src/components/Widgets/Markdown/serializers/index.js index f3c11a64..5b00150d 100644 --- a/src/components/Widgets/Markdown/serializers/index.js +++ b/src/components/Widgets/Markdown/serializers/index.js @@ -16,7 +16,9 @@ import remarkToSlate from './remarkSlate'; import remarkSquashReferences from './remarkSquashReferences'; import remarkImagesToText from './remarkImagesToText'; import remarkShortcodes from './remarkShortcodes'; -import remarkEscapeMarkdownEntities from './remarkEscapeMarkdownEntities' +import remarkEscapeMarkdownEntities from './remarkEscapeMarkdownEntities'; +import remarkStripTrailingBreaks from './remarkStripTrailingBreaks'; +import remarkAllowHtmlEntities from './remarkAllowHtmlEntities'; import slateToRemark from './slateRemark'; import registry from '../../../../lib/registry'; @@ -63,7 +65,9 @@ export const markdownToRemark = markdown => { * Parse the Markdown 
string input to an MDAST. */ const parsed = unified() - .use(markdownToRemarkPlugin, { fences: true, pedantic: true, commonmark: true }) + .use(markdownToRemarkPlugin, { fences: true, commonmark: true }) + .use(markdownToRemarkRemoveTokenizers, { inlineTokenizers: ['url'] }) + .use(remarkAllowHtmlEntities) .parse(markdown); /** @@ -79,6 +83,16 @@ export const markdownToRemark = markdown => { }; +/** + * Remove named tokenizers from the parser, effectively deactivating them. + */ +function markdownToRemarkRemoveTokenizers({ inlineTokenizers }) { + inlineTokenizers && inlineTokenizers.forEach(tokenizer => { + delete this.Parser.prototype.inlineTokenizers[tokenizer]; + }); +} + + /** * Serialize an MDAST to a Markdown string. */ @@ -102,7 +116,6 @@ export const remarkToMarkdown = obj => { const remarkToMarkdownPluginOpts = { commonmark: true, fences: true, - pedantic: true, listItemIndent: '1', // Settings to emulate the defaults from the Prosemirror editor, not @@ -117,6 +130,7 @@ export const remarkToMarkdown = obj => { */ const escapedMdast = unified() .use(remarkEscapeMarkdownEntities) + .use(remarkStripTrailingBreaks) .runSync(mdast); const markdown = unified() diff --git a/src/components/Widgets/Markdown/serializers/remarkAllowHtmlEntities.js b/src/components/Widgets/Markdown/serializers/remarkAllowHtmlEntities.js new file mode 100644 index 00000000..62e4d3be --- /dev/null +++ b/src/components/Widgets/Markdown/serializers/remarkAllowHtmlEntities.js @@ -0,0 +1,59 @@ +export default function remarkAllowHtmlEntities() { + this.Parser.prototype.inlineTokenizers.text = text; + + /** + * This is a port of the `remark-parse` text tokenizer, adapted to exclude + * HTML entity decoding. 
+ */ + function text(eat, value, silent) { + var self = this; + var methods; + var tokenizers; + var index; + var length; + var subvalue; + var position; + var tokenizer; + var name; + var min; + var now; + + /* istanbul ignore if - never used (yet) */ + if (silent) { + return true; + } + + methods = self.inlineMethods; + length = methods.length; + tokenizers = self.inlineTokenizers; + index = -1; + min = value.length; + + while (++index < length) { + name = methods[index]; + + if (name === 'text' || !tokenizers[name]) { + continue; + } + + tokenizer = tokenizers[name].locator; + + if (!tokenizer) { + eat.file.fail('Missing locator: `' + name + '`'); + } + + position = tokenizer.call(self, value, 1); + + if (position !== -1 && position < min) { + min = position; + } + } + + subvalue = value.slice(0, min); + + eat(subvalue)({ + type: 'text', + value: subvalue, + }); + } +}; diff --git a/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js b/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js index d38bc8bd..6e7903c5 100644 --- a/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js +++ b/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js @@ -1,3 +1,237 @@ +import { flow, partial, flatMap, flatten, map } from 'lodash'; +import { joinPatternSegments, combinePatterns, replaceWhen } from '../../../../lib/regexHelper'; + +/** + * Reusable regular expressions segments. + */ +const patternSegments = { + /** + * Matches zero or more HTML attributes followed by the tag close bracket, + * which may be prepended by zero or more spaces. The attributes can use + * single or double quotes and may be prepended by zero or more spaces. + */ + htmlOpeningTagEnd: /(?: *\w+=(?:(?:"[^"]*")|(?:'[^']*')))* *>/, +}; + + +/** + * Patterns matching substrings that should not be escaped. Array values must be + * joined before use. 
+ */ +const nonEscapePatterns = { + /** + * HTML Tags + * + * Matches HTML opening tags and any attributes. Does not check for contents + * between tags or closing tags. + */ + htmlTags: [ + /** + * Matches the beginning of an HTML tag, excluding preformatted tag types. + */ + /<(?!pre|style|script)[\w]+/, + + /** + * Matches attributes. + */ + patternSegments.htmlOpeningTagEnd, + ], + + + /** + * Preformatted HTML Blocks + * + * Matches HTML blocks with preformatted content. The content of these blocks, + * including the tags and attributes, should not be escaped at all. + */ + preformattedHtmlBlocks: [ + /** + * Matches the names of tags known to have preformatted content. The capture + * group is reused when matching the closing tag. + * + * NOTE: this pattern reuses a capture group, and could break if combined with + * other expressions using capture groups. + */ + /<(pre|style|script)/, + + /** + * Matches attributes. + */ + patternSegments.htmlOpeningTagEnd, + + /** + * Allow zero or more of any character (including line breaks) between the + * tags. Match lazily in case of subsequent blocks. + */ + /(.|[\n\r])*?/, + + /** + * Match closing tag via first capture group. + */ + /<\/\1>/, + ], +}; + + +/** + * Escape patterns + * + * Each escape pattern matches a markdown entity and captures up to two + * groups. These patterns must use one of the following formulas: + * + * - Single capture group followed by match content - /(...).../ + * The captured characters should be escaped and the remaining match should + * remain unchanged. + * + * - Two capture groups surrounding matched content - /(...)...(...)/ + * The captured characters in both groups should be escaped and the matched + * characters in between should remain unchanged. + */ +const escapePatterns = [ + /** + * Emphasis/Bold - Asterisk + * + * Match strings surrounded by one or more asterisks on both sides. 
+ */ + /(\*+)[^\*]*(\1)/g, + + /** + * Emphasis - Underscore + * + * Match strings surrounded by a single underscore on both sides followed by + * a word boundary. Remark disregards whether a word boundary exists at the + * beginning of an emphasis node. + */ + /(_)[^_]+(_)\b/g, + + /** + * Bold - Underscore + * + * Match strings surrounded by multiple underscores on both sides. Remark + * disregards the absence of word boundaries on either side of a bold node. + */ + /(_{2,})[^_]*(\1)/g, + + /** + * Strikethrough + * + * Match strings surrounded by multiple tildes on both sides. + */ + /(~+)[^~]*(\1)/g, + + /** + * Inline Code + * + * Match strings surrounded by backticks. + */ + /(`+)[^`]*(\1)/g, + + /** + * Links, Images, References, and Footnotes + * + * Match strings surrounded by brackets. This could be improved to + * specifically match only the exact syntax of each covered entity, but + * doing so through current approach would incur a considerable performance + * penalty. + */ + /(\[)[^\]]*]/g, +]; + + +/** + * Generate new non-escape expression. The non-escape expression matches + * substrings whose contents should not be processed for escaping. + */ +const joinedNonEscapePatterns = map(nonEscapePatterns, pattern => { + return new RegExp(joinPatternSegments(pattern)); +}); +const nonEscapePattern = combinePatterns(joinedNonEscapePatterns); + + +/** + * Create chain of successive escape functions for various markdown entities. + */ +const escapeFunctions = escapePatterns.map(pattern => partial(escapeDelimiters, pattern)); +const escapeAll = flow(escapeFunctions); + + +/** + * Executes both the `escapeCommonChars` and `escapeLeadingChars` functions. 
+ */ +function escapeAllChars(text) { + const partiallyEscapedMarkdown = escapeCommonChars(text); + return escapeLeadingChars(partiallyEscapedMarkdown); +} + + +/** + * escapeLeadingChars + * + * Handles escaping for characters that must be positioned at the beginning of + * the string, such as headers and list items. + * + * Escapes '#', '*', '-', '>', '=', '|', and sequences of 3+ backticks or 4+ + * spaces when found at the beginning of a string, preceded by zero or more + * whitespace characters. + */ +function escapeLeadingChars(text) { + return text.replace(/^\s*([-#*>=|]| {4,}|`{3,})/, '$`\\$1'); +} + + +/** + * escapeCommonChars + * + * Escapes active markdown entities. See escape pattern groups for details on + * which entities are replaced. + */ +function escapeCommonChars(text) { + /** + * Generate new non-escape expression (must happen at execution time because + * we use `RegExp.exec`, which tracks its own state internally). + */ + const nonEscapeExpression = new RegExp(nonEscapePattern, 'gm'); + + /** + * Use `replaceWhen` to escape markdown entities only within substrings that + * are eligible for escaping. + */ + return replaceWhen(nonEscapeExpression, escapeAll, text, true); +} + + +/** + * escapeDelimiters + * + * Executes `String.replace` for a given pattern, but only on the first two + * capture groups. Specifically intended for escaping opening (and optionally + * closing) markdown entities without escaping the content in between. + */ +function escapeDelimiters(pattern, text) { + return text.replace(pattern, (match, start, end) => { + const hasEnd = typeof end === 'string'; + const matchSliceEnd = hasEnd ? match.length - end.length : match.length; + const content = match.slice(start.length, matchSliceEnd); + return `${escape(start)}${content}${hasEnd ? escape(end) : ''}`; + }); +} + + +/** + * escape + * + * Simple replacement function for escaping markdown entities. Prepends every + * character in the received string with a backslash. 
+ */ +function escape(delim) { + let result = ''; + for (const char of delim) { + result += `\\${char}`; + } + return result; +} + + /** * A Remark plugin for escaping markdown entities. * @@ -13,22 +247,6 @@ * stringification. */ export default function remarkEscapeMarkdownEntities() { - /** - * Escape all occurrences of '[', '*', '_', '`', and '~'. - */ - function escapeCommonChars(text) { - return text.replace(/[\[*_`~]/g, '\\$&'); - } - - /** - * Runs escapeCommonChars, and also escapes '#' and '-' when found at the - * beginning of any node's first child node. - */ - function escapeAllChars(text) { - const partiallyEscapedMarkdown = escapeCommonChars(text); - return partiallyEscapedMarkdown.replace(/^\s*([#-])/, '$`\\$1'); - } - const transform = (node, index) => { const children = node.children && node.children.map(transform); diff --git a/src/components/Widgets/Markdown/serializers/remarkImagesToText.js b/src/components/Widgets/Markdown/serializers/remarkImagesToText.js index f63d3820..568e8a06 100644 --- a/src/components/Widgets/Markdown/serializers/remarkImagesToText.js +++ b/src/components/Widgets/Markdown/serializers/remarkImagesToText.js @@ -1,18 +1,26 @@ /** * Images must be parsed as shortcodes for asset proxying. This plugin converts - * MDAST image nodes back to text to allow shortcode pattern matching. + * MDAST image nodes back to text to allow shortcode pattern matching. Note that + * this transformation only occurs for images that are the sole child of a top + * level paragraph - any other image is left alone and treated as an inline + * image. */ export default function remarkImagesToText() { return transform; function transform(node) { - const children = node.children ? node.children.map(transform) : node.children; - if (node.type === 'image') { - const alt = node.alt || ''; - const url = node.url || ''; - const title = node.title ? 
` "${node.title}"` : ''; - return { type: 'text', value: `![${alt}](${url}${title})` }; - } + const children = node.children.map(child => { + if ( + child.type === 'paragraph' + && child.children.length === 1 + && child.children[0].type === 'image' + ) { + const { alt = '', url = '', title = '' } = child.children[0]; + const value = `![${alt}](${url}${title ? ` "${title}"` : ''})`; + child.children = [{ type: 'text', value }]; + } + return child; + }); return { ...node, children }; } } diff --git a/src/components/Widgets/Markdown/serializers/remarkSlate.js b/src/components/Widgets/Markdown/serializers/remarkSlate.js index f8cc42f0..7f403089 100644 --- a/src/components/Widgets/Markdown/serializers/remarkSlate.js +++ b/src/components/Widgets/Markdown/serializers/remarkSlate.js @@ -1,6 +1,33 @@ -import { get, isEmpty, isArray } from 'lodash'; +import { get, isEmpty, isArray, last, flatMap } from 'lodash'; import u from 'unist-builder'; +/** + * A Remark plugin for converting an MDAST to Slate Raw AST. Remark plugins + * return a `transform` function that receives the MDAST as its first argument. + */ +export default function remarkToSlate() { + return transform; +} + +function transform(node) { + + /** + * Call `transform` recursively on child nodes. + * + * If a node returns a falsey value, filter it out. Some nodes do not + * translate from MDAST to Slate, such as definitions for link/image + * references or footnotes. + */ + const children = !['strong', 'emphasis', 'delete'].includes(node.type) + && !isEmpty(node.children) + && flatMap(node.children, transform).filter(val => val); + + /** + * Run individual nodes through the conversion factory. + */ + return convertNode(node, children); +} + /** * Map of MDAST node types to Slate node types. */ @@ -47,7 +74,7 @@ function createBlock(type, nodes, props = {}) { /** * Create a Slate Block node. 
*/ -function createInline(type, nodes, props = {}) { +function createInline(type, props = {}, nodes) { return { kind: 'inline', type, nodes, ...props }; } @@ -63,8 +90,7 @@ function createText(value, data) { return {...node, text: value }; } -function convertMarkNode(node, parentMarks = []) { - +function processMarkNode(node, parentMarks = []) { /** * Add the current node's mark type to the marks collected from parent * mark nodes, if any. @@ -72,34 +98,69 @@ function convertMarkNode(node, parentMarks = []) { const markType = markMap[node.type]; const marks = markType ? [...parentMarks, { type: markMap[node.type] }] : parentMarks; - /** - * Set an array to collect sections of text. - */ - const ranges = []; + const children = flatMap(node.children, childNode => { + switch (childNode.type) { + /** + * If a text node is a direct child of the current node, it should be + * set aside as a range, and all marks that have been collected in the + * `marks` array should apply to that specific range. + */ + case 'html': + case 'text': + return { text: childNode.value, marks }; - node.children && node.children.forEach(childNode => { + /** + * MDAST inline code nodes don't have children, just a text value, similar + * to a text node, so it receives the same treatment as a text node, but we + * first add the inline code mark to the marks array. + */ + case 'inlineCode': { + const childMarks = [ ...marks, { type: markMap['inlineCode'] } ]; + return { text: childNode.value, marks: childMarks }; + } - /** - * If a text node is a direct child of the current node, it should be - * set aside as a range, and all marks that have been collected in the - * `marks` array should apply to that specific range. - */ - if (['html', 'text'].includes(childNode.type)) { - ranges.push({ text: childNode.value, marks }); - return; + /** + * Process nested style nodes. The recursive results should be pushed into + * the ranges array. 
This way, every MDAST nested text structure becomes a + * flat array of ranges that can serve as the value of a single Slate Raw + * text node. + */ + case 'strong': + case 'emphasis': + case 'delete': + return processMarkNode(childNode, marks); + + /** + * Remaining nodes simply need mark data added to them, and to then be + * added into the cumulative children array. + */ + default: + return { ...childNode, data: { marks } }; } - - /** - * Any non-text child node should be processed as a parent node. The - * recursive results should be pushed into the ranges array. This way, - * every MDAST nested text structure becomes a flat array of ranges - * that can serve as the value of a single Slate Raw text node. - */ - const nestedRanges = convertMarkNode(childNode, marks); - ranges.push(...nestedRanges); }); - return ranges; + return children; +} + +function convertMarkNode(node) { + const slateNodes = processMarkNode(node); + + const convertedSlateNodes = slateNodes.reduce((acc, node, idx, nodes) => { + const lastConvertedNode = last(acc); + if (node.text && lastConvertedNode && lastConvertedNode.ranges) { + lastConvertedNode.ranges.push(node); + } + else if (node.text) { + acc.push(createText([node])); + } + else { + acc.push(transform(node)); + } + + return acc; + }, []); + + return convertedSlateNodes; } /** @@ -186,7 +247,7 @@ function convertNode(node, nodes) { case 'strong': case 'emphasis': case 'delete': { - return createText(convertMarkNode(node)); + return convertMarkNode(node); } /** @@ -239,7 +300,8 @@ function convertNode(node, nodes) { * line breaks within a text node. */ case 'break': { - return createText('\n'); + const textNode = createText('\n'); + return createInline('break', {}, [ textNode ]); } /** @@ -258,11 +320,25 @@ function convertNode(node, nodes) { * schema references them in the data object. 
*/ case 'link': { - const { title, url } = node; - const data = { title, url }; - return createInline(typeMap[type], nodes, { data }); + const { title, url, data } = node; + const newData = { ...data, title, url }; + return createInline(typeMap[type], { data: newData }, nodes); } + /** + * Images + * + * Identical to link nodes except for the lack of child nodes and addition + * of alt attribute data. MDAST stores the link attributes directly on the + * node, while our Slate schema references them in the data object. + */ + case 'image': { + const { title, url, alt, data } = node; + const newData = { ...data, title, alt, url }; + return createInline(typeMap[type], { isVoid: true, data: newData }); + } + + /** * Tables * @@ -275,29 +351,3 @@ function convertNode(node, nodes) { } } } - - -/** - * A Remark plugin for converting an MDAST to Slate Raw AST. Remark plugins - * return a `transform` function that receives the MDAST as it's first argument. - */ -export default function remarkToSlate() { - function transform(node) { - - /** - * Call `transform` recursively on child nodes. - * - * If a node returns a falsey value, filter it out. Some nodes do not - * translate from MDAST to Slate, such as definitions for link/image - * references or footnotes. 
- */ - const children = !isEmpty(node.children) && node.children.map(transform).filter(val => val); - - /** - * Run individual nodes through the conversion factory. - */ - return convertNode(node, children); - } - - return transform; -} diff --git a/src/components/Widgets/Markdown/serializers/remarkStripTrailingBreaks.js b/src/components/Widgets/Markdown/serializers/remarkStripTrailingBreaks.js new file mode 100644 index 00000000..67a37c3b --- /dev/null +++ b/src/components/Widgets/Markdown/serializers/remarkStripTrailingBreaks.js @@ -0,0 +1,56 @@ +import mdastToString from 'mdast-util-to-string'; + +/** + * Removes break nodes that are at the end of a block. 
+ * + * When a trailing double space or backslash is encountered at the end of a + * markdown block, Remark will interpret the character(s) literally, as only + * break entities followed by text qualify as breaks. A manually created MDAST, + * however, may have such entities, and users of visual editors shouldn't see + * these artifacts in resulting markdown. + */ +export default function remarkStripTrailingBreaks() { + const transform = node => { + if (node.children) { + node.children = node.children + .map((child, idx, children) => { + + /** + * Only touch break nodes. Convert all subsequent nodes to their text + * value and exclude the break node if no non-whitespace characters + * are found. + */ + if (child.type === 'break') { + const subsequentNodes = children.slice(idx + 1); + + /** + * Create a small MDAST so that mdastToString can process all + * siblings as children of one node rather than making multiple + * calls. + */ + const fragment = { type: 'root', children: subsequentNodes }; + const subsequentText = mdastToString(fragment); + return subsequentText.trim() ? child : null; + } + + /** + * Always return the child if not a break. + */ + return child; + }) + + /** + * Because some break nodes may be excluded, we filter out the resulting + * null values. + */ + .filter(child => child) + + /** + * Recurse through the MDAST by transforming each individual child node. 
+ */ + .map(transform); + } + return node; + }; + return transform; +}; diff --git a/src/components/Widgets/Markdown/serializers/slateRemark.js b/src/components/Widgets/Markdown/serializers/slateRemark.js index b586cc81..7812dfac 100644 --- a/src/components/Widgets/Markdown/serializers/slateRemark.js +++ b/src/components/Widgets/Markdown/serializers/slateRemark.js @@ -1,4 +1,4 @@ -import { get, isEmpty, concat, without, flatten, flatMap, initial } from 'lodash'; +import { get, isEmpty, concat, without, flatten, flatMap, initial, last, difference, reverse, sortBy } from 'lodash'; import u from 'unist-builder'; /** @@ -21,6 +21,7 @@ const typeMap = { 'table': 'table', 'table-row': 'tableRow', 'table-cell': 'tableCell', + 'break': 'break', 'thematic-break': 'thematicBreak', 'link': 'link', 'image': 'image', @@ -37,6 +38,108 @@ const markMap = { code: 'inlineCode', }; +let shortcodePlugins; + +export default function slateToRemark(raw, opts) { + /** + * Set shortcode plugins in outer scope. + */ + ({ shortcodePlugins } = opts); + + /** + * The Slate Raw AST generally won't have a top level type, so we set it to + * "root" for clarity. + */ + raw.type = 'root'; + + return transform(raw); +} + + +/** + * The transform function mimics the approach of a Remark plugin for + * conformity with the other serialization functions. This function converts + * Slate nodes to MDAST nodes, and recursively calls itself to process child + * nodes to arbitrary depth. + */ +function transform(node) { + /** + * Combine adjacent text and inline nodes before processing so they can + * share marks. + */ + const combinedChildren = node.nodes && combineTextAndInline(node.nodes); + + /** + * Call `transform` recursively on child nodes, and flatten the resulting + * array. + */ + const children = !isEmpty(combinedChildren) && flatMap(combinedChildren, transform); + + /** + * Run individual nodes through conversion factories. + */ + return ['text'].includes(node.kind) + ? 
convertTextNode(node) + : convertNode(node, children, shortcodePlugins); +} + + +/** + * Includes inline nodes as ranges in adjacent text nodes where appropriate, so + * that mark node combining logic can apply to both text and inline nodes. This + * is necessary because Slate doesn't allow inline nodes to have marks while + * inline nodes in MDAST may be nested within mark nodes. Treating them as if + * they were text is a bit of a necessary hack. + */ +function combineTextAndInline(nodes) { + return nodes.reduce((acc, node, idx, nodes) => { + const prevNode = last(acc); + const prevNodeRanges = get(prevNode, 'ranges'); + const data = node.data || {}; + + /** + * If the previous node has ranges and the current node has marks in data + * (only happens when we place them on inline nodes here in the parser), or + * the current node also has ranges (because the previous node was + * originally an inline node that we've already squashed into a range) + * combine the current node into the previous. + */ + if (!isEmpty(prevNodeRanges) && !isEmpty(data.marks)) { + prevNodeRanges.push({ node, marks: data.marks }); + return acc; + } + + if (!isEmpty(prevNodeRanges) && !isEmpty(node.ranges)) { + prevNode.ranges = prevNodeRanges.concat(node.ranges); + return acc; + } + + /** + * Break nodes contain a single child text node with a newline character + * for visual purposes in the editor, but Remark break nodes have no + * children, so we remove the child node here. + */ + if (node.type === 'break') { + acc.push({ kind: 'inline', type: 'break' }); + return acc; + } + + /** + * Convert remaining inline nodes to standalone text nodes with ranges. + */ + if (node.kind === 'inline') { + acc.push({ kind: 'text', ranges: [{ node, marks: data.marks }] }); + return acc; + } + + /** + * Only remaining case is an actual text node, can be pushed as is. 
+ */ + acc.push(node); + return acc; + }, []); +} + /** * Slate treats inline code decoration as a standard mark, but MDAST does @@ -56,24 +159,6 @@ function processCodeMark(markTypes) { } -/** - * Returns an array of one or more MDAST text nodes of the given type, derived - * from the text received. Certain transformations, such as line breaks, cause - * multiple nodes to be returned. - */ -function createTextNodes(text, type = 'html') { - /** - * Split the text string at line breaks, then map each substring to an array - * pair consisting of an MDAST text node followed by a break node. This will - * result in nested arrays, so we use `flatMap` to produce a flattened array, - * and `initial` to leave off the superfluous trailing break. - */ - const brokenText = text.split('\n'); - const toPair = str => [u(type, str), u('break')]; - return initial(flatMap(brokenText, toPair)); -} - - /** * Wraps a text node in one or more mark nodes by placing the text node in an * array and using that as the `children` value of a mark node. The resulting @@ -142,50 +227,134 @@ function wrapTextWithMarks(textNode, markTypes) { * replaced with multiple MDAST nodes, so the resulting array must be flattened. */ function convertTextNode(node) { - /** - * If the Slate text node has no "ranges" property, just return an equivalent - * MDAST node. + * If the Slate text node has a "ranges" property, translate the Slate AST to + * a nested MDAST structure. Otherwise, just return an equivalent MDAST text + * node. */ - if (!node.ranges) { - return createTextNodes(node.text); + if (node.ranges) { + const processedRanges = node.ranges.map(processRanges); + const condensedNodes = processedRanges.reduce(condenseNodesReducer, { nodes: [] }); + return condensedNodes.nodes; } - /** - * If there is no "text" property, convert the text range(s) to an array of - * one or more nested MDAST nodes. 
- */ - const textNodes = node.ranges.map(range => { - /** - * Get an array of the mark types, converted to their MDAST equivalent - * types. - */ - const { marks = [], text } = range; - const markTypes = marks.map(mark => markMap[mark.type]); + if (node.kind === 'inline') { + return transform(node); + } + return u('html', node.text); +} + + +/** + * Process Slate node ranges in preparation for MDAST transformation. + */ +function processRanges(range) { + /** + * Get an array of the mark types, converted to their MDAST equivalent + * types. + */ + const { marks = [], text } = range; + const markTypes = marks.map(mark => markMap[mark.type]); + + if (typeof range.text === 'string') { /** * Code marks must be removed from the marks array, and the presence of a * code mark changes the text node type that should be used. */ const { filteredMarkTypes, textNodeType } = processCodeMark(markTypes); + return { text, marks: filteredMarkTypes, textNodeType }; + } - /** - * Create the base text node. - */ - const textNodes = createTextNodes(text, textNodeType); + return { node: range.node, marks: markTypes }; +} - /** - * Recursively wrap the base text node in the individual mark nodes, if - * any exist. - */ - return textNodes.map(textNode => wrapTextWithMarks(textNode, filteredMarkTypes)); - }); + +/** + * Slate's AST doesn't group adjacent text nodes with the same marks - a + * change in marks from letter to letter, even if some are in common, results + * in a separate range. For example, given "**a_b_**", transformation to and + * from Slate's AST will result in "**a****_b_**". + * + * MDAST treats styling entities as distinct nodes that contain children, so a + * "strong" node can contain a plain text node with a sibling "emphasis" node, + * which contains more text. This reducer serves to create an optimized nested + * MDAST without the typical redundancies that Slate's AST would produce if + * transformed as-is. 
The reducer can be called recursively to produce nested + * structures. + */ +function condenseNodesReducer(acc, node, idx, nodes) { + /** + * Skip any nodes that are being processed as children of an MDAST node + * through recursive calls. + */ + if (typeof acc.nextIndex === 'number' && acc.nextIndex > idx) { + return acc; + } /** - * Since each range will be mapped into an array, we flatten the result to - * return a single array of all nodes. + * Processing for nodes with marks. */ - return flatten(textNodes); + if (node.marks && node.marks.length > 0) { + /** + * For each mark on the current node, get the number of consecutive nodes + * (starting with this one) that have the mark. Whichever mark covers the + * most nodes is used as the parent node, and the nodes with that mark are + * processed as children. If the greatest number of consecutive nodes is + * tied between multiple marks, there is no priority as to which goes + * first. + */ + const markLengths = node.marks.map(mark => getMarkLength(mark, nodes.slice(idx))); + const parentMarkLength = last(sortBy(markLengths, 'length')); + const { markType: parentType, length: parentLength } = parentMarkLength; + + /** + * Since this and any consecutive nodes with the parent mark are going to + * be processed as children of the parent mark, this reducer should simply + * return the accumulator until after the last node to be covered by the + * new parent node. Here we set the next index that should be processed, + * if any. + */ + const newNextIndex = idx + parentLength; + + /** + * Get the set of nodes that should be processed as children of the new + * parent mark node, run each through the reducer as children of the + * parent node, and create the parent MDAST node with the resulting + * children. 
+ */ + const children = nodes.slice(idx, newNextIndex); + const denestedChildren = children.map(child => ({ ...child, marks: without(child.marks, parentType) })); + const mdastChildren = denestedChildren.reduce(condenseNodesReducer, { nodes: [], parentType }).nodes; + const mdastNode = u(parentType, mdastChildren); + + return { ...acc, nodes: [ ...acc.nodes, mdastNode ], nextIndex: newNextIndex }; + } + + /** + * Create the base text node, and pass in the array of mark types as data + * (helpful when optimizing/condensing the final structure). + */ + const baseNode = typeof node.text === 'string' + ? u(node.textNodeType, { marks: node.marks }, node.text) + : transform(node.node); + + /** + * Nodes reaching this point carry no marks, so the base node is appended + * to the accumulator without any wrapping mark node. + */ + return { ...acc, nodes: [ ...acc.nodes, baseNode ] }; +} + + +/** + * Get the number of consecutive Slate nodes containing a given mark beginning + * from the first received node. + */ +function getMarkLength(markType, nodes) { + let length = 0; + while(nodes[length] && nodes[length].marks.includes(markType)) { ++length; } + return { markType, length }; } @@ -267,8 +436,8 @@ function convertNode(node, children, shortcodePlugins) { */ case 'code': { const value = get(node.nodes, [0, 'text']); - const lang = get(node.data, 'lang'); - return u(typeMap[node.type], { lang }, value); + const { lang, ...data } = get(node, 'data', {}); + return u(typeMap[node.type], { lang, data }, value); } /** @@ -288,11 +457,11 @@ function convertNode(node, children, shortcodePlugins) { } /** - * Thematic Breaks + * Breaks * - * Thematic breaks don't have children. We parse them separately for - * clarity. + * Breaks don't have children. We parse them separately for clarity. */ + case 'break': case 'thematic-break': { return u(typeMap[node.type]); } @@ -304,45 +473,28 @@ function convertNode(node, children, shortcodePlugins) { * the node for both Slate and Remark schemas. 
*/ case 'link': { - const { url, title } = get(node, 'data', {}); - return u(typeMap[node.type], { url, title }, children); + const { url, title, ...data } = get(node, 'data', {}); + return u(typeMap[node.type], { url, title, data }, children); } + /** + * Images + * + * This transformation is almost identical to that of links, except for the + * lack of child nodes and addition of `alt` attribute data. Currently the + * CMS handles block images by shortcode, so this case will only apply to + * inline images, which currently can only occur through raw markdown + * insertion. + */ + case 'image': { + const { url, title, alt, ...data } = get(node, 'data', {}); + return u(typeMap[node.type], { url, title, alt, data }); + } + + /** * No default case is supplied because an unhandled case should never * occur. In the event that it does, let the error throw (for now). */ } } - - -export default function slateToRemark(raw, { shortcodePlugins }) { - /** - * The transform function mimics the approach of a Remark plugin for - * conformity with the other serialization functions. This function converts - * Slate nodes to MDAST nodes, and recursively calls itself to process child - * nodes to arbitrary depth. - */ - function transform(node) { - - /** - * Call `transform` recursively on child nodes, and flatten the resulting - * array. - */ - const children = !isEmpty(node.nodes) && flatten(node.nodes.map(transform)); - - /** - * Run individual nodes through conversion factories. - */ - return node.kind === 'text' ? convertTextNode(node) : convertNode(node, children, shortcodePlugins); - } - - /** - * The Slate Raw AST generally won't have a top level type, so we set it to - * "root" for clarity. 
- */ - - raw.type = 'root'; - - const mdast = transform(raw); - return mdast; -} diff --git a/src/components/Widgets/ObjectControl.js b/src/components/Widgets/ObjectControl.js index ae3dd43d..fea66f01 100644 --- a/src/components/Widgets/ObjectControl.js +++ b/src/components/Widgets/ObjectControl.js @@ -21,6 +21,13 @@ export default class ObjectControl extends Component { className: PropTypes.string, }; + /** + * In case the `onChange` function is frozen by a child widget implementation, + * e.g. when debounced, always get the latest object value instead of using + * `this.props.value` directly. + */ + getObjectValue = () => this.props.value; + controlFor(field) { const { onAddAsset, onRemoveAsset, getAsset, value, onChange } = this.props; if (field.get('widget') === 'hidden') { @@ -38,7 +45,7 @@ export default class ObjectControl extends Component { field, value: fieldValue, onChange: (val, metadata) => { - onChange((value || Map()).set(field.get('name'), val), metadata); + onChange((this.getObjectValue() || Map()).set(field.get('name'), val), metadata); }, onAddAsset, onRemoveAsset, diff --git a/src/lib/regexHelper.js b/src/lib/regexHelper.js new file mode 100644 index 00000000..c4b14e41 --- /dev/null +++ b/src/lib/regexHelper.js @@ -0,0 +1,145 @@ +import { last } from 'lodash'; + +/** + * Joins an array of regular expressions into a single expression, without + * altering the received expressions. + */ +export function joinPatternSegments(patterns) { + return patterns.map(p => p.source).join(''); +} + + +/** + * Combines an array of regular expressions into a single expression, wrapping + * each in a non-capturing group and interposing alternation characters (|) so + * that each expression is executed separately. + */ +export function combinePatterns(patterns, flags = '') { + return patterns.map(p => `(?:${p.source})`).join('|'); +} + + +/** + * Modify substrings within a string if they match a (global) pattern. Can be + * inverted to only modify non-matches. 
+ * + * params: + * matchPattern - regexp - a regular expression to check for matches + * replaceFn - function - a replacement function that receives a matched + * substring and returns a replacement substring + * text - string - the string to process + * invertMatchPattern - boolean - if true, non-matching substrings are modified + * instead of matching substrings + */ +export function replaceWhen(matchPattern, replaceFn, text, invertMatchPattern) { + /** + * Splits the string into an array of objects with the following shape: + * + * { + * index: number - the index of the substring within the string + * text: string - the substring + * match: boolean - true if the substring matched `matchPattern` + * } + * + * Loops through matches via recursion (`RegExp.exec` tracks the loop + * internally). + */ + function split(exp, text, acc) { + /** + * Get the next match starting from the end of the last match or start of + * string. + */ + const match = exp.exec(text); + const lastEntry = last(acc); + + /** + * `match` will be null if there are no matches. + */ + if (!match) return acc; + + /** + * If the match is at the beginning of the input string, normalize to a data + * object with the `match` flag set to `true`, and add to the accumulator. + */ + if (match.index === 0) { + addSubstring(acc, 0, match[0], true); + } + + /** + * If there are no entries in the accumulator, convert the substring before + * the match to a data object (without the `match` flag set to true) and + * push to the accumulator, followed by a data object for the matching + * substring. + */ + else if (!lastEntry) { + addSubstring(acc, 0, match.input.slice(0, match.index)); + addSubstring(acc, match.index, match[0], true); + } + + /** + * If the last entry in the accumulator immediately preceded the current + * matched substring in the original string, just add the data object for + * the matching substring to the accumulator. 
+ */ + else if (match.index === lastEntry.index + lastEntry.text.length) { + addSubstring(acc, match.index, match[0], true); + } + + /** + * Convert the substring before the match to a data object (without the + * `match` flag set to true), followed by a data object for the matching + * substring. + */ + else { + const nextIndex = lastEntry.index + lastEntry.text.length; + const nextText = match.input.slice(nextIndex, match.index); + addSubstring(acc, nextIndex, nextText); + addSubstring(acc, match.index, match[0], true); + } + + /** + * Continue executing the expression. + */ + return split(exp, text, acc); + } + + /** + * Factory for converting substrings to data objects and adding to an output + * array. + */ + function addSubstring(arr, index, text, match = false) { + arr.push({ index, text, match }); + } + + /** + * Split the input string to an array of data objects, each representing a + * matching or non-matching string. + */ + const acc = split(matchPattern, text, []); + + /** + * Process the trailing substring after the final match, if one exists. + */ + const lastEntry = last(acc); + if (!lastEntry) return replaceFn(text); + + const nextIndex = lastEntry.index + lastEntry.text.length; + if (text.length > nextIndex) { + acc.push({ index: nextIndex, text: text.slice(nextIndex) }); + } + + /** + * Map the data objects in the accumulator to their string values, modifying + * matched strings with the replacement function. Modifies non-matches if + * `invertMatchPattern` is truthy. + */ + const replacedText = acc.map(entry => { + const isMatch = invertMatchPattern ? !entry.match : entry.match; + return isMatch ? replaceFn(entry.text) : entry.text; + }); + + /** + * Return the joined string. + */ + return replacedText.join(''); +}