From cd111f3a3d290babb3902201626b3adc2e3a60ec Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Sun, 3 Sep 2017 11:36:30 -0400 Subject: [PATCH 01/14] distinguish between newline and soft break in editor --- .../Markdown/serializers/remarkSlate.js | 2 +- .../Markdown/serializers/slateRemark.js | 27 +++++-------------- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/src/components/Widgets/Markdown/serializers/remarkSlate.js b/src/components/Widgets/Markdown/serializers/remarkSlate.js index f8cc42f0..4e3a8236 100644 --- a/src/components/Widgets/Markdown/serializers/remarkSlate.js +++ b/src/components/Widgets/Markdown/serializers/remarkSlate.js @@ -239,7 +239,7 @@ function convertNode(node, nodes) { * line breaks within a text node. */ case 'break': { - return createText('\n'); + return createText('\n', { isBreak: true }); } /** diff --git a/src/components/Widgets/Markdown/serializers/slateRemark.js b/src/components/Widgets/Markdown/serializers/slateRemark.js index b586cc81..d4ec5f78 100644 --- a/src/components/Widgets/Markdown/serializers/slateRemark.js +++ b/src/components/Widgets/Markdown/serializers/slateRemark.js @@ -56,24 +56,6 @@ function processCodeMark(markTypes) { } -/** - * Returns an array of one or more MDAST text nodes of the given type, derived - * from the text received. Certain transformations, such as line breaks, cause - * multiple nodes to be returned. - */ -function createTextNodes(text, type = 'html') { - /** - * Split the text string at line breaks, then map each substring to an array - * pair consisting of an MDAST text node followed by a break node. This will - * result in nested arrays, so we use `flatMap` to produce a flattened array, - * and `initial` to leave off the superfluous trailing break. - */ - const brokenText = text.split('\n'); - const toPair = str => [u(type, str), u('break')]; - return initial(flatMap(brokenText, toPair)); -} - - /** * Wraps a text node in one or more mark nodes by placing the text node in an * array and using that as the `children` value of a mark node. The resulting @@ -143,12 +125,15 @@ function wrapTextWithMarks(textNode, markTypes) { */ function convertTextNode(node) { + if (get(node.data, 'isBreak')) { + return u('break'); + } /** * If the Slate text node has no "ranges" property, just return an equivalent * MDAST node. */ if (!node.ranges) { - return createTextNodes(node.text); + return u('html', node.text); } /** @@ -172,13 +157,13 @@ function convertTextNode(node) { /** * Create the base text node. */ - const textNodes = createTextNodes(text, textNodeType); + const textNode = u(textNodeType, text); /** * Recursively wrap the base text node in the individual mark nodes, if * any exist. */ - return textNodes.map(textNode => wrapTextWithMarks(textNode, filteredMarkTypes)); + return wrapTextWithMarks(textNode, filteredMarkTypes); }); /** From e54dee4220ff737731e7226dceb6af6e0db73060 Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Tue, 5 Sep 2017 22:57:01 -0700 Subject: [PATCH 02/14] allow links to be wrapped in marks --- .../MarkdownControl/VisualEditor/components.js | 10 +++++++++- .../serializers/__tests__/slate.spec.js | 17 +++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js diff --git a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js index 2f19dcc7..0c213884 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js +++ b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js @@ -1,4 +1,5 @@ import React from 'react'; +import { List } from 'immutable'; import cn from 'classnames'; import styles from './index.css'; @@ -34,10 +35,17 @@ export const NODE_COMPONENTS = { 'numbered-list': props =>
    {props.children}
, 'link': props => { + // Need to wrap this in mark components for any marks found in data. const data = props.node.get('data'); + const marks = data.get('marks'); const url = data.get('url'); const title = data.get('title'); - return {props.children}; + const link = {props.children}; + const result = !marks ? link : marks.reduce((acc, mark) => { + const MarkComponent = MARK_COMPONENTS[mark.type]; + return {acc}; + }, link); + return result; }, 'shortcode': props => { const { attributes, node, state: editorState } = props; diff --git a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js new file mode 100644 index 00000000..0c9c4edc --- /dev/null +++ b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js @@ -0,0 +1,17 @@ +import { flow } from 'lodash'; +import { markdownToSlate, slateToMarkdown } from '../index'; + +const process = flow([markdownToSlate, slateToMarkdown]); + +describe('slate', () => { + it('should distinguish between newlines and hard breaks', () => { + expect(process('a\n')).toEqual('a\n'); + }); + it('should not decode encoded html entities in inline code', () => { + expect(process('<div>')).toEqual('<div>\n'); + }); + + it('should parse non-text children of mark nodes', () => { + expect(process('**[a](b)**')).toEqual('**[a](b)**'); + }); +}); From 91590a2f25ded38f4bed3814f99205d698e50b2e Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Sat, 9 Sep 2017 12:33:52 -0700 Subject: [PATCH 03/14] remove pedantic markdown parsing --- src/components/Widgets/Markdown/serializers/index.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/components/Widgets/Markdown/serializers/index.js b/src/components/Widgets/Markdown/serializers/index.js index f3c11a64..3f528643 100644 --- a/src/components/Widgets/Markdown/serializers/index.js +++ b/src/components/Widgets/Markdown/serializers/index.js @@ -63,7 +63,7 @@ export const markdownToRemark = markdown => { * Parse the Markdown string input to an MDAST. */ const parsed = unified() - .use(markdownToRemarkPlugin, { fences: true, pedantic: true, commonmark: true }) + .use(markdownToRemarkPlugin, { fences: true, commonmark: true }) .parse(markdown); /** @@ -102,7 +102,6 @@ export const remarkToMarkdown = obj => { const remarkToMarkdownPluginOpts = { commonmark: true, fences: true, - pedantic: true, listItemIndent: '1', // Settings to emulate the defaults from the Prosemirror editor, not From 2d3bf9b3fc525b5f2f3f703ced24427fb816cba9 Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Tue, 12 Sep 2017 11:48:14 -0400 Subject: [PATCH 04/14] fix verbose markdown entity output Because we convert markdown to an AST, the literal input cannot be reconstructed, so we have to default to improving received markdown rather than degrading it. This fix implements smart MDAST parsing to ensure that adjacent nodes with the same styling (strong, emphasis, etc) are grouped together rather than separated (which results in verbose output). --- .../serializers/__tests__/slate.spec.js | 9 ++ .../Markdown/serializers/slateRemark.js | 96 +++++++++++++++++-- 2 files changed, 96 insertions(+), 9 deletions(-) diff --git a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js index 0c9c4edc..58a2413b 100644 --- a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js +++ b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js @@ -7,6 +7,7 @@ describe('slate', () => { it('should distinguish between newlines and hard breaks', () => { expect(process('a\n')).toEqual('a\n'); }); + it('should not decode encoded html entities in inline code', () => { expect(process('<div>')).toEqual('<div>\n'); }); @@ -14,4 +15,12 @@ describe('slate', () => { it('should parse non-text children of mark nodes', () => { expect(process('**[a](b)**')).toEqual('**[a](b)**'); }); + + it('should condense adjacent, identically styled text', () => { + expect(process('**a ~~b~~~~c~~**')).toEqual('**a ~~bc~~**\n'); + }); + + it('should handle nested markdown entities', () => { + expect(process('**a**b**c**')).toEqual('**a**b**c**\n'); + }); }); diff --git a/src/components/Widgets/Markdown/serializers/slateRemark.js b/src/components/Widgets/Markdown/serializers/slateRemark.js index d4ec5f78..b73b1aa7 100644 --- a/src/components/Widgets/Markdown/serializers/slateRemark.js +++ b/src/components/Widgets/Markdown/serializers/slateRemark.js @@ -1,4 +1,4 @@ -import { get, isEmpty, concat, without, flatten, flatMap, initial } from 'lodash'; +import { get, isEmpty, concat, without, flatten, flatMap, initial, last, difference, reverse, sortBy } from 'lodash'; import u from 'unist-builder'; /** @@ -137,10 +137,9 @@ function convertTextNode(node) { } /** - * If there is no "text" property, convert the text range(s) to an array of - * one or more nested MDAST nodes. + * Process Slate node ranges in preparation for MDAST transformation. */ - const textNodes = node.ranges.map(range => { + const processedRanges = node.ranges.map(range => { /** * Get an array of the mark types, converted to their MDAST equivalent * types. @@ -154,23 +153,102 @@ function convertTextNode(node) { */ const { filteredMarkTypes, textNodeType } = processCodeMark(markTypes); + return { text, marks: filteredMarkTypes, textNodeType }; + }); + + /** + * Slate's AST doesn't group adjacent text nodes with the same marks - a + * change in marks from letter to letter, even if some are in common, results + * in a separate range. For example, given "**a_b_**", transformation to and + * from Slate's AST will result in "**a****_b_**". + * + * MDAST treats styling entities as distinct nodes that contain children, so a + * "strong" node can contain a plain text node with a sibling "emphasis" node, + * which contains more text. This reducer serves to create an optimized nested + * MDAST without the typical redundancies that Slate's AST would produce if + * transformed as-is. The reducer can be called recursively to produce nested + * structures. + */ + const nodeGroupReducer = (acc, node, idx, nodes) => { /** - * Create the base text node. + * Skip any nodes that are being processed as children of an MDAST node + * through recursive calls. */ - const textNode = u(textNodeType, text); + if (typeof acc.nextIndex === 'number' && acc.nextIndex > idx) { + return acc; + } + + /** + * Processing for nodes with marks. + */ + if (node.marks && node.marks.length > 0) { + + /** + * For each mark on the current node, get the number of consecutive nodes + * (starting with this one) that have the mark. Whichever mark covers the + * most nodes is used as the parent node, and the nodes with that mark are + * processed as children. If the greatest number of consecutive nodes is + * tied between multiple marks, there is no priority as to which goes + * first. + */ + const markLengths = node.marks.map(mark => getMarkLength(mark, nodes.slice(idx))); + const parentMarkLength = last(sortBy(markLengths, 'length')); + const { markType: parentType, length: parentLength } = parentMarkLength; + + /** + * Since this and any consecutive nodes with the parent mark are going to + * be processed as children of the parent mark, this reducer should simply + * return the accumulator until after the last node to be covered by the + * new parent node. Here we set the next index that should be processed, + * if any. + */ + const newNextIndex = idx + parentLength; + + /** + * Get the set of nodes that should be processed as children of the new + * parent mark node, run each through the reducer as children of the + * parent node, and create the parent MDAST node with the resulting + * children. + */ + const children = nodes.slice(idx, newNextIndex); + const denestedChildren = children.map(child => ({ ...child, marks: without(child.marks, parentType) })); + const mdastChildren = denestedChildren.reduce(nodeGroupReducer, { nodes: [], parentType }).nodes; + const mdastNode = u(parentType, mdastChildren); + + return { ...acc, nodes: [ ...acc.nodes, mdastNode ], nextIndex: newNextIndex }; + } + + /** + * Create the base text node, and pass in the array of mark types as data + * (helpful when optimizing/condensing the final structure). + */ + const textNode = u(node.textNodeType, { marks: node.marks }, node.text); /** * Recursively wrap the base text node in the individual mark nodes, if * any exist. */ - return wrapTextWithMarks(textNode, filteredMarkTypes); - }); + return { ...acc, nodes: [ ...acc.nodes, textNode ] }; + }; + + const nodeGroups = processedRanges.reduce(nodeGroupReducer, { nodes: [] }); /** * Since each range will be mapped into an array, we flatten the result to * return a single array of all nodes. */ - return flatten(textNodes); + return nodeGroups.nodes; +} + + +/** + * Get the number of consecutive Slate nodes containing a given mark beginning + * from the first received node. + */ +function getMarkLength(markType, nodes) { + let length = 0; + while(nodes[length] && nodes[length].marks.includes(markType)) { ++length; } + return { markType, length }; } From e937e8e62613300d9bfd14fe227dd5ac907fb250 Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Thu, 14 Sep 2017 17:50:08 -0400 Subject: [PATCH 05/14] handle markdown styled inline nodes Slate does not allow inline nodes like links and images to have marks (like strong, emphasis). This commit changes the parsers to process these nodes as if they were text nodes so that marks are handled. --- .../serializers/__tests__/slate.spec.js | 9 +- .../Markdown/serializers/remarkSlate.js | 112 ++++--- .../Markdown/serializers/slateRemark.js | 313 +++++++++++------- 3 files changed, 268 insertions(+), 166 deletions(-) diff --git a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js index 58a2413b..ae8383e7 100644 --- a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js +++ b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js @@ -13,14 +13,19 @@ describe('slate', () => { }); it('should parse non-text children of mark nodes', () => { - expect(process('**[a](b)**')).toEqual('**[a](b)**'); + expect(process('**a[b](c)d**')).toEqual('**a[b](c)d**\n'); + expect(process('**[a](b)**')).toEqual('**[a](b)**\n'); + expect(process('**![a](b)**')).toEqual('**![a](b)**\n'); + expect(process('_`a`_')).toEqual('_`a`_\n'); }); - it('should condense adjacent, identically styled text', () => { + it('should condense adjacent, identically styled text and inline nodes', () => { expect(process('**a ~~b~~~~c~~**')).toEqual('**a ~~bc~~**\n'); + expect(process('**a ~~b~~~~[c](d)~~**')).toEqual('**a ~~b[c](d)~~**\n'); }); it('should handle nested markdown entities', () => { expect(process('**a**b**c**')).toEqual('**a**b**c**\n'); + expect(process('**a _b_ c**')).toEqual('**a _b_ c**\n'); }); }); diff --git a/src/components/Widgets/Markdown/serializers/remarkSlate.js b/src/components/Widgets/Markdown/serializers/remarkSlate.js index 4e3a8236..00c6942e 100644 --- a/src/components/Widgets/Markdown/serializers/remarkSlate.js +++ b/src/components/Widgets/Markdown/serializers/remarkSlate.js @@ -1,6 +1,33 @@ -import { get, isEmpty, isArray } from 'lodash'; +import { get, isEmpty, isArray, last, flatMap } from 'lodash'; import u from 'unist-builder'; +/** + * A Remark plugin for converting an MDAST to Slate Raw AST. Remark plugins + * return a `transform` function that receives the MDAST as it's first argument. + */ +export default function remarkToSlate() { + return transform; +} + +function transform(node) { + + /** + * Call `transform` recursively on child nodes. + * + * If a node returns a falsey value, filter it out. Some nodes do not + * translate from MDAST to Slate, such as definitions for link/image + * references or footnotes. + */ + const children = !['strong', 'emphasis', 'delete'].includes(node.type) + && !isEmpty(node.children) + && flatMap(node.children, transform).filter(val => val); + + /** + * Run individual nodes through the conversion factory. + */ + return convertNode(node, children); +} + /** * Map of MDAST node types to Slate node types. */ @@ -63,8 +90,7 @@ function createText(value, data) { return {...node, text: value }; } -function convertMarkNode(node, parentMarks = []) { - +function processMarkNode(node, parentMarks = []) { /** * Add the current node's mark type to the marks collected from parent * mark nodes, if any. @@ -75,31 +101,57 @@ function convertMarkNode(node, parentMarks = []) { /** * Set an array to collect sections of text. */ - const ranges = []; + const slateNodes = []; node.children && node.children.forEach(childNode => { - /** * If a text node is a direct child of the current node, it should be * set aside as a range, and all marks that have been collected in the * `marks` array should apply to that specific range. */ if (['html', 'text'].includes(childNode.type)) { - ranges.push({ text: childNode.value, marks }); + slateNodes.push({ text: childNode.value, marks }); return; } /** - * Any non-text child node should be processed as a parent node. The - * recursive results should be pushed into the ranges array. This way, - * every MDAST nested text structure becomes a flat array of ranges - * that can serve as the value of a single Slate Raw text node. + * Process nested style nodes. The recursive results should be pushed into + * the ranges array. This way, every MDAST nested text structure becomes a + * flat array of ranges that can serve as the value of a single Slate Raw + * text node. */ - const nestedRanges = convertMarkNode(childNode, marks); - ranges.push(...nestedRanges); + if (['strong', 'emphasis', 'delete'].includes(childNode.type)) { + const nestedSlateNodes = processMarkNode(childNode, marks); + slateNodes.push(...nestedSlateNodes); + return; + } + + const nestedSlateNode = { ...childNode, data: { marks } }; + slateNodes.push(nestedSlateNode); }); - return ranges; + return slateNodes; +} + +function convertMarkNode(node) { + const slateNodes = processMarkNode(node); + + const convertedSlateNodes = slateNodes.reduce((acc, node, idx, nodes) => { + const lastConvertedNode = last(acc); + if (node.text && lastConvertedNode && lastConvertedNode.ranges) { + lastConvertedNode.ranges.push(node); + } + else if (node.text) { + acc.push(createText([node])); + } + else { + acc.push(transform(node)); + } + + return acc; + }, []); + + return convertedSlateNodes; } /** @@ -186,7 +238,7 @@ function convertNode(node, nodes) { case 'strong': case 'emphasis': case 'delete': { - return createText(convertMarkNode(node)); + return convertMarkNode(node); } /** @@ -258,9 +310,9 @@ function convertNode(node, nodes) { * schema references them in the data object. */ case 'link': { - const { title, url } = node; - const data = { title, url }; - return createInline(typeMap[type], nodes, { data }); + const { title, url, data } = node; + const newData = { ...data, title, url }; + return createInline(typeMap[type], nodes, { data: newData }); } /** @@ -275,29 +327,3 @@ function convertNode(node, nodes) { } } } - - -/** - * A Remark plugin for converting an MDAST to Slate Raw AST. Remark plugins - * return a `transform` function that receives the MDAST as it's first argument. - */ -export default function remarkToSlate() { - function transform(node) { - - /** - * Call `transform` recursively on child nodes. - * - * If a node returns a falsey value, filter it out. Some nodes do not - * translate from MDAST to Slate, such as definitions for link/image - * references or footnotes. - */ - const children = !isEmpty(node.children) && node.children.map(transform).filter(val => val); - - /** - * Run individual nodes through the conversion factory. - */ - return convertNode(node, children); - } - - return transform; -} diff --git a/src/components/Widgets/Markdown/serializers/slateRemark.js b/src/components/Widgets/Markdown/serializers/slateRemark.js index b73b1aa7..aca2d42c 100644 --- a/src/components/Widgets/Markdown/serializers/slateRemark.js +++ b/src/components/Widgets/Markdown/serializers/slateRemark.js @@ -37,6 +37,98 @@ const markMap = { code: 'inlineCode', }; +let shortcodePlugins; + +export default function slateToRemark(raw, opts) { + /** + * Set shortcode plugins in outer scope. + */ + ({ shortcodePlugins } = opts); + + /** + * The Slate Raw AST generally won't have a top level type, so we set it to + * "root" for clarity. + */ + raw.type = 'root'; + + return transform(raw); +} + + +/** + * The transform function mimics the approach of a Remark plugin for + * conformity with the other serialization functions. This function converts + * Slate nodes to MDAST nodes, and recursively calls itself to process child + * nodes to arbitrary depth. + */ +function transform(node) { + /** + * Combine adjacent text and inline nodes before processing so they can + * share marks. + */ + const combinedChildren = node.nodes && combineTextAndInline(node.nodes); + + /** + * Call `transform` recursively on child nodes, and flatten the resulting + * array. + */ + const children = !isEmpty(combinedChildren) && flatMap(combinedChildren, transform); + + /** + * Run individual nodes through conversion factories. + */ + return ['text'].includes(node.kind) + ? convertTextNode(node) + : convertNode(node, children, shortcodePlugins); +} + + +/** + * Includes inline nodes as ranges in adjacent text nodes where appropriate, so + * that mark node combining logic can apply to both text and inline nodes. This + * is necessary because Slate doesn't allow inline nodes to have marks while + * inline nodes in MDAST may be nested within mark nodes. Treating them as if + * they were text is a bit of a necessary hack. + */ +function combineTextAndInline(nodes) { + return nodes.reduce((acc, node, idx, nodes) => { + const prevNode = last(acc); + const prevNodeRanges = get(prevNode, 'ranges'); + const data = node.data || {}; + + /** + * If the previous node has ranges and the current node has marks in data + * (only happens when we place them on inline nodes here in the parser), or + * the current node also has ranges (because the previous node was + * originally an inline node that we've already squashed into a range) + * combine the current node into the previous. + */ + if (!isEmpty(prevNodeRanges) && !isEmpty(data.marks)) { + prevNodeRanges.push({ node, marks: data.marks }); + return acc; + } + + if (!isEmpty(prevNodeRanges) && !isEmpty(node.ranges)) { + prevNode.ranges = prevNodeRanges.concat(node.ranges); + return acc; + } + + /** + * Convert remaining inline nodes to standalone text nodes with ranges. + */ + if (node.kind === 'inline') { + acc.push({ kind: 'text', ranges: [{ node, marks: data.marks }] }); + return acc; + } + + /** + * Only remaining case is an actual text node, can be pushed as is. + */ + acc.push(node); + return acc; + }, []); +} + /** * Slate treats inline code decoration as a standard mark, but MDAST does @@ -124,120 +216,131 @@ function wrapTextWithMarks(textNode, markTypes) { * replaced with multiple MDAST nodes, so the resulting array must be flattened. */ function convertTextNode(node) { - + /** + * Translate soft breaks, which are just newline escape sequences. We track + * them with an `isBreak` boolean in the node data. + */ if (get(node.data, 'isBreak')) { return u('break'); } + /** - * If the Slate text node has no "ranges" property, just return an equivalent - * MDAST node. + * If the Slate text node has a "ranges" property, translate the Slate AST to + * a nested MDAST structure. Otherwise, just return an equivalent MDAST text + * node. */ - if (!node.ranges) { - return u('html', node.text); + if (node.ranges) { + const processedRanges = node.ranges.map(processRanges); + const condensedNodes = processedRanges.reduce(condenseNodesReducer, { nodes: [] }); + return condensedNodes.nodes; } - /** - * Process Slate node ranges in preparation for MDAST transformation. - */ - const processedRanges = node.ranges.map(range => { - /** - * Get an array of the mark types, converted to their MDAST equivalent - * types. - */ - const { marks = [], text } = range; - const markTypes = marks.map(mark => markMap[mark.type]); + if (node.kind === 'inline') { + return transform(node); + } + return u('html', node.text); +} + + +/** + * Process Slate node ranges in preparation for MDAST transformation. + */ +function processRanges(range) { + /** + * Get an array of the mark types, converted to their MDAST equivalent + * types. + */ + const { marks = [], text } = range; + const markTypes = marks.map(mark => markMap[mark.type]); + + if (typeof range.text === 'string') { /** * Code marks must be removed from the marks array, and the presence of a * code mark changes the text node type that should be used. */ const { filteredMarkTypes, textNodeType } = processCodeMark(markTypes); - return { text, marks: filteredMarkTypes, textNodeType }; - }); + } + + return { node: range.node, marks: markTypes }; +} + + +/** + * Slate's AST doesn't group adjacent text nodes with the same marks - a + * change in marks from letter to letter, even if some are in common, results + * in a separate range. For example, given "**a_b_**", transformation to and + * from Slate's AST will result in "**a****_b_**". + * + * MDAST treats styling entities as distinct nodes that contain children, so a + * "strong" node can contain a plain text node with a sibling "emphasis" node, + * which contains more text. This reducer serves to create an optimized nested + * MDAST without the typical redundancies that Slate's AST would produce if + * transformed as-is. The reducer can be called recursively to produce nested + * structures. + */ +function condenseNodesReducer(acc, node, idx, nodes) { + /** + * Skip any nodes that are being processed as children of an MDAST node + * through recursive calls. + */ + if (typeof acc.nextIndex === 'number' && acc.nextIndex > idx) { + return acc; + } /** - * Slate's AST doesn't group adjacent text nodes with the same marks - a - * change in marks from letter to letter, even if some are in common, results - * in a separate range. For example, given "**a_b_**", transformation to and - * from Slate's AST will result in "**a****_b_**". - * - * MDAST treats styling entities as distinct nodes that contain children, so a - * "strong" node can contain a plain text node with a sibling "emphasis" node, - * which contains more text. This reducer serves to create an optimized nested - * MDAST without the typical redundancies that Slate's AST would produce if - * transformed as-is. The reducer can be called recursively to produce nested - * structures. + * Processing for nodes with marks. */ - const nodeGroupReducer = (acc, node, idx, nodes) => { + if (node.marks && node.marks.length > 0) { /** - * Skip any nodes that are being processed as children of an MDAST node - * through recursive calls. + * For each mark on the current node, get the number of consecutive nodes + * (starting with this one) that have the mark. Whichever mark covers the + * most nodes is used as the parent node, and the nodes with that mark are + * processed as children. If the greatest number of consecutive nodes is + * tied between multiple marks, there is no priority as to which goes + * first. */ - if (typeof acc.nextIndex === 'number' && acc.nextIndex > idx) { - return acc; - } + const markLengths = node.marks.map(mark => getMarkLength(mark, nodes.slice(idx))); + const parentMarkLength = last(sortBy(markLengths, 'length')); + const { markType: parentType, length: parentLength } = parentMarkLength; /** - * Processing for nodes with marks. + * Since this and any consecutive nodes with the parent mark are going to + * be processed as children of the parent mark, this reducer should simply + * return the accumulator until after the last node to be covered by the + * new parent node. Here we set the next index that should be processed, + * if any. */ - if (node.marks && node.marks.length > 0) { - - /** - * For each mark on the current node, get the number of consecutive nodes - * (starting with this one) that have the mark. Whichever mark covers the - * most nodes is used as the parent node, and the nodes with that mark are - * processed as children. If the greatest number of consecutive nodes is - * tied between multiple marks, there is no priority as to which goes - * first. - */ - const markLengths = node.marks.map(mark => getMarkLength(mark, nodes.slice(idx))); - const parentMarkLength = last(sortBy(markLengths, 'length')); - const { markType: parentType, length: parentLength } = parentMarkLength; - - /** - * Since this and any consecutive nodes with the parent mark are going to - * be processed as children of the parent mark, this reducer should simply - * return the accumulator until after the last node to be covered by the - * new parent node. Here we set the next index that should be processed, - * if any. - */ - const newNextIndex = idx + parentLength; - - /** - * Get the set of nodes that should be processed as children of the new - * parent mark node, run each through the reducer as children of the - * parent node, and create the parent MDAST node with the resulting - * children. - */ - const children = nodes.slice(idx, newNextIndex); - const denestedChildren = children.map(child => ({ ...child, marks: without(child.marks, parentType) })); - const mdastChildren = denestedChildren.reduce(nodeGroupReducer, { nodes: [], parentType }).nodes; - const mdastNode = u(parentType, mdastChildren); - - return { ...acc, nodes: [ ...acc.nodes, mdastNode ], nextIndex: newNextIndex }; - } + const newNextIndex = idx + parentLength; /** - * Create the base text node, and pass in the array of mark types as data - * (helpful when optimizing/condensing the final structure). + * Get the set of nodes that should be processed as children of the new + * parent mark node, run each through the reducer as children of the + * parent node, and create the parent MDAST node with the resulting + * children. */ - const textNode = u(node.textNodeType, { marks: node.marks }, node.text); + const children = nodes.slice(idx, newNextIndex); + const denestedChildren = children.map(child => ({ ...child, marks: without(child.marks, parentType) })); + const mdastChildren = denestedChildren.reduce(condenseNodesReducer, { nodes: [], parentType }).nodes; + const mdastNode = u(parentType, mdastChildren); - /** - * Recursively wrap the base text node in the individual mark nodes, if - * any exist. - */ - return { ...acc, nodes: [ ...acc.nodes, textNode ] }; - }; - - const nodeGroups = processedRanges.reduce(nodeGroupReducer, { nodes: [] }); + return { ...acc, nodes: [ ...acc.nodes, mdastNode ], nextIndex: newNextIndex }; + } /** - * Since each range will be mapped into an array, we flatten the result to - * return a single array of all nodes. + * Create the base text node, and pass in the array of mark types as data + * (helpful when optimizing/condensing the final structure). */ - return nodeGroups.nodes; + const baseNode = typeof node.text === 'string' + ? u(node.textNodeType, { marks: node.marks }, node.text) + : transform(node.node); + + /** + * Recursively wrap the base text node in the individual mark nodes, if + * any exist. + */ + return { ...acc, nodes: [ ...acc.nodes, baseNode ] }; } @@ -330,8 +433,8 @@ function convertNode(node, children, shortcodePlugins) { */ case 'code': { const value = get(node.nodes, [0, 'text']); - const lang = get(node.data, 'lang'); - return u(typeMap[node.type], { lang }, value); + const { lang, ...data } = get(node, 'data', {}); + return u(typeMap[node.type], { lang, data }, value); } /** @@ -367,8 +470,8 @@ function convertNode(node, children, shortcodePlugins) { * the node for both Slate and Remark schemas. */ case 'link': { - const { url, title } = get(node, 'data', {}); - return u(typeMap[node.type], { url, title }, children); + const { url, title, ...data } = get(node, 'data', {}); + return u(typeMap[node.type], { url, title, data }, children); } /** @@ -377,35 +480,3 @@ function convertNode(node, children, shortcodePlugins) { */ } } - - -export default function slateToRemark(raw, { shortcodePlugins }) { - /** - * The transform function mimics the approach of a Remark plugin for - * conformity with the other serialization functions. This function converts - * Slate nodes to MDAST nodes, and recursively calls itself to process child - * nodes to arbitrary depth. - */ - function transform(node) { - - /** - * Call `transform` recursively on child nodes, and flatten the resulting - * array. - */ - const children = !isEmpty(node.nodes) && flatten(node.nodes.map(transform)); - - /** - * Run individual nodes through conversion factories. - */ - return node.kind === 'text' ? convertTextNode(node) : convertNode(node, children, shortcodePlugins); - } - - /** - * The Slate Raw AST generally won't have a top level type, so we set it to - * "root" for clarity. - */ - raw.type = 'root'; - - const mdast = transform(raw); - return mdast; -} From 70a4a51b97895f5b7c589930d8be7ec8c27df2c5 Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Tue, 19 Sep 2017 16:49:45 -0400 Subject: [PATCH 06/14] add inline image support for editor --- .../VisualEditor/components.js | 14 ++++++++++- .../serializers/__tests__/slate.spec.js | 4 ++++ .../serializers/remarkImagesToText.js | 24 ++++++++++++------- .../Markdown/serializers/remarkSlate.js | 18 ++++++++++++-- .../Markdown/serializers/slateRemark.js | 15 ++++++++++++ 5 files changed, 64 insertions(+), 11 deletions(-) diff --git a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js index 0c213884..21c6421e 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js +++ b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js @@ -35,7 +35,6 @@ export const NODE_COMPONENTS = { 'numbered-list': props =>
    {props.children}
, 'link': props => { - // Need to wrap this in mark components for any marks found in data. const data = props.node.get('data'); const marks = data.get('marks'); const url = data.get('url'); @@ -47,6 +46,19 @@ export const NODE_COMPONENTS = { }, link); return result; }, + 'image': props => { + const data = props.node.get('data'); + const marks = data.get('marks'); + const url = data.get('url'); + const title = data.get('title'); + const alt = data.get('alt'); + const image = {alt}; + const result = !marks ? image : marks.reduce((acc, mark) => { + const MarkComponent = MARK_COMPONENTS[mark.type]; + return {acc}; + }, image); + return result; + }, 'shortcode': props => { const { attributes, node, state: editorState } = props; const isSelected = editorState.selection.hasFocusIn(node); diff --git a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js index ae8383e7..768220e8 100644 --- a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js +++ b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js @@ -28,4 +28,8 @@ describe('slate', () => { expect(process('**a**b**c**')).toEqual('**a**b**c**\n'); expect(process('**a _b_ c**')).toEqual('**a _b_ c**\n'); }); + + it('should parse inline images as images', () => { + expect(process('a ![b](c)')).toEqual('a ![b](c)\n'); + }); }); diff --git a/src/components/Widgets/Markdown/serializers/remarkImagesToText.js b/src/components/Widgets/Markdown/serializers/remarkImagesToText.js index f63d3820..568e8a06 100644 --- a/src/components/Widgets/Markdown/serializers/remarkImagesToText.js +++ b/src/components/Widgets/Markdown/serializers/remarkImagesToText.js @@ -1,18 +1,26 @@ /** * Images must be parsed as shortcodes for asset proxying. This plugin converts - * MDAST image nodes back to text to allow shortcode pattern matching. + * MDAST image nodes back to text to allow shortcode pattern matching. Note that + * this transformation only occurs for images that are the sole child of a top + * level paragraph - any other image is left alone and treated as an inline + * image. */ export default function remarkImagesToText() { return transform; function transform(node) { - const children = node.children ? node.children.map(transform) : node.children; - if (node.type === 'image') { - const alt = node.alt || ''; - const url = node.url || ''; - const title = node.title ? ` "${node.title}"` : ''; - return { type: 'text', value: `![${alt}](${url}${title})` }; - } + const children = node.children.map(child => { + if ( + child.type === 'paragraph' + && child.children.length === 1 + && child.children[0].type === 'image' + ) { + const { alt = '', url = '', title = '' } = child.children[0]; + const value = `![${alt}](${url}${title ? ' title' : ''})`; + child.children = [{ type: 'text', value }]; + } + return child; + }); return { ...node, children }; } } diff --git a/src/components/Widgets/Markdown/serializers/remarkSlate.js b/src/components/Widgets/Markdown/serializers/remarkSlate.js index 00c6942e..ef28f570 100644 --- a/src/components/Widgets/Markdown/serializers/remarkSlate.js +++ b/src/components/Widgets/Markdown/serializers/remarkSlate.js @@ -74,7 +74,7 @@ function createBlock(type, nodes, props = {}) { /** * Create a Slate Block node. */ -function createInline(type, nodes, props = {}) { +function createInline(type, props = {}, nodes) { return { kind: 'inline', type, nodes, ...props }; } @@ -312,9 +312,23 @@ function convertNode(node, nodes) { case 'link': { const { title, url, data } = node; const newData = { ...data, title, url }; - return createInline(typeMap[type], nodes, { data: newData }); + return createInline(typeMap[type], { data: newData }, nodes); } + /** + * Images + * + * Identical to link nodes except for the lack of child nodes and addition + * of alt attribute data MDAST stores the link attributes directly on the + * node, while our Slate schema references them in the data object. + */ + case 'image': { + const { title, url, alt, data } = node; + const newData = { ...data, title, alt, url }; + return createInline(typeMap[type], { isVoid: true, data: newData }); + } + + /** * Tables * diff --git a/src/components/Widgets/Markdown/serializers/slateRemark.js b/src/components/Widgets/Markdown/serializers/slateRemark.js index aca2d42c..522a9b7a 100644 --- a/src/components/Widgets/Markdown/serializers/slateRemark.js +++ b/src/components/Widgets/Markdown/serializers/slateRemark.js @@ -474,6 +474,21 @@ function convertNode(node, children, shortcodePlugins) { return u(typeMap[node.type], { url, title, data }, children); } + /** + * Images + * + * This transformation is almost identical to that of links, except for the + * lack of child nodes and addition of `alt` attribute data. Currently the + * CMS handles block images by shortcode, so this case will only apply to + * inline images, which currently can only occur through raw markdown + * insertion. + */ + case 'image': { + const { url, title, alt, ...data } = get(node, 'data', {}); + return u(typeMap[node.type], { url, title, alt, data }); + } + + /** * No default case is supplied because an unhandled case should never * occur. In the event that it does, let the error throw (for now). From e25ec098f6328ba49e4914b57553649dbe78fee6 Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Tue, 19 Sep 2017 17:32:25 -0400 Subject: [PATCH 07/14] fix editor parsing of styled inline code --- .../serializers/__tests__/slate.spec.js | 1 + .../Markdown/serializers/remarkSlate.js | 67 +++++++++++-------- 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js index 768220e8..19bd61e6 100644 --- a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js +++ b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js @@ -17,6 +17,7 @@ describe('slate', () => { expect(process('**[a](b)**')).toEqual('**[a](b)**\n'); expect(process('**![a](b)**')).toEqual('**![a](b)**\n'); expect(process('_`a`_')).toEqual('_`a`_\n'); + expect(process('_`a`b_')).toEqual('_`a`b_\n'); }); it('should condense adjacent, identically styled text and inline nodes', () => { diff --git a/src/components/Widgets/Markdown/serializers/remarkSlate.js b/src/components/Widgets/Markdown/serializers/remarkSlate.js index ef28f570..4779ff99 100644 --- a/src/components/Widgets/Markdown/serializers/remarkSlate.js +++ b/src/components/Widgets/Markdown/serializers/remarkSlate.js @@ -98,39 +98,48 @@ function processMarkNode(node, parentMarks = []) { const markType = markMap[node.type]; const marks = markType ? [...parentMarks, { type: markMap[node.type] }] : parentMarks; - /** - * Set an array to collect sections of text. - */ - const slateNodes = []; + const children = flatMap(node.children, childNode => { + switch (childNode.type) { + /** + * If a text node is a direct child of the current node, it should be + * set aside as a range, and all marks that have been collected in the + * `marks` array should apply to that specific range. + */ + case 'html': + case 'text': + return { text: childNode.value, marks }; - node.children && node.children.forEach(childNode => { - /** - * If a text node is a direct child of the current node, it should be - * set aside as a range, and all marks that have been collected in the - * `marks` array should apply to that specific range. - */ - if (['html', 'text'].includes(childNode.type)) { - slateNodes.push({ text: childNode.value, marks }); - return; + /** + * MDAST inline code nodes don't have children, just a text value, similar + * to a text node, so it receives the same treatment as a text node, but we + * first add the inline code mark to the marks array. + */ + case 'inlineCode': { + const childMarks = [ ...marks, { type: markMap['inlineCode'] } ]; + return { text: childNode.value, marks: childMarks }; + } + + /** + * Process nested style nodes. The recursive results should be pushed into + * the ranges array. This way, every MDAST nested text structure becomes a + * flat array of ranges that can serve as the value of a single Slate Raw + * text node. + */ + case 'strong': + case 'emphasis': + case 'delete': + return processMarkNode(childNode, marks); + + /** + * Remaining nodes simply need mark data added to them, and to then be + * added into the cumulative children array. + */ + default: + return { ...childNode, data: { marks } }; } - - /** - * Process nested style nodes. The recursive results should be pushed into - * the ranges array. This way, every MDAST nested text structure becomes a - * flat array of ranges that can serve as the value of a single Slate Raw - * text node. - */ - if (['strong', 'emphasis', 'delete'].includes(childNode.type)) { - const nestedSlateNodes = processMarkNode(childNode, marks); - slateNodes.push(...nestedSlateNodes); - return; - } - - const nestedSlateNode = { ...childNode, data: { marks } }; - slateNodes.push(nestedSlateNode); }); - return slateNodes; + return children; } function convertMarkNode(node) { From 30a762cec10d816185d856ce71885668bae04781 Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Wed, 20 Sep 2017 18:10:46 -0400 Subject: [PATCH 08/14] improve markdown entity escaping for visual editor --- .../__snapshots__/parser.spec.js.snap | 27 +- .../remarkEscapeMarkdownEntities.spec.js | 47 +++- .../serializers/__tests__/slate.spec.js | 6 +- .../remarkEscapeMarkdownEntities.js | 243 ++++++++++++++++-- src/lib/regexHelper.js | 149 +++++++++++ 5 files changed, 451 insertions(+), 21 deletions(-) create mode 100644 src/lib/regexHelper.js diff --git a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap index 341b42b3..725d69b9 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap +++ b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap @@ -443,7 +443,7 @@ become anything else.", Object { "data": undefined, "kind": "text", - "text": " is supported. This *can be ", + "text": " is supported. This ", }, Object { "data": undefined, @@ -451,18 +451,37 @@ become anything else.", "ranges": Array [ Object { "marks": Array [ + Object { + "type": "italic", + }, + ], + "text": "can be ", + }, + Object { + "marks": Array [ + Object { + "type": "italic", + }, Object { "type": "bold", }, ], "text": "nested", }, + Object { + "marks": Array [ + Object { + "type": "italic", + }, + ], + "text": " like", + }, ], }, Object { "data": undefined, "kind": "text", - "text": " like* so.", + "text": " so.", }, ], "type": "paragraph", @@ -1289,7 +1308,9 @@ Object { "text": "blue moon", }, Object { - "data": undefined, + "data": Object { + "isBreak": true, + }, "kind": "text", "text": " ", diff --git a/src/components/Widgets/Markdown/serializers/__tests__/remarkEscapeMarkdownEntities.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/remarkEscapeMarkdownEntities.spec.js index 515ae75f..09af85db 100644 --- a/src/components/Widgets/Markdown/serializers/__tests__/remarkEscapeMarkdownEntities.spec.js +++ b/src/components/Widgets/Markdown/serializers/__tests__/remarkEscapeMarkdownEntities.spec.js @@ -13,12 +13,36 @@ const process = text => { describe('remarkEscapeMarkdownEntities', () => { it('should escape common markdown entities', () => { - expect(process('*~`[_')).toEqual('\\*\\~\\`\\[\\_'); + expect(process('*a*')).toEqual('\\*a\\*'); + expect(process('**a**')).toEqual('\\*\\*a\\*\\*'); + expect(process('***a***')).toEqual('\\*\\*\\*a\\*\\*\\*'); + expect(process('_a_')).toEqual('\\_a\\_'); + expect(process('__a__')).toEqual('\\_\\_a\\_\\_'); + expect(process('~~a~~')).toEqual('\\~\\~a\\~\\~'); + expect(process('[]')).toEqual('\\[]'); + expect(process('[]()')).toEqual('\\[]()'); + expect(process('[a](b)')).toEqual('\\[a](b)'); + expect(process('[Test sentence.](https://www.example.com)')) + .toEqual('\\[Test sentence.](https://www.example.com)'); + expect(process('![a](b)')).toEqual('!\\[a](b)'); + }); + + it('should not escape inactive, single markdown entities', () => { + expect(process('a*b')).toEqual('a*b'); + expect(process('_')).toEqual('_'); + expect(process('~')).toEqual('~'); + expect(process('[')).toEqual('['); }); it('should escape leading markdown entities', () => { expect(process('#')).toEqual('\\#'); expect(process('-')).toEqual('\\-'); + expect(process('*')).toEqual('\\*'); + expect(process('>')).toEqual('\\>'); + expect(process('=')).toEqual('\\='); + expect(process('|')).toEqual('\\|'); + expect(process('```')).toEqual('\\`\\``'); + expect(process(' ')).toEqual('\\ '); }); it('should escape leading markdown entities preceded by whitespace', () => { @@ -30,4 +54,25 @@ describe('remarkEscapeMarkdownEntities', () => { expect(process('a# # b #')).toEqual('a# # b #'); expect(process('a- - b -')).toEqual('a- - b -'); }); + + it('should not escape html tags', () => { + expect(process('')).toEqual(''); + expect(process('a b e')).toEqual('a b e'); + }); + + it('should escape the contents of html blocks', () => { + expect(process('
*a*
')).toEqual('
\\*a\\*
'); + }); + + it('should not escape the contents of preformatted html blocks', () => { + expect(process('
*a*
')).toEqual('
*a*
'); + expect(process('')).toEqual(''); + expect(process('')).toEqual(''); + expect(process('
\n*a*\n
')).toEqual('
\n*a*\n
'); + expect(process('a b
*c*
d e')).toEqual('a b
*c*
d e'); + }); + + it('should not parse footnotes', () => { + expect(process('[^a]')).toEqual('\\[^a]'); + }); }); diff --git a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js index 19bd61e6..b602eca1 100644 --- a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js +++ b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js @@ -8,7 +8,7 @@ describe('slate', () => { expect(process('a\n')).toEqual('a\n'); }); - it('should not decode encoded html entities in inline code', () => { + xit('should not decode encoded html entities in inline code', () => { expect(process('<div>')).toEqual('<div>\n'); }); @@ -33,4 +33,8 @@ describe('slate', () => { it('should parse inline images as images', () => { expect(process('a ![b](c)')).toEqual('a ![b](c)\n'); }); + + it('should not escape markdown entities in html', () => { + expect(process('*')).toEqual('*\n'); + }); }); diff --git a/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js b/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js index d38bc8bd..8688057a 100644 --- a/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js +++ b/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js @@ -1,3 +1,143 @@ +import { flow, partial, flatMap, flatten } from 'lodash'; +import { joinPatternSegments, combinePatterns, replaceWhen } from '../../../../lib/regexHelper'; + +/** + * Reusable regular expressions segments. + */ +const patternSegments = { + /** + * Matches zero or more HTML attributes followed by the tag close bracket, + * which may be prepended by zero or more spaces. The attributes can use + * single or double quotes and may be prepended by zero or more spaces. + */ + htmlOpeningTagEnd: /(?: *\w+=(?:(?:"[^"]*")|(?:'[^']*')))* *>/, +}; + + +/** + * Patterns matching substrings that should not be escaped. Array values must be + * joined before use. + */ +const nonEscapePatterns = { + /** + * HTML Tags + * + * Matches HTML opening tags and any attributes. Does not check for contents + * between tags or closing tags. + */ + htmlTags: [ + /** + * Matches the beginning of an HTML tag, excluding preformatted tag types. + */ + /<(?!pre|style|script)[\w]+/, + + /** + * Matches attributes. + */ + patternSegments.htmlOpeningTagEnd, + ], + + + /** + * Preformatted HTML Blocks + * + * Matches HTML blocks with preformatted content. The content of these blocks, + * including the tags and attributes, should not be escaped at all. + */ + preformattedHtmlBlocks: [ + /** + * Matches the names of tags known to have preformatted content. The capture + * group is reused when matching the closing tag. + * + * NOTE: this pattern reuses a capture group, and could break if combined with + * other expressions using capture groups. + */ + /<(pre|style|script)/, + + /** + * Matches attributes. + */ + patternSegments.htmlOpeningTagEnd, + + /** + * Allow zero or more of any character (including line breaks) between the + * tags. Match lazily in case of subsequent blocks. + */ + /(.|[\n\r])*?/, + + /** + * Match closing tag via first capture group. + */ + /<\/\1>/, + ], +}; + + +/** + * Escape patterns + * + * Each escape pattern matches a markdown entity and captures up to two + * groups. These patterns must use one of the following formulas: + * + * - Single capture group followed by match content - /(...).../ + * The captured characters should be escaped and the remaining match should + * remain unchanged. + * + * - Two capture groups surrounding matched content - /(...)...(...)/ + * The captured characters in both groups should be escaped and the matched + * characters in between should remain unchanged. + */ +const escapePatterns = [ + /** + * Emphasis/Bold - Asterisk + * + * Match strings surrounded by one or more asterisks on both sides. + */ + /(\*+)[^\*]*(\1)/g, + + /** + * Emphasis - Underscore + * + * Match strings surrounded by a single underscore on both sides followed by + * a word boundary. Remark disregards whether a word boundary exists at the + * beginning of an emphasis node. + */ + /(_)[^_]+(_)\b/g, + + /** + * Bold - Underscore + * + * Match strings surrounded by multiple underscores on both sides. Remark + * disregards the absence of word boundaries on either side of a bold node. + */ + /(_{2,})[^_]*(\1)/g, + + /** + * Strikethrough + * + * Match strings surrounded by multiple tildes on both sides. + */ + /(~+)[^~]*(\1)/g, + + /** + * Inline Code + * + * Match strings surrounded by backticks. + */ + /(`+)[^`]*(\1)/g, + + /** + * Links, Images, References, and Footnotes + * + * Match strings surrounded by brackets. This could be improved to + * specifically match only the exact syntax of each covered entity, but + * doing so through current approach would incur a considerable performance + * penalty. + */ + /(\[)[^\]]*]/g, +]; + + /** * A Remark plugin for escaping markdown entities. * @@ -13,22 +153,6 @@ * stringification. */ export default function remarkEscapeMarkdownEntities() { - /** - * Escape all occurrences of '[', '*', '_', '`', and '~'. - */ - function escapeCommonChars(text) { - return text.replace(/[\[*_`~]/g, '\\$&'); - } - - /** - * Runs escapeCommonChars, and also escapes '#' and '-' when found at the - * beginning of any node's first child node. - */ - function escapeAllChars(text) { - const partiallyEscapedMarkdown = escapeCommonChars(text); - return partiallyEscapedMarkdown.replace(/^\s*([#-])/, '$`\\$1'); - } - const transform = (node, index) => { const children = node.children && node.children.map(transform); @@ -54,3 +178,90 @@ export default function remarkEscapeMarkdownEntities() { return transform; } + + +/** + * Executes both the `escapeCommonChars` and `escapeLeadingChars` functions. + */ +function escapeAllChars(text) { + const partiallyEscapedMarkdown = escapeCommonChars(text); + return escapeLeadingChars(partiallyEscapedMarkdown); +} + + +/** + * escapeLeadingChars + * + * Handles escaping for characters that must be positioned at the beginning of + * the string, such as headers and list items. + * + * Escapes '#', '*', '-', '>', '=', '|', and sequences of 3+ backticks or 4+ + * spaces when found at the beginning of a string, preceded by zero or more + * whitespace characters. + */ +function escapeLeadingChars(text) { + return text.replace(/^\s*([-#*>=|]| {4,}|`{3,})/, '$`\\$1'); +} + + +/** + * escapeCommonChars + * + * Escapes active markdown entities. See escape pattern groups for details on + * which entities are replaced. + */ +function escapeCommonChars(text) { + /** + * Generate new non-escape expression (must happen at execution time because + * we use `RegExp.exec`, which tracks it's own state internally). The + * non-escape expression matches substrings whose contents should not be + * processed for escaping. + */ + const { htmlTags, preformattedHtmlBlocks } = nonEscapePatterns; + const joinedNonEscapePatterns = [ htmlTags, preformattedHtmlBlocks ].map(p => joinPatternSegments(p)); + const nonEscapePattern = combinePatterns(joinedNonEscapePatterns, 'gm'); + + /** + * Create chain of successive escape functions for various markdown entities. + */ + const escapeFunctions = escapePatterns.map(pattern => partial(escapeDelimiters, pattern)); + const escapeAll = flow(escapeFunctions); + + /** + * Use `replaceWhen` to escape markdown entities only within substrings that + * are eligible for escaping. + */ + return replaceWhen(nonEscapePattern, escapeAll, text, true); +} + + +/** + * escapeDelimiters + * + * Executes `String.replace` for a given pattern, but only on the first two + * capture groups. Specifically intended for escaping opening (and optionally + * closing) markdown entities without escaping the content in between. + */ +function escapeDelimiters(pattern, text) { + return text.replace(pattern, (match, start, end) => { + const hasEnd = typeof end === 'string'; + const matchSliceEnd = hasEnd ? match.length - end.length : match.length; + const content = match.slice(start.length, matchSliceEnd); + return `${escape(start)}${content}${hasEnd ? escape(end) : ''}`; + }); +} + + +/** + * escape + * + * Simple replacement function for escaping markdown entities. Prepends every + * character in the received string with a backslash. + */ +function escape(delim) { + let result = ''; + for (const char of delim) { + result += `\\${char}`; + } + return result; +} diff --git a/src/lib/regexHelper.js b/src/lib/regexHelper.js new file mode 100644 index 00000000..eb7e0c30 --- /dev/null +++ b/src/lib/regexHelper.js @@ -0,0 +1,149 @@ +import { last } from 'lodash'; + +/** + * Joins an array of regular expressions into a single expression, without + * altering the received expressions. Only flags passed as an argument will + * apply to the resulting regular expression. + */ +export function joinPatternSegments(patterns, flags = '') { + const pattern = patterns.map(p => p.source).join(''); + return new RegExp(pattern, flags); +} + + +/** + * Combines an array of regular expressions into a single expression, wrapping + * each in a non-capturing group and interposing alternation characters (|) so + * that each expression is executed separately. Only flags passed as an argument + * will apply to the resulting regular expression. + */ +export function combinePatterns(patterns, flags = '') { + const pattern = patterns.map(p => `(?:${p.source})`).join('|'); + return new RegExp(pattern, flags); +} + + +/** + * Modify substrings within a string if they match a (global) pattern. Can be + * inverted to only modify non-matches. + * + * params: + * matchPattern - regexp - a regular expression to check for matches + * replaceFn - function - a replacement function that receives a matched + * substring and returns a replacement substring + * text - string - the string to process + * invertMatchPattern - boolean - if true, non-matching substrings are modified + * instead of matching substrings + */ +export function replaceWhen(matchPattern, replaceFn, text, invertMatchPattern) { + /** + * Splits the string into an array of objects with the following shape: + * + * { + * index: number - the index of the substring within the string + * text: string - the substring + * match: boolean - true if the substring matched `matchPattern` + * } + * + * Loops through matches via recursion (`RegExp.exec` tracks the loop + * internally). + */ + function split(exp, text, acc) { + /** + * Get the next match starting from the end of the last match or start of + * string. + */ + const match = exp.exec(text); + const lastEntry = last(acc); + + /** + * `match` will be null if there are no matches. + */ + if (!match) return acc; + + /** + * If the match is at the beginning of the input string, normalize to a data + * object with the `match` flag set to `true`, and add to the accumulator. + */ + if (match.index === 0) { + addSubstring(acc, 0, match[0], true); + } + + /** + * If there are no entries in the accumulator, convert the substring before + * the match to a data object (without the `match` flag set to true) and + * push to the accumulator, followed by a data object for the matching + * substring. + */ + else if (!lastEntry) { + addSubstring(acc, 0, match.input.slice(0, match.index)); + addSubstring(acc, match.index, match[0], true); + } + + /** + * If the last entry in the accumulator immediately preceded the current + * matched substring in the original string, just add the data object for + * the matching substring to the accumulator. + */ + else if (match.index === lastEntry.index + lastEntry.text.length) { + addSubstring(acc, match.index, match[0], true); + } + + /** + * Convert the substring before the match to a data object (without the + * `match` flag set to true), followed by a data object for the matching + * substring. + */ + else { + const nextIndex = lastEntry.index + lastEntry.text.length; + const nextText = match.input.slice(nextIndex, match.index); + addSubstring(acc, nextIndex, nextText); + addSubstring(acc, match.index, match[0], true); + } + + /** + * Continue executing the expression. + */ + return split(exp, text, acc); + } + + /** + * Factory for converting substrings to data objects and adding to an output + * array. + */ + function addSubstring(arr, index, text, match = false) { + arr.push({ index, text, match }); + } + + /** + * Split the input string to an array of data objects, each representing a + * matching or non-matching string. + */ + const acc = split(matchPattern, text, []); + + /** + * Process the trailing substring after the final match, if one exists. + */ + const lastEntry = last(acc); + if (!lastEntry) return replaceFn(text); + + const nextIndex = lastEntry.index + lastEntry.text.length; + if (text.length > nextIndex) { + acc.push({ index: nextIndex, text: text.slice(nextIndex) }); + } + + /** + * Map the data objects in the accumulator to their string values, modifying + * matched strings with the replacement function. Modifies non-matches if + * `invertMatchPattern` is truthy. + */ + const replacedText = acc.map(entry => { + const isMatch = invertMatchPattern ? !entry.match : entry.match; + return isMatch ? replaceFn(entry.text) : entry.text; + }); + + /** + * Return the joined string. + */ + return replacedText.join(''); +} From fddbf8f7f02c96fa4818cc5d2638e32531cc2491 Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Fri, 22 Sep 2017 13:23:39 -0400 Subject: [PATCH 09/14] disable auto-conversion of markdown urls to links --- src/components/Widgets/Markdown/serializers/index.js | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/components/Widgets/Markdown/serializers/index.js b/src/components/Widgets/Markdown/serializers/index.js index 3f528643..2d1de14a 100644 --- a/src/components/Widgets/Markdown/serializers/index.js +++ b/src/components/Widgets/Markdown/serializers/index.js @@ -64,6 +64,7 @@ export const markdownToRemark = markdown => { */ const parsed = unified() .use(markdownToRemarkPlugin, { fences: true, commonmark: true }) + .use(markdownToRemarkRemoveTokenizers, { inlineTokenizers: ['url'] }) .parse(markdown); /** @@ -79,6 +80,16 @@ export const markdownToRemark = markdown => { }; +/** + * Remove named tokenizers from the parser, effectively deactivating them. + */ +function markdownToRemarkRemoveTokenizers({ inlineTokenizers }) { + inlineTokenizers && inlineTokenizers.forEach(tokenizer => { + delete this.Parser.prototype.inlineTokenizers[tokenizer]; + }); +} + + /** * Serialize an MDAST to a Markdown string. */ From 7bcb16d5e6510610637a08aeefa3045365e77676 Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Mon, 25 Sep 2017 14:19:24 -0400 Subject: [PATCH 10/14] fix nested field updates --- src/components/Widgets/ObjectControl.js | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/components/Widgets/ObjectControl.js b/src/components/Widgets/ObjectControl.js index ae3dd43d..fea66f01 100644 --- a/src/components/Widgets/ObjectControl.js +++ b/src/components/Widgets/ObjectControl.js @@ -21,6 +21,13 @@ export default class ObjectControl extends Component { className: PropTypes.string, }; + /** + * In case the `onChange` function is frozen by a child widget implementation, + * e.g. when debounced, always get the latest object value instead of usin + * `this.props.value` directly. + */ + getObjectValue = () => this.props.value; + controlFor(field) { const { onAddAsset, onRemoveAsset, getAsset, value, onChange } = this.props; if (field.get('widget') === 'hidden') { @@ -38,7 +45,7 @@ export default class ObjectControl extends Component { field, value: fieldValue, onChange: (val, metadata) => { - onChange((value || Map()).set(field.get('name'), val), metadata); + onChange((this.getObjectValue() || Map()).set(field.get('name'), val), metadata); }, onAddAsset, onRemoveAsset, From d3c12db8eff859a14ae5e5d64f26bc2e263e5994 Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Mon, 25 Sep 2017 18:17:44 -0400 Subject: [PATCH 11/14] fix soft break support --- .../__snapshots__/parser.spec.js.snap | 14 +++-- .../MarkdownControl/VisualEditor/plugins.js | 11 +++- .../remarkStripTrailingBreaks.spec.js | 24 ++++++++ .../Widgets/Markdown/serializers/index.js | 4 +- .../Markdown/serializers/remarkSlate.js | 3 +- .../serializers/remarkStripTrailingBreaks.js | 56 +++++++++++++++++++ .../Markdown/serializers/slateRemark.js | 25 +++++---- 7 files changed, 117 insertions(+), 20 deletions(-) create mode 100644 src/components/Widgets/Markdown/serializers/__tests__/remarkStripTrailingBreaks.spec.js create mode 100644 src/components/Widgets/Markdown/serializers/remarkStripTrailingBreaks.js diff --git a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap index 725d69b9..81948733 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap +++ b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap @@ -1308,12 +1308,16 @@ Object { "text": "blue moon", }, Object { - "data": Object { - "isBreak": true, - }, - "kind": "text", - "text": " + "kind": "inline", + "nodes": Array [ + Object { + "data": undefined, + "kind": "text", + "text": " ", + }, + ], + "type": "break", }, Object { "data": undefined, diff --git a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/plugins.js b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/plugins.js index efb7e029..6418e0d1 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/plugins.js +++ b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/plugins.js @@ -1,3 +1,4 @@ +import { Text, Inline } from 'slate'; import SlateSoftBreak from 'slate-soft-break'; import EditList from 'slate-edit-list'; import EditTable from 'slate-edit-table'; @@ -21,7 +22,13 @@ const SoftBreak = (options = {}) => ({ return unwrapped.insertBlock(defaultBlock).apply(); } - return state.transform().insertText('\n').apply(); + const textNode = Text.createFromString('\n'); + const breakNode = Inline.create({ type: 'break', nodes: [ textNode ] }); + return state.transform() + .insertInline(breakNode) + .insertText('') + .collapseToStartOfNextText() + .apply(); } }); @@ -32,7 +39,7 @@ const SoftBreakOpts = { export const SoftBreakConfigured = SoftBreak(SoftBreakOpts); -export const ParagraphSoftBreakConfigured = SlateSoftBreak({ onlyIn: ['paragraph'], shift: true }); +export const ParagraphSoftBreakConfigured = SoftBreak({ onlyIn: ['paragraph'], shift: true }); const BreakToDefaultBlock = ({ onlyIn = [], defaultBlock = 'paragraph' }) => ({ onKeyDown(e, data, state) { diff --git a/src/components/Widgets/Markdown/serializers/__tests__/remarkStripTrailingBreaks.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/remarkStripTrailingBreaks.spec.js new file mode 100644 index 00000000..a7b0500f --- /dev/null +++ b/src/components/Widgets/Markdown/serializers/__tests__/remarkStripTrailingBreaks.spec.js @@ -0,0 +1,24 @@ +import unified from 'unified'; +import u from 'unist-builder'; +import remarkStripTrailingBreaks from '../remarkStripTrailingBreaks'; + +const process = children => { + const tree = u('root', children); + const strippedMdast = unified() + .use(remarkStripTrailingBreaks) + .runSync(tree); + + return strippedMdast.children; +}; + +describe('remarkStripTrailingBreaks', () => { + it('should remove trailing breaks at the end of a block', () => { + expect(process([u('break')])).toEqual([]); + expect(process([u('break'), u('text', '\n \n')])).toEqual([u('text', '\n \n')]); + expect(process([u('text', 'a'), u('break')])).toEqual([u('text', 'a')]); + }); + + it('should not remove trailing breaks that are not at the end of a block', () => { + expect(process([u('break'), u('text', 'a')])).toEqual([u('break'), u('text', 'a')]); + }); +}); diff --git a/src/components/Widgets/Markdown/serializers/index.js b/src/components/Widgets/Markdown/serializers/index.js index 2d1de14a..19e2a0e9 100644 --- a/src/components/Widgets/Markdown/serializers/index.js +++ b/src/components/Widgets/Markdown/serializers/index.js @@ -16,7 +16,8 @@ import remarkToSlate from './remarkSlate'; import remarkSquashReferences from './remarkSquashReferences'; import remarkImagesToText from './remarkImagesToText'; import remarkShortcodes from './remarkShortcodes'; -import remarkEscapeMarkdownEntities from './remarkEscapeMarkdownEntities' +import remarkEscapeMarkdownEntities from './remarkEscapeMarkdownEntities'; +import remarkStripTrailingBreaks from './remarkStripTrailingBreaks'; import slateToRemark from './slateRemark'; import registry from '../../../../lib/registry'; @@ -127,6 +128,7 @@ export const remarkToMarkdown = obj => { */ const escapedMdast = unified() .use(remarkEscapeMarkdownEntities) + .use(remarkStripTrailingBreaks) .runSync(mdast); const markdown = unified() diff --git a/src/components/Widgets/Markdown/serializers/remarkSlate.js b/src/components/Widgets/Markdown/serializers/remarkSlate.js index 4779ff99..7f403089 100644 --- a/src/components/Widgets/Markdown/serializers/remarkSlate.js +++ b/src/components/Widgets/Markdown/serializers/remarkSlate.js @@ -300,7 +300,8 @@ function convertNode(node, nodes) { * line breaks within a text node. */ case 'break': { - return createText('\n', { isBreak: true }); + const textNode = createText('\n'); + return createInline('break', {}, [ textNode ]); } /** diff --git a/src/components/Widgets/Markdown/serializers/remarkStripTrailingBreaks.js b/src/components/Widgets/Markdown/serializers/remarkStripTrailingBreaks.js new file mode 100644 index 00000000..67a37c3b --- /dev/null +++ b/src/components/Widgets/Markdown/serializers/remarkStripTrailingBreaks.js @@ -0,0 +1,56 @@ +import mdastToString from 'mdast-util-to-string'; + +/** + * Removes break nodes that are at the end of a block. + * + * When a trailing double space or backslash is encountered at the end of a + * markdown block, Remark will interpret the character(s) literally, as only + * break entities followed by text qualify as breaks. A manually created MDAST, + * however, may have such entities, and users of visual editors shouldn't see + * these artifacts in resulting markdown. + */ +export default function remarkStripTrailingBreaks() { + const transform = node => { + if (node.children) { + node.children = node.children + .map((child, idx, children) => { + + /** + * Only touch break nodes. Convert all subsequent nodes to their text + * value and exclude the break node if no non-whitespace characters + * are found. + */ + if (child.type === 'break') { + const subsequentNodes = children.slice(idx + 1); + + /** + * Create a small MDAST so that mdastToString can process all + * siblings as children of one node rather than making multiple + * calls. + */ + const fragment = { type: 'root', children: subsequentNodes }; + const subsequentText = mdastToString(fragment); + return subsequentText.trim() ? child : null; + } + + /** + * Always return the child if not a break. + */ + return child; + }) + + /** + * Because some break nodes may be excluded, we filter out the resulting + * null values. + */ + .filter(child => child) + + /** + * Recurse through the MDAST by transforming each individual child node. + */ + .map(transform); + } + return node; + }; + return transform; +}; diff --git a/src/components/Widgets/Markdown/serializers/slateRemark.js b/src/components/Widgets/Markdown/serializers/slateRemark.js index 522a9b7a..7812dfac 100644 --- a/src/components/Widgets/Markdown/serializers/slateRemark.js +++ b/src/components/Widgets/Markdown/serializers/slateRemark.js @@ -21,6 +21,7 @@ const typeMap = { 'table': 'table', 'table-row': 'tableRow', 'table-cell': 'tableCell', + 'break': 'break', 'thematic-break': 'thematicBreak', 'link': 'link', 'image': 'image', @@ -113,6 +114,16 @@ function combineTextAndInline(nodes) { return acc; } + /** + * Break nodes contain a single child text node with a newline character + * for visual purposes in the editor, but Remark break nodes have no + * children, so we remove the child node here. + */ + if (node.type === 'break') { + acc.push({ kind: 'inline', type: 'break' }); + return acc; + } + /** * Convert remaining inline nodes to standalone text nodes with ranges. */ @@ -216,14 +227,6 @@ function wrapTextWithMarks(textNode, markTypes) { * replaced with multiple MDAST nodes, so the resulting array must be flattened. */ function convertTextNode(node) { - /** - * Translate soft breaks, which are just newline escape sequences. We track - * them with an `isBreak` boolean in the node data. - */ - if (get(node.data, 'isBreak')) { - return u('break'); - } - /** * If the Slate text node has a "ranges" property, translate the Slate AST to * a nested MDAST structure. Otherwise, just return an equivalent MDAST text @@ -454,11 +457,11 @@ function convertNode(node, children, shortcodePlugins) { } /** - * Thematic Breaks + * Breaks * - * Thematic breaks don't have children. We parse them separately for - * clarity. + * Breaks don't have children. We parse them separately for clarity. */ + case 'break': case 'thematic-break': { return u(typeMap[node.type]); } From 9e0d7696eea45e29a59ff32349d85bcd0e53117a Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Tue, 26 Sep 2017 15:22:34 -0400 Subject: [PATCH 12/14] stop remark from decoding HTML entities --- .../__tests__/remarkAllowHtmlEntities.spec.js | 24 ++++++++ .../serializers/__tests__/slate.spec.js | 2 +- .../Widgets/Markdown/serializers/index.js | 2 + .../serializers/remarkAllowHtmlEntities.js | 59 +++++++++++++++++++ 4 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 src/components/Widgets/Markdown/serializers/__tests__/remarkAllowHtmlEntities.spec.js create mode 100644 src/components/Widgets/Markdown/serializers/remarkAllowHtmlEntities.js diff --git a/src/components/Widgets/Markdown/serializers/__tests__/remarkAllowHtmlEntities.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/remarkAllowHtmlEntities.spec.js new file mode 100644 index 00000000..a7eed4c4 --- /dev/null +++ b/src/components/Widgets/Markdown/serializers/__tests__/remarkAllowHtmlEntities.spec.js @@ -0,0 +1,24 @@ +import unified from 'unified'; +import markdownToRemark from 'remark-parse'; +import remarkAllowHtmlEntities from '../remarkAllowHtmlEntities'; + +const process = markdown => { + const mdast = unified().use(markdownToRemark).use(remarkAllowHtmlEntities).parse(markdown); + + /** + * The MDAST will look like: + * + * { type: 'root', children: [ + * { type: 'paragraph', children: [ + * // results here + * ]} + * ]} + */ + return mdast.children[0].children[0].value; +}; + +describe('remarkAllowHtmlEntities', () => { + it('should not decode HTML entities', () => { + expect(process('<div>')).toEqual('<div>'); + }); +}); diff --git a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js index b602eca1..fd3bda41 100644 --- a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js +++ b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js @@ -8,7 +8,7 @@ describe('slate', () => { expect(process('a\n')).toEqual('a\n'); }); - xit('should not decode encoded html entities in inline code', () => { + it('should not decode encoded html entities in inline code', () => { expect(process('<div>')).toEqual('<div>\n'); }); diff --git a/src/components/Widgets/Markdown/serializers/index.js b/src/components/Widgets/Markdown/serializers/index.js index 19e2a0e9..5b00150d 100644 --- a/src/components/Widgets/Markdown/serializers/index.js +++ b/src/components/Widgets/Markdown/serializers/index.js @@ -18,6 +18,7 @@ import remarkImagesToText from './remarkImagesToText'; import remarkShortcodes from './remarkShortcodes'; import remarkEscapeMarkdownEntities from './remarkEscapeMarkdownEntities'; import remarkStripTrailingBreaks from './remarkStripTrailingBreaks'; +import remarkAllowHtmlEntities from './remarkAllowHtmlEntities'; import slateToRemark from './slateRemark'; import registry from '../../../../lib/registry'; @@ -66,6 +67,7 @@ export const markdownToRemark = markdown => { const parsed = unified() .use(markdownToRemarkPlugin, { fences: true, commonmark: true }) .use(markdownToRemarkRemoveTokenizers, { inlineTokenizers: ['url'] }) + .use(remarkAllowHtmlEntities) .parse(markdown); /** diff --git a/src/components/Widgets/Markdown/serializers/remarkAllowHtmlEntities.js b/src/components/Widgets/Markdown/serializers/remarkAllowHtmlEntities.js new file mode 100644 index 00000000..62e4d3be --- /dev/null +++ b/src/components/Widgets/Markdown/serializers/remarkAllowHtmlEntities.js @@ -0,0 +1,59 @@ +export default function remarkAllowHtmlEntities() { + this.Parser.prototype.inlineTokenizers.text = text; + + /** + * This is a port of the `remark-parse` text tokenizer, adapted to exclude + * HTML entity decoding. + */ + function text(eat, value, silent) { + var self = this; + var methods; + var tokenizers; + var index; + var length; + var subvalue; + var position; + var tokenizer; + var name; + var min; + var now; + + /* istanbul ignore if - never used (yet) */ + if (silent) { + return true; + } + + methods = self.inlineMethods; + length = methods.length; + tokenizers = self.inlineTokenizers; + index = -1; + min = value.length; + + while (++index < length) { + name = methods[index]; + + if (name === 'text' || !tokenizers[name]) { + continue; + } + + tokenizer = tokenizers[name].locator; + + if (!tokenizer) { + eat.file.fail('Missing locator: `' + name + '`'); + } + + position = tokenizer.call(self, value, 1); + + if (position !== -1 && position < min) { + min = position; + } + } + + subvalue = value.slice(0, min); + + eat(subvalue)({ + type: 'text', + value: subvalue, + }); + } +}; From 516a5e4c7fe6309867b63b2a77e3b05f4e6eda24 Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Tue, 26 Sep 2017 15:52:50 -0400 Subject: [PATCH 13/14] improve markdown editor serialization debounce --- .../Widgets/Markdown/MarkdownControl/RawEditor/index.js | 8 +++----- .../Markdown/MarkdownControl/VisualEditor/index.js | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/components/Widgets/Markdown/MarkdownControl/RawEditor/index.js b/src/components/Widgets/Markdown/MarkdownControl/RawEditor/index.js index d3ccda2d..80b4295a 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/RawEditor/index.js +++ b/src/components/Widgets/Markdown/MarkdownControl/RawEditor/index.js @@ -22,16 +22,14 @@ export default class RawEditor extends React.Component { this.setState({ editorState }); } - onChange = debounce(this.props.onChange, 250); - /** * When the document value changes, serialize from Slate's AST back to plain * text (which is Markdown) and pass that up as the new value. */ - handleDocumentChange = (doc, editorState) => { + handleDocumentChange = debounce((doc, editorState) => { const value = Plain.serialize(editorState); - this.onChange(value); - }; + this.props.onChange(value); + }, 150); /** * If a paste contains plain text, deserialize it to Slate's AST and insert diff --git a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/index.js b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/index.js index ee18459d..34d2ceab 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/index.js +++ b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/index.js @@ -44,14 +44,12 @@ export default class Editor extends Component { return state.transform().insertFragment(doc).apply(); } - onChange = debounce(this.props.onChange, 250); - - handleDocumentChange = (doc, editorState) => { + handleDocumentChange = debounce((doc, editorState) => { const raw = Raw.serialize(editorState, { terse: true }); const plugins = this.state.shortcodePlugins; const markdown = slateToMarkdown(raw, plugins); - this.onChange(markdown); - }; + this.props.onChange(markdown); + }, 150); hasMark = type => this.state.editorState.marks.some(mark => mark.type === type); hasBlock = type => this.state.editorState.blocks.some(node => node.type === type); From e2232e1067d0b3f84d3f25d04632dc059dcd24e8 Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Tue, 26 Sep 2017 16:35:01 -0400 Subject: [PATCH 14/14] improve markdown entity escaping perf --- .../remarkEscapeMarkdownEntities.js | 183 +++++++++--------- src/lib/regexHelper.js | 14 +- 2 files changed, 100 insertions(+), 97 deletions(-) diff --git a/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js b/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js index 8688057a..6e7903c5 100644 --- a/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js +++ b/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js @@ -1,4 +1,4 @@ -import { flow, partial, flatMap, flatten } from 'lodash'; +import { flow, partial, flatMap, flatten, map } from 'lodash'; import { joinPatternSegments, combinePatterns, replaceWhen } from '../../../../lib/regexHelper'; /** @@ -138,6 +138,100 @@ const escapePatterns = [ ]; +/** + * Generate new non-escape expression. The non-escape expression matches + * substrings whose contents should not be processed for escaping. + */ +const joinedNonEscapePatterns = map(nonEscapePatterns, pattern => { + return new RegExp(joinPatternSegments(pattern)); +}); +const nonEscapePattern = combinePatterns(joinedNonEscapePatterns); + + +/** + * Create chain of successive escape functions for various markdown entities. + */ +const escapeFunctions = escapePatterns.map(pattern => partial(escapeDelimiters, pattern)); +const escapeAll = flow(escapeFunctions); + + +/** + * Executes both the `escapeCommonChars` and `escapeLeadingChars` functions. + */ +function escapeAllChars(text) { + const partiallyEscapedMarkdown = escapeCommonChars(text); + return escapeLeadingChars(partiallyEscapedMarkdown); +} + + +/** + * escapeLeadingChars + * + * Handles escaping for characters that must be positioned at the beginning of + * the string, such as headers and list items. + * + * Escapes '#', '*', '-', '>', '=', '|', and sequences of 3+ backticks or 4+ + * spaces when found at the beginning of a string, preceded by zero or more + * whitespace characters. + */ +function escapeLeadingChars(text) { + return text.replace(/^\s*([-#*>=|]| {4,}|`{3,})/, '$`\\$1'); +} + + +/** + * escapeCommonChars + * + * Escapes active markdown entities. See escape pattern groups for details on + * which entities are replaced. + */ +function escapeCommonChars(text) { + /** + * Generate new non-escape expression (must happen at execution time because + * we use `RegExp.exec`, which tracks it's own state internally). + */ + const nonEscapeExpression = new RegExp(nonEscapePattern, 'gm'); + + /** + * Use `replaceWhen` to escape markdown entities only within substrings that + * are eligible for escaping. + */ + return replaceWhen(nonEscapeExpression, escapeAll, text, true); +} + + +/** + * escapeDelimiters + * + * Executes `String.replace` for a given pattern, but only on the first two + * capture groups. Specifically intended for escaping opening (and optionally + * closing) markdown entities without escaping the content in between. + */ +function escapeDelimiters(pattern, text) { + return text.replace(pattern, (match, start, end) => { + const hasEnd = typeof end === 'string'; + const matchSliceEnd = hasEnd ? match.length - end.length : match.length; + const content = match.slice(start.length, matchSliceEnd); + return `${escape(start)}${content}${hasEnd ? escape(end) : ''}`; + }); +} + + +/** + * escape + * + * Simple replacement function for escaping markdown entities. Prepends every + * character in the received string with a backslash. + */ +function escape(delim) { + let result = ''; + for (const char of delim) { + result += `\\${char}`; + } + return result; +} + + /** * A Remark plugin for escaping markdown entities. * @@ -178,90 +272,3 @@ export default function remarkEscapeMarkdownEntities() { return transform; } - - -/** - * Executes both the `escapeCommonChars` and `escapeLeadingChars` functions. - */ -function escapeAllChars(text) { - const partiallyEscapedMarkdown = escapeCommonChars(text); - return escapeLeadingChars(partiallyEscapedMarkdown); -} - - -/** - * escapeLeadingChars - * - * Handles escaping for characters that must be positioned at the beginning of - * the string, such as headers and list items. - * - * Escapes '#', '*', '-', '>', '=', '|', and sequences of 3+ backticks or 4+ - * spaces when found at the beginning of a string, preceded by zero or more - * whitespace characters. - */ -function escapeLeadingChars(text) { - return text.replace(/^\s*([-#*>=|]| {4,}|`{3,})/, '$`\\$1'); -} - - -/** - * escapeCommonChars - * - * Escapes active markdown entities. See escape pattern groups for details on - * which entities are replaced. - */ -function escapeCommonChars(text) { - /** - * Generate new non-escape expression (must happen at execution time because - * we use `RegExp.exec`, which tracks it's own state internally). The - * non-escape expression matches substrings whose contents should not be - * processed for escaping. - */ - const { htmlTags, preformattedHtmlBlocks } = nonEscapePatterns; - const joinedNonEscapePatterns = [ htmlTags, preformattedHtmlBlocks ].map(p => joinPatternSegments(p)); - const nonEscapePattern = combinePatterns(joinedNonEscapePatterns, 'gm'); - - /** - * Create chain of successive escape functions for various markdown entities. - */ - const escapeFunctions = escapePatterns.map(pattern => partial(escapeDelimiters, pattern)); - const escapeAll = flow(escapeFunctions); - - /** - * Use `replaceWhen` to escape markdown entities only within substrings that - * are eligible for escaping. - */ - return replaceWhen(nonEscapePattern, escapeAll, text, true); -} - - -/** - * escapeDelimiters - * - * Executes `String.replace` for a given pattern, but only on the first two - * capture groups. Specifically intended for escaping opening (and optionally - * closing) markdown entities without escaping the content in between. - */ -function escapeDelimiters(pattern, text) { - return text.replace(pattern, (match, start, end) => { - const hasEnd = typeof end === 'string'; - const matchSliceEnd = hasEnd ? match.length - end.length : match.length; - const content = match.slice(start.length, matchSliceEnd); - return `${escape(start)}${content}${hasEnd ? escape(end) : ''}`; - }); -} - - -/** - * escape - * - * Simple replacement function for escaping markdown entities. Prepends every - * character in the received string with a backslash. - */ -function escape(delim) { - let result = ''; - for (const char of delim) { - result += `\\${char}`; - } - return result; -} diff --git a/src/lib/regexHelper.js b/src/lib/regexHelper.js index eb7e0c30..c4b14e41 100644 --- a/src/lib/regexHelper.js +++ b/src/lib/regexHelper.js @@ -2,24 +2,20 @@ import { last } from 'lodash'; /** * Joins an array of regular expressions into a single expression, without - * altering the received expressions. Only flags passed as an argument will - * apply to the resulting regular expression. + * altering the received expressions. */ -export function joinPatternSegments(patterns, flags = '') { - const pattern = patterns.map(p => p.source).join(''); - return new RegExp(pattern, flags); +export function joinPatternSegments(patterns) { + return patterns.map(p => p.source).join(''); } /** * Combines an array of regular expressions into a single expression, wrapping * each in a non-capturing group and interposing alternation characters (|) so - * that each expression is executed separately. Only flags passed as an argument - * will apply to the resulting regular expression. + * that each expression is executed separately. */ export function combinePatterns(patterns, flags = '') { - const pattern = patterns.map(p => `(?:${p.source})`).join('|'); - return new RegExp(pattern, flags); + return patterns.map(p => `(?:${p.source})`).join('|'); }