handle markdown styled inline nodes

Slate does not allow inline nodes like links and images to have marks (like strong, emphasis). This commit changes the parsers to process these nodes as if they were text nodes so that marks are handled.
2017-09-14 17:50:08 -04:00
parent 2d3bf9b3fc
commit e937e8e626
3 changed files with 268 additions and 166 deletions
--- a/src/components/Widgets/Markdown/serializers/tests/slate.spec.js
+++ b/src/components/Widgets/Markdown/serializers/tests/slate.spec.js
@ -13,14 +13,19 @@ describe('slate', () => {
  });

  it('should parse non-text children of mark nodes', () => {
-    expect(process('**[a](b)**')).toEqual('**[a](b)**');
+    expect(process('**a[b](c)d**')).toEqual('**a[b](c)d**\n');
+    expect(process('**[a](b)**')).toEqual('**[a](b)**\n');
+    expect(process('**![a](b)**')).toEqual('**![a](b)**\n');
+    expect(process('_`a`_')).toEqual('_`a`_\n');
  });

-  it('should condense adjacent, identically styled text', () => {
+  it('should condense adjacent, identically styled text and inline nodes', () => {
    expect(process('**a ~~b~~~~c~~**')).toEqual('**a ~~bc~~**\n');
+    expect(process('**a ~~b~~~~[c](d)~~**')).toEqual('**a ~~b[c](d)~~**\n');
  });

  it('should handle nested markdown entities', () => {
    expect(process('**a**b**c**')).toEqual('**a**b**c**\n');
+    expect(process('**a _b_ c**')).toEqual('**a _b_ c**\n');
  });
 });
--- a/src/components/Widgets/Markdown/serializers/remarkSlate.js
+++ b/src/components/Widgets/Markdown/serializers/remarkSlate.js
@ -1,6 +1,33 @@
-import { get, isEmpty, isArray } from 'lodash';
+import { get, isEmpty, isArray, last, flatMap } from 'lodash';
 import u from 'unist-builder';

+/**
+ * A Remark plugin for converting an MDAST to Slate Raw AST. Remark plugins
+ * return a `transform` function that receives the MDAST as its first argument.
+ */
+export default function remarkToSlate() {
+  return transform;
+}
+
+function transform(node) {
+  /**
+   * Mark nodes (strong, emphasis, delete) do not get their children
+   * transformed here: `convertNode` routes them to `convertMarkNode`, which
+   * walks their children itself. Nodes without children are skipped as well.
+   * In both cases `children` is `false`.
+   */
+  const isMarkNode = ['strong', 'emphasis', 'delete'].includes(node.type);
+  const canTransformChildren = !isMarkNode && !isEmpty(node.children);
+
+  /**
+   * Transform each child recursively and flatten the results, since a single
+   * child may produce an array of nodes. Falsey results are dropped: some
+   * nodes do not translate from MDAST to Slate, such as definitions for
+   * link/image references or footnotes.
+   */
+  const children = canTransformChildren
+    ? flatMap(node.children, transform).filter(Boolean)
+    : false;
+
+  /**
+   * Hand the node and its converted children to the conversion factory.
+   */
+  return convertNode(node, children);
+}
+
 /**
 * Map of MDAST node types to Slate node types.
 */
@ -63,8 +90,7 @@ function createText(value, data) {
  return {...node, text: value };
 }

-function convertMarkNode(node, parentMarks = []) {
-
+function processMarkNode(node, parentMarks = []) {
  /**
   * Add the current node's mark type to the marks collected from parent
   * mark nodes, if any.
@ -75,31 +101,57 @@ function convertMarkNode(node, parentMarks = []) {
  /**
   * Set an array to collect sections of text.
   */
-  const ranges = [];
+  const slateNodes = [];

  node.children && node.children.forEach(childNode => {
-
    /**
     * If a text node is a direct child of the current node, it should be
     * set aside as a range, and all marks that have been collected in the
     * `marks` array should apply to that specific range.
     */
    if (['html', 'text'].includes(childNode.type)) {
-      ranges.push({ text: childNode.value, marks });
+      slateNodes.push({ text: childNode.value, marks });
      return;
    }

    /**
-     * Any non-text child node should be processed as a parent node. The
-     * recursive results should be pushed into the ranges array. This way,
-     * every MDAST nested text structure becomes a flat array of ranges
-     * that can serve as the value of a single Slate Raw text node.
+     * Process nested style nodes. The recursive results should be pushed into
+     * the `slateNodes` array. This way, every MDAST nested text structure
+     * becomes a flat array of text ranges and inline nodes, which
+     * `convertMarkNode` then converts to Slate Raw nodes.
     */
-    const nestedRanges = convertMarkNode(childNode, marks);
-    ranges.push(...nestedRanges);
+    if (['strong', 'emphasis', 'delete'].includes(childNode.type)) {
+      const nestedSlateNodes = processMarkNode(childNode, marks);
+      slateNodes.push(...nestedSlateNodes);
+      return;
+    }
+
+    const nestedSlateNode = { ...childNode, data: { marks } };
+    slateNodes.push(nestedSlateNode);
  });

-  return ranges;
+  return slateNodes;
+}
+
+/**
+ * Converts an MDAST mark node (strong, emphasis, delete) to an array of Slate
+ * nodes. `processMarkNode` first flattens the mark node into a list of
+ * entries: text ranges with their collected marks, and non-text nodes that
+ * carry the collected marks in `data`.
+ */
+function convertMarkNode(node) {
+  const converted = [];
+
+  processMarkNode(node).forEach(entry => {
+    const previous = last(converted);
+
+    /**
+     * Entries without (truthy) text are not ranges, so they are run through
+     * `transform` like any other node.
+     */
+    if (!entry.text) {
+      converted.push(transform(entry));
+      return;
+    }
+
+    /**
+     * A text range directly following a converted node that has `ranges` is
+     * appended to that node, so adjacent ranges end up in the same node.
+     */
+    if (previous && previous.ranges) {
+      previous.ranges.push(entry);
+      return;
+    }
+
+    /**
+     * Otherwise the range starts a new text node.
+     */
+    converted.push(createText([entry]));
+  });
+
+  return converted;
 }

 /**
@ -186,7 +238,7 @@ function convertNode(node, nodes) {
    case 'strong':
    case 'emphasis':
    case 'delete': {
-      return createText(convertMarkNode(node));
+      return convertMarkNode(node);
    }

    /**
@ -258,9 +310,9 @@ function convertNode(node, nodes) {
     * schema references them in the data object.
     */
    case 'link': {
-      const { title, url } = node;
-      const data = { title, url };
-      return createInline(typeMap[type], nodes, { data });
+      const { title, url, data } = node;
+      const newData = { ...data, title, url };
+      return createInline(typeMap[type], nodes, { data: newData });
    }

    /**
@ -275,29 +327,3 @@ function convertNode(node, nodes) {
    }
  }
 }
-
-
-/**
- * A Remark plugin for converting an MDAST to Slate Raw AST. Remark plugins
- * return a `transform` function that receives the MDAST as it's first argument.
- */
-export default function remarkToSlate() {
-  function transform(node) {
-
-    /**
-     * Call `transform` recursively on child nodes.
-     *
-     * If a node returns a falsey value, filter it out. Some nodes do not
-     * translate from MDAST to Slate, such as definitions for link/image
-     * references or footnotes.
-     */
-    const children = !isEmpty(node.children) && node.children.map(transform).filter(val => val);
-
-    /**
-     * Run individual nodes through the conversion factory.
-     */
-    return convertNode(node, children);
-  }
-
-  return transform;
-}
--- a/src/components/Widgets/Markdown/serializers/slateRemark.js
+++ b/src/components/Widgets/Markdown/serializers/slateRemark.js
@ -37,6 +37,98 @@ const markMap = {
  code: 'inlineCode',
 };

+/**
+ * Shortcode plugins for the serialization in progress. They live in module
+ * scope so that `transform` can pass them along to `convertNode`.
+ */
+let shortcodePlugins;
+
+/**
+ * Converts a Slate Raw AST to an MDAST.
+ *
+ * `raw` is the Slate Raw AST, and `opts.shortcodePlugins` is stored in the
+ * module-scoped `shortcodePlugins` variable before conversion starts. Note
+ * that `raw` itself receives a `type` property.
+ */
+export default function slateToRemark(raw, opts) {
+  shortcodePlugins = opts.shortcodePlugins;
+
+  /**
+   * A Slate Raw AST generally has no top level type, so label it "root" for
+   * clarity before handing it to `transform`.
+   */
+  raw.type = 'root';
+  return transform(raw);
+}
+
+
+/**
+ * Converts a Slate node to MDAST, recursing into child nodes to arbitrary
+ * depth. It is shaped like the `transform` function of a Remark plugin for
+ * conformity with the other serialization functions.
+ */
+function transform(node) {
+  /**
+   * Merge neighboring text and inline children first so that they can share
+   * marks.
+   */
+  const mergedChildren = node.nodes && combineTextAndInline(node.nodes);
+
+  /**
+   * Transform every merged child, flattening the results into one array.
+   * `children` is `false` when there is nothing to transform.
+   */
+  const children = isEmpty(mergedChildren)
+    ? false
+    : flatMap(mergedChildren, transform);
+
+  /**
+   * Text nodes have a dedicated converter; all other nodes go through the
+   * general conversion factory along with their children.
+   */
+  if (node.kind === 'text') {
+    return convertTextNode(node);
+  }
+
+  return convertNode(node, children, shortcodePlugins);
+}
+
+
+/**
+ * Includes inline nodes as ranges in adjacent text nodes where appropriate, so
+ * that mark node combining logic can apply to both text and inline nodes. This
+ * is necessary because Slate doesn't allow inline nodes to have marks while
+ * inline nodes in MDAST may be nested within mark nodes. Treating them as if
+ * they were text is a bit of a necessary hack.
+ *
+ * Receives an array of sibling Slate Raw nodes and returns a new array. The
+ * nodes passed in are never modified: a text node is shallow-copied, with its
+ * own `ranges` array, before anything is merged into it. Without the copy,
+ * converting the same Raw AST a second time would append the same inline nodes
+ * to the same text nodes again.
+ */
+function combineTextAndInline(nodes) {
+  return nodes.reduce((acc, node) => {
+    const prevNode = last(acc);
+    const prevNodeRanges = get(prevNode, 'ranges');
+    const prevHasRanges = !isEmpty(prevNodeRanges);
+    const data = node.data || {};
+
+    /**
+     * If the previous node has ranges and the current node has marks in its
+     * data (which only happens when the MDAST to Slate parser places them on
+     * inline nodes nested within mark nodes), add the current node to the
+     * previous node as a range carrying those marks.
+     */
+    if (prevHasRanges && !isEmpty(data.marks)) {
+      prevNodeRanges.push({ node, marks: data.marks });
+      return acc;
+    }
+
+    /**
+     * If both the previous node and the current node have ranges, for example
+     * because the previous node was originally an inline node that has
+     * already been squashed into a range, append the current node's ranges to
+     * the previous node.
+     */
+    if (prevHasRanges && !isEmpty(node.ranges)) {
+      prevNode.ranges = prevNodeRanges.concat(node.ranges);
+      return acc;
+    }
+
+    /**
+     * Convert remaining inline nodes to standalone text nodes with ranges.
+     */
+    if (node.kind === 'inline') {
+      acc.push({ kind: 'text', ranges: [{ node, marks: data.marks }] });
+      return acc;
+    }
+
+    /**
+     * Only remaining case is an actual text node. Push a copy when it has
+     * ranges, because later iterations may add ranges to whatever sits in the
+     * accumulator and the caller's node must stay untouched.
+     */
+    acc.push(Array.isArray(node.ranges) ? { ...node, ranges: [...node.ranges] } : node);
+    return acc;
+  }, []);
+}
+

 /**
 * Slate treats inline code decoration as a standard mark, but MDAST does
@ -124,120 +216,131 @@ function wrapTextWithMarks(textNode, markTypes) {
 * replaced with multiple MDAST nodes, so the resulting array must be flattened.
 */
 function convertTextNode(node) {
-
+  /**
+   * Translate soft breaks, which are just newline escape sequences. We track
+   * them with an `isBreak` boolean in the node data.
+   */
  if (get(node.data, 'isBreak')) {
    return u('break');
  }
+
  /**
-   * If the Slate text node has no "ranges" property, just return an equivalent
-   * MDAST node.
+   * If the Slate text node has a "ranges" property, translate the Slate AST to
+   * a nested MDAST structure. Otherwise, just return an equivalent MDAST text
+   * node.
   */
-  if (!node.ranges) {
-    return u('html', node.text);
+  if (node.ranges) {
+    const processedRanges = node.ranges.map(processRanges);
+    const condensedNodes = processedRanges.reduce(condenseNodesReducer, { nodes: [] });
+    return condensedNodes.nodes;
  }

-  /**
-   * Process Slate node ranges in preparation for MDAST transformation.
-   */
-  const processedRanges = node.ranges.map(range => {
-    /**
-     * Get an array of the mark types, converted to their MDAST equivalent
-     * types.
-     */
-    const { marks = [], text } = range;
-    const markTypes = marks.map(mark => markMap[mark.type]);
+  if (node.kind === 'inline') {
+    return transform(node);
+  }

+  return u('html', node.text);
+}
+
+
+/**
+ * Process Slate node ranges in preparation for MDAST transformation.
+ */
+function processRanges(range) {
+  /**
+   * Get an array of the mark types, converted to their MDAST equivalent
+   * types.
+   */
+  const { marks = [], text } = range;
+  const markTypes = marks.map(mark => markMap[mark.type]);
+
+  if (typeof range.text === 'string') {
    /**
     * Code marks must be removed from the marks array, and the presence of a
     * code mark changes the text node type that should be used.
     */
    const { filteredMarkTypes, textNodeType } = processCodeMark(markTypes);
-
    return { text, marks: filteredMarkTypes, textNodeType };
-  });
+  }
+
+  return { node: range.node, marks: markTypes };
+}
+
+
+/**
+ * Slate's AST doesn't group adjacent text nodes with the same marks - a
+ * change in marks from letter to letter, even if some are in common, results
+ * in a separate range. For example, given "**a_b_**", transformation to and
+ * from Slate's AST will result in "**a****_b_**".
+ *
+ * MDAST treats styling entities as distinct nodes that contain children, so a
+ * "strong" node can contain a plain text node with a sibling "emphasis" node,
+ * which contains more text. This reducer serves to create an optimized nested
+ * MDAST without the typical redundancies that Slate's AST would produce if
+ * transformed as-is. The reducer can be called recursively to produce nested
+ * structures.
+ */
+function condenseNodesReducer(acc, node, idx, nodes) {
+  /**
+   * Skip any nodes that are being processed as children of an MDAST node
+   * through recursive calls.
+   */
+  if (typeof acc.nextIndex === 'number' && acc.nextIndex > idx) {
+    return acc;
+  }

  /**
-   * Slate's AST doesn't group adjacent text nodes with the same marks - a
-   * change in marks from letter to letter, even if some are in common, results
-   * in a separate range. For example, given "**a_b_**", transformation to and
-   * from Slate's AST will result in "**a****_b_**".
-   *
-   * MDAST treats styling entities as distinct nodes that contain children, so a
-   * "strong" node can contain a plain text node with a sibling "emphasis" node,
-   * which contains more text. This reducer serves to create an optimized nested
-   * MDAST without the typical redundancies that Slate's AST would produce if
-   * transformed as-is. The reducer can be called recursively to produce nested
-   * structures.
+   * Processing for nodes with marks.
   */
-  const nodeGroupReducer = (acc, node, idx, nodes) => {
+  if (node.marks && node.marks.length > 0) {
    /**
-     * Skip any nodes that are being processed as children of an MDAST node
-     * through recursive calls.
+     * For each mark on the current node, get the number of consecutive nodes
+     * (starting with this one) that have the mark. Whichever mark covers the
+     * most nodes is used as the parent node, and the nodes with that mark are
+     * processed as children. If the greatest number of consecutive nodes is
+     * tied between multiple marks, there is no priority as to which goes
+     * first.
     */
-    if (typeof acc.nextIndex === 'number' && acc.nextIndex > idx) {
-      return acc;
-    }
+    const markLengths = node.marks.map(mark => getMarkLength(mark, nodes.slice(idx)));
+    const parentMarkLength = last(sortBy(markLengths, 'length'));
+    const { markType: parentType, length: parentLength } = parentMarkLength;

    /**
-     * Processing for nodes with marks.
+     * Since this and any consecutive nodes with the parent mark are going to
+     * be processed as children of the parent mark, this reducer should simply
+     * return the accumulator until after the last node to be covered by the
+     * new parent node. Here we set the next index that should be processed,
+     * if any.
     */
-    if (node.marks && node.marks.length > 0) {
-
-      /**
-       * For each mark on the current node, get the number of consecutive nodes
-       * (starting with this one) that have the mark. Whichever mark covers the
-       * most nodes is used as the parent node, and the nodes with that mark are
-       * processed as children. If the greatest number of consecutive nodes is
-       * tied between multiple marks, there is no priority as to which goes
-       * first.
-       */
-      const markLengths = node.marks.map(mark => getMarkLength(mark, nodes.slice(idx)));
-      const parentMarkLength = last(sortBy(markLengths, 'length'));
-      const { markType: parentType, length: parentLength } = parentMarkLength;
-
-      /**
-       * Since this and any consecutive nodes with the parent mark are going to
-       * be processed as children of the parent mark, this reducer should simply
-       * return the accumulator until after the last node to be covered by the
-       * new parent node. Here we set the next index that should be processed,
-       * if any.
-       */
-      const newNextIndex = idx + parentLength;
-
-      /**
-       * Get the set of nodes that should be processed as children of the new
-       * parent mark node, run each through the reducer as children of the
-       * parent node, and create the parent MDAST node with the resulting
-       * children.
-       */
-      const children = nodes.slice(idx, newNextIndex);
-      const denestedChildren = children.map(child => ({ ...child, marks: without(child.marks, parentType) }));
-      const mdastChildren = denestedChildren.reduce(nodeGroupReducer, { nodes: [], parentType }).nodes;
-      const mdastNode = u(parentType, mdastChildren);
-
-      return { ...acc, nodes: [ ...acc.nodes, mdastNode ], nextIndex: newNextIndex };
-    }
+    const newNextIndex = idx + parentLength;

    /**
-     * Create the base text node, and pass in the array of mark types as data
-     * (helpful when optimizing/condensing the final structure).
+     * Get the set of nodes that should be processed as children of the new
+     * parent mark node, run each through the reducer as children of the
+     * parent node, and create the parent MDAST node with the resulting
+     * children.
     */
-    const textNode = u(node.textNodeType, { marks: node.marks }, node.text);
+    const children = nodes.slice(idx, newNextIndex);
+    const denestedChildren = children.map(child => ({ ...child, marks: without(child.marks, parentType) }));
+    const mdastChildren = denestedChildren.reduce(condenseNodesReducer, { nodes: [], parentType }).nodes;
+    const mdastNode = u(parentType, mdastChildren);

-    /**
-     * Recursively wrap the base text node in the individual mark nodes, if
-     * any exist.
-     */
-    return { ...acc, nodes: [ ...acc.nodes, textNode ] };
-  };
-
-  const nodeGroups = processedRanges.reduce(nodeGroupReducer, { nodes: [] });
+    return { ...acc, nodes: [ ...acc.nodes, mdastNode ], nextIndex: newNextIndex };
+  }

  /**
-   * Since each range will be mapped into an array, we flatten the result to
-   * return a single array of all nodes.
+   * Create the base text node, and pass in the array of mark types as data
+   * (helpful when optimizing/condensing the final structure).
   */
-  return nodeGroups.nodes;
+  const baseNode = typeof node.text === 'string'
+    ? u(node.textNodeType, { marks: node.marks }, node.text)
+    : transform(node.node);
+
+  /**
+   * Recursively wrap the base text node in the individual mark nodes, if
+   * any exist.
+   */
+  return { ...acc, nodes: [ ...acc.nodes, baseNode ] };
 }


@ -330,8 +433,8 @@ function convertNode(node, children, shortcodePlugins) {
     */
    case 'code': {
      const value = get(node.nodes, [0, 'text']);
-      const lang = get(node.data, 'lang');
-      return u(typeMap[node.type], { lang }, value);
+      const { lang, ...data } = get(node, 'data', {});
+      return u(typeMap[node.type], { lang, data }, value);
    }

    /**
@ -367,8 +470,8 @@ function convertNode(node, children, shortcodePlugins) {
     * the node for both Slate and Remark schemas.
     */
    case 'link': {
-      const { url, title } = get(node, 'data', {});
-      return u(typeMap[node.type], { url, title }, children);
+      const { url, title, ...data } = get(node, 'data', {});
+      return u(typeMap[node.type], { url, title, data }, children);
    }

    /**
@ -377,35 +480,3 @@ function convertNode(node, children, shortcodePlugins) {
     */
  }
 }
-
-
-export default function slateToRemark(raw, { shortcodePlugins }) {
-  /**
-   * The transform function mimics the approach of a Remark plugin for
-   * conformity with the other serialization functions. This function converts
-   * Slate nodes to MDAST nodes, and recursively calls itself to process child
-   * nodes to arbitrary depth.
-   */
-  function transform(node) {
-
-    /**
-     * Call `transform` recursively on child nodes, and flatten the resulting
-     * array.
-     */
-    const children = !isEmpty(node.nodes) && flatten(node.nodes.map(transform));
-
-    /**
-     * Run individual nodes through conversion factories.
-     */
-    return node.kind === 'text' ? convertTextNode(node) : convertNode(node, children, shortcodePlugins);
-  }
-
-  /**
-   * The Slate Raw AST generally won't have a top level type, so we set it to
-   * "root" for clarity.
-   */
-  raw.type = 'root';
-
-  const mdast = transform(raw);
-  return mdast;
-}