From 14b6292eab0f7f37283c8cfe50aa81114f001e15 Mon Sep 17 00:00:00 2001
From: Shawn Erquhart <shawn@erquh.art>
Date: Wed, 10 Apr 2019 15:23:53 -0400
Subject: [PATCH] feat(editor-components): match any characters with shortcodes
 (#2268)

---
 .../__snapshots__/parser.spec.js.snap         |  30 ++--
 .../__snapshots__/renderer.spec.js.snap       |   4 +-
 .../src/serializers/index.js                  |   9 +-
 .../src/serializers/remarkShortcodes.js       | 129 ++++++------------
 .../src/serializers/remarkSlate.js            |  25 ++--
 .../src/serializers/slateRemark.js            |  36 ++---
 6 files changed, 78 insertions(+), 155 deletions(-)
diff --git a/packages/netlify-cms-widget-markdown/src/MarkdownControl/__tests__/__snapshots__/parser.spec.js.snap b/packages/netlify-cms-widget-markdown/src/MarkdownControl/__tests__/__snapshots__/parser.spec.js.snap
index a7b66cc0..e5e13004 100644
--- a/packages/netlify-cms-widget-markdown/src/MarkdownControl/__tests__/__snapshots__/parser.spec.js.snap
+++ b/packages/netlify-cms-widget-markdown/src/MarkdownControl/__tests__/__snapshots__/parser.spec.js.snap
@@ -313,13 +313,14 @@ Object {
     Object {
       "nodes": Array [
         Object {
-          "data": undefined,
-          "leaves": Array [
-            Object {
-              "text": "![super](duper.jpg)",
-            },
-          ],
-          "object": "text",
+          "data": Object {
+            "alt": "super",
+            "title": null,
+            "url": "duper.jpg",
+          },
+          "isVoid": true,
+          "object": "inline",
+          "type": "image",
         },
       ],
       "object": "block",
@@ -1520,13 +1521,14 @@ Object {
     Object {
       "nodes": Array [
         Object {
-          "data": undefined,
-          "leaves": Array [
-            Object {
-              "text": "![test](test.png)",
-            },
-          ],
-          "object": "text",
+          "data": Object {
+            "alt": "test",
+            "title": null,
+            "url": "test.png",
+          },
+          "isVoid": true,
+          "object": "inline",
+          "type": "image",
         },
       ],
       "object": "block",
diff --git a/packages/netlify-cms-widget-markdown/src/__tests__/__snapshots__/renderer.spec.js.snap b/packages/netlify-cms-widget-markdown/src/__tests__/__snapshots__/renderer.spec.js.snap
index b01b761f..f9c4ebc1 100644
--- a/packages/netlify-cms-widget-markdown/src/__tests__/__snapshots__/renderer.spec.js.snap
+++ b/packages/netlify-cms-widget-markdown/src/__tests__/__snapshots__/renderer.spec.js.snap
@@ -70,9 +70,9 @@ exports[`Markdown Preview renderer Markdown rendering General should render mark
 <h4>H4</h4>
 <p><a href=\\"http://google.com\\">link title</a></p>
 <h5>H5</h5>
-<p>![alt text](https://pbs.twimg.com/profile_images/678903331176214528/TQTdqGwD.jpg)</p>
+<p><img src=\\"https://pbs.twimg.com/profile_images/678903331176214528/TQTdqGwD.jpg\\" alt=\\"alt text\\"></p>
 <h6>H6</h6>
-<p>![](https://pbs.twimg.com/profile_images/678903331176214528/TQTdqGwD.jpg)</p>",
+<p><img src=\\"https://pbs.twimg.com/profile_images/678903331176214528/TQTdqGwD.jpg\\"></p>",
     }
   }
 />
diff --git a/packages/netlify-cms-widget-markdown/src/serializers/index.js b/packages/netlify-cms-widget-markdown/src/serializers/index.js
index d01dd72c..43bbacdf 100644
--- a/packages/netlify-cms-widget-markdown/src/serializers/index.js
+++ b/packages/netlify-cms-widget-markdown/src/serializers/index.js
@@ -14,8 +14,7 @@ import remarkPaddedLinks from './remarkPaddedLinks';
 import remarkWrapHtml from './remarkWrapHtml';
 import remarkToSlate from './remarkSlate';
 import remarkSquashReferences from './remarkSquashReferences';
-import remarkImagesToText from './remarkImagesToText';
-import remarkShortcodes from './remarkShortcodes';
+import { remarkParseShortcodes, createRemarkShortcodeStringifier } from './remarkShortcodes';
 import remarkEscapeMarkdownEntities from './remarkEscapeMarkdownEntities';
 import remarkStripTrailingBreaks from './remarkStripTrailingBreaks';
 import remarkAllowHtmlEntities from './remarkAllowHtmlEntities';
@@ -66,6 +65,7 @@ export const markdownToRemark = markdown => {
   const parsed = unified()
     .use(markdownToRemarkPlugin, { fences: true, commonmark: true })
     .use(markdownToRemarkRemoveTokenizers, { inlineTokenizers: ['url'] })
+    .use(remarkParseShortcodes, { plugins: getEditorComponents() })
     .use(remarkAllowHtmlEntities)
     .parse(markdown);
 
@@ -74,8 +74,6 @@ export const markdownToRemark = markdown => {
    */
   const result = unified()
     .use(remarkSquashReferences)
-    .use(remarkImagesToText)
-    .use(remarkShortcodes, { plugins: getEditorComponents() })
     .runSync(parsed);
 
   return result;
@@ -136,6 +134,7 @@ export const remarkToMarkdown = obj => {
   const markdown = unified()
     .use(remarkToMarkdownPlugin, remarkToMarkdownPluginOpts)
     .use(remarkAllowAllText)
+    .use(createRemarkShortcodeStringifier({ plugins: getEditorComponents() }))
     .stringify(processedMdast);
 
   /**
@@ -179,8 +178,6 @@ export const htmlToSlate = html => {
   const slateRaw = unified()
     .use(remarkAssertParents)
     .use(remarkPaddedLinks)
-    .use(remarkImagesToText)
-    .use(remarkShortcodes, { plugins: getEditorComponents() })
     .use(remarkWrapHtml)
     .use(remarkToSlate)
     .runSync(mdast);
diff --git a/packages/netlify-cms-widget-markdown/src/serializers/remarkShortcodes.js b/packages/netlify-cms-widget-markdown/src/serializers/remarkShortcodes.js
index ea953039..9402eeef 100644
--- a/packages/netlify-cms-widget-markdown/src/serializers/remarkShortcodes.js
+++ b/packages/netlify-cms-widget-markdown/src/serializers/remarkShortcodes.js
@@ -1,99 +1,48 @@
-import { map, every } from 'lodash';
-import u from 'unist-builder';
-import mdastToString from 'mdast-util-to-string';
+// Remark parser plugin: builds a `shortcode` block tokenizer from the editor components
+// and puts it first in the Parser's list of block methods.
+export function remarkParseShortcodes({ plugins }) {
+  const { blockTokenizers, blockMethods } = this.Parser.prototype;
 
-/**
- * Parse shortcodes from an MDAST.
- *
- * Shortcodes are plain text, and must be the lone content of a paragraph. The
- * paragraph must also be a direct child of the root node. When a shortcode is
- * found, we just need to add data to the node so the shortcode can be
- * identified and processed when serializing to a new format. The paragraph
- * containing the node is also recreated to ensure normalization.
- */
-export default function remarkShortcodes({ plugins }) {
-  return transform;
+  blockTokenizers.shortcode = createShortcodeTokenizer({ plugins });
 
-  /**
-   * Map over children of the root node and convert any found shortcode nodes.
-   */
-  function transform(root) {
-    const transformedChildren = map(root.children, processShortcodes);
-    return { ...root, children: transformedChildren };
-  }
+  blockMethods.unshift('shortcode');
+}
 
-  /**
-   * Mapping function to transform nodes that contain shortcodes.
-   */
-  function processShortcodes(node) {
-    /**
-     * If the node is not eligible to contain a shortcode, return the original
-     * node unchanged.
-     */
-    if (!nodeMayContainShortcode(node)) return node;
-
-    /**
-     * Combine the text values of all children to a single string, check the
-     * string for a shortcode pattern match, and validate the match.
-     */
-    const text = mdastToString(node).trim();
-    const { plugin, match } = matchTextToPlugin(text);
-    const matchIsValid = validateMatch(text, match);
-
-    /**
-     * If a valid match is found, return a new node with shortcode data
-     * included. Otherwise, return the original node.
-     */
-    return matchIsValid ? createShortcodeNode(text, plugin, match) : node;
-  }
-
-  /**
-   * Ensure that the node and it's children are acceptable types to contain
-   * shortcodes. Currently, only a paragraph containing text and/or html nodes
-   * may contain shortcodes.
-   */
-  function nodeMayContainShortcode(node) {
-    const validNodeTypes = ['paragraph'];
-    const validChildTypes = ['text', 'html'];
-
-    if (validNodeTypes.includes(node.type)) {
-      return every(node.children, child => {
-        return validChildTypes.includes(child.type);
-      });
-    }
-  }
-
-  /**
-   * Return the plugin and RegExp.match result from the first plugin with a
-   * pattern that matches the given text.
-   */
-  function matchTextToPlugin(text) {
+/**
+ * Create a remark-parse block tokenizer that turns the first matching shortcode into a node.
+ */
+function createShortcodeTokenizer({ plugins }) {
+  return function tokenizeShortcode(eat, value, silent) {
+    const potentialMatchValue = value.split('\n\n')[0].trim();
     let match;
-    const plugin = plugins.find(p => {
-      match = text.match(p.pattern);
+    const plugin = plugins.find(plugin => {
+      match = potentialMatchValue.match(plugin.pattern);
       return !!match;
     });
-    return { plugin, match };
-  }
 
-  /**
-   * A match is only valid if it takes up the entire paragraph.
-   */
-  function validateMatch(text, match) {
-    return match && match[0].length === text.length;
-  }
+    // `eat` rejects text that is not at the very start of `value`, so only claim leading matches.
+    if (match && value.startsWith(match[0])) {
+      if (silent) {
+        return true;
+      }
 
-  /**
-   * Create a new node with shortcode data included. Use an 'html' node instead
-   * of a 'text' node as the child to ensure the node content is not parsed by
-   * Remark or Rehype. Include the child as an array because an MDAST paragraph
-   * node must have it's children in an array.
-   */
-  function createShortcodeNode(text, plugin, match) {
-    const shortcode = plugin.id;
-    const shortcodeData = plugin.fromBlock(match);
-    const data = { shortcode, shortcodeData };
-    const textNode = u('html', text);
-    return u('paragraph', { data }, [textNode]);
-  }
+      const data = { shortcode: plugin.id, shortcodeData: plugin.fromBlock(match) };
+      return eat(match[0])({ type: 'shortcode', data });
+    }
+  };
+}
+
+// Returns a remark plugin that stringifies `shortcode` nodes with the matching editor component.
+export function createRemarkShortcodeStringifier({ plugins }) {
+  return function remarkStringifyShortcodes() {
+    // Install the handler under the node type name in the Compiler's visitor table.
+    this.Compiler.prototype.visitors.shortcode = function shortcode({ data }) {
+      const plugin = plugins.find(plugin => data.shortcode === plugin.id);
+      // Name the missing component rather than failing with an opaque "undefined" TypeError.
+      if (!plugin) {
+        throw new Error(`No editor component registered for shortcode "${data.shortcode}"`);
+      }
+      return plugin.toBlock(data.shortcodeData);
+    };
+  };
 }
diff --git a/packages/netlify-cms-widget-markdown/src/serializers/remarkSlate.js b/packages/netlify-cms-widget-markdown/src/serializers/remarkSlate.js
index 02fb7590..2e12a9cf 100644
--- a/packages/netlify-cms-widget-markdown/src/serializers/remarkSlate.js
+++ b/packages/netlify-cms-widget-markdown/src/serializers/remarkSlate.js
@@ -1,4 +1,4 @@
-import { get, isEmpty, isArray, last, flatMap } from 'lodash';
+import { isEmpty, isArray, last, flatMap } from 'lodash';
 
 /**
  * A Remark plugin for converting an MDAST to Slate Raw AST. Remark plugins
@@ -169,14 +169,7 @@ function convertMarkNode(node) {
  * transformer.
  */
 function convertNode(node, nodes) {
-  /**
-   * Unified/Remark processors use mutable operations, so we don't want to
-   * change a node's type directly for conversion purposes, as that tends to
-   * unexpected errors.
-   */
-  const type = get(node, ['data', 'shortcode']) ? 'shortcode' : node.type;
-
-  switch (type) {
+  switch (node.type) {
     /**
      * General
      *
@@ -189,7 +182,7 @@ function convertNode(node, nodes) {
     case 'blockquote':
     case 'tableRow':
     case 'tableCell': {
-      return createBlock(typeMap[type], nodes);
+      return createBlock(typeMap[node.type], nodes);
     }
 
     /**
@@ -202,7 +195,7 @@ function convertNode(node, nodes) {
     case 'shortcode': {
       const { data } = node;
       const nodes = [createText('')];
-      return createBlock(typeMap[type], nodes, { data, isVoid: true });
+      return createBlock(typeMap[node.type], nodes, { data, isVoid: true });
     }
 
     /**
@@ -272,7 +265,7 @@ function convertNode(node, nodes) {
       const data = { lang: node.lang };
       const text = createText(node.value);
       const nodes = [text];
-      return createBlock(typeMap[type], nodes, { data });
+      return createBlock(typeMap[node.type], nodes, { data });
     }
 
     /**
@@ -307,7 +300,7 @@ function convertNode(node, nodes) {
      * Thematic breaks are void nodes in the Slate schema.
      */
     case 'thematicBreak': {
-      return createBlock(typeMap[type], { isVoid: true });
+      return createBlock(typeMap[node.type], { isVoid: true });
     }
 
     /**
@@ -319,7 +312,7 @@ function convertNode(node, nodes) {
     case 'link': {
       const { title, url, data } = node;
       const newData = { ...data, title, url };
-      return createInline(typeMap[type], { data: newData }, nodes);
+      return createInline(typeMap[node.type], { data: newData }, nodes);
     }
 
     /**
@@ -332,7 +325,7 @@ function convertNode(node, nodes) {
     case 'image': {
       const { title, url, alt, data } = node;
       const newData = { ...data, title, alt, url };
-      return createInline(typeMap[type], { isVoid: true, data: newData });
+      return createInline(typeMap[node.type], { isVoid: true, data: newData });
     }
 
     /**
@@ -343,7 +336,7 @@ function convertNode(node, nodes) {
      */
     case 'table': {
       const data = { align: node.align };
-      return createBlock(typeMap[type], nodes, { data });
+      return createBlock(typeMap[node.type], nodes, { data });
     }
   }
 }
diff --git a/packages/netlify-cms-widget-markdown/src/serializers/slateRemark.js b/packages/netlify-cms-widget-markdown/src/serializers/slateRemark.js
index 381ff730..8cde5ea3 100644
--- a/packages/netlify-cms-widget-markdown/src/serializers/slateRemark.js
+++ b/packages/netlify-cms-widget-markdown/src/serializers/slateRemark.js
@@ -25,6 +25,7 @@ const typeMap = {
   'thematic-break': 'thematicBreak',
   link: 'link',
   image: 'image',
+  shortcode: 'shortcode',
 };
 
 /**
@@ -37,14 +38,7 @@ const markMap = {
   code: 'inlineCode',
 };
 
-let shortcodePlugins;
-
-export default function slateToRemark(raw, opts) {
-  /**
-   * Set shortcode plugins in outer scope.
-   */
-  ({ shortcodePlugins } = opts);
-
+export default function slateToRemark(raw) {
   /**
    * The Slate Raw AST generally won't have a top level type, so we set it to
    * "root" for clarity.
@@ -76,9 +70,7 @@ function transform(node) {
   /**
    * Run individual nodes through conversion factories.
    */
-  return ['text'].includes(node.object)
-    ? convertTextNode(node)
-    : convertNode(node, children, shortcodePlugins);
+  return ['text'].includes(node.object) ? convertTextNode(node) : convertNode(node, children);
 }
 
 /**
@@ -388,9 +380,9 @@ function getMarkLength(markType, nodes) {
 
 /**
  * Convert a single Slate Raw node to an MDAST node. Uses the unist-builder `u`
- * function to create MDAST nodes and parses shortcodes.
+ * function to create MDAST nodes.
  */
-function convertNode(node, children, shortcodePlugins) {
+function convertNode(node, children) {
   switch (node.type) {
     /**
      * General
@@ -418,19 +410,12 @@ function convertNode(node, children, shortcodePlugins) {
      * property contains the data received from the shortcode plugin's
      * `fromBlock` method when the shortcode node was created.
      *
-     * Here we get the shortcode plugin from the registry and use it's
-     * `toBlock` method to recreate the original markdown shortcode. We then
-     * insert that text into a new "html" type node (a "text" type node
-     * might get encoded or escaped by remark-stringify). Finally, we wrap
-     * the "html" node in a "paragraph" type node, as shortcode nodes must
-     * be alone in their own paragraph.
+     * Here we create a `shortcode` MDAST node that contains only the shortcode
+     * data.
      */
     case 'shortcode': {
       const { data } = node;
-      const plugin = shortcodePlugins.get(data.shortcode);
-      const text = plugin.toBlock(data.shortcodeData);
-      const textNode = u('html', text);
-      return u('paragraph', { data }, [textNode]);
+      return u(typeMap[node.type], { data });
     }
 
     /**
@@ -510,10 +495,7 @@ function convertNode(node, children, shortcodePlugins) {
      * Images
      *
      * This transformation is almost identical to that of links, except for the
-     * lack of child nodes and addition of `alt` attribute data. Currently the
-     * CMS handles block images by shortcode, so this case will only apply to
-     * inline images, which currently can only occur through raw markdown
-     * insertion.
+     * lack of child nodes and addition of `alt` attribute data.
      */
     case 'image': {
       const { url, title, alt, ...data } = get(node, 'data', {});