From 361c3d5284cec6412d09e2b93219dec799c44068 Mon Sep 17 00:00:00 2001
From: Shawn Erquhart <shawn@erquh.art>
Date: Wed, 7 Jun 2017 22:23:06 -0400
Subject: [PATCH] improve prosemirror parser, fix new doc creation

---
 src/components/MarkupItReactRenderer/index.js |  11 +-
 .../VisualEditor/parser.js                    | 323 ++++++++++--------
 2 files changed, 189 insertions(+), 145 deletions(-)
diff --git a/src/components/MarkupItReactRenderer/index.js b/src/components/MarkupItReactRenderer/index.js
index b152187d..f48f5888 100644
--- a/src/components/MarkupItReactRenderer/index.js
+++ b/src/components/MarkupItReactRenderer/index.js
@@ -30,15 +30,8 @@ const renderEditorPluginsProcessor = (node, getAsset) => {
       if (plugin) {
         const data = plugin.get('fromBlock')(value.match(plugin.get('pattern')));
         const preview = plugin.get('toPreview')(data);
-        const output = typeof preview === 'string' ?
-          <div dangerouslySetInnerHTML={{ __html: preview }}/> :
-          preview;
-
-        const result = unified()
-          .use(parseHtml, { fragment: true })
-          .parse(renderToStaticMarkup(output));
-
-        return result.children[0];
+        const output = `<div>${typeof preview === 'string' ? preview : renderToStaticMarkup(preview)}</div>`;
+        return unified().use(parseHtml, { fragment: true }).parse(output);
       }
     }
   }
diff --git a/src/components/Widgets/MarkdownControlElements/VisualEditor/parser.js b/src/components/Widgets/MarkdownControlElements/VisualEditor/parser.js
index 96c8c3af..6cebc077 100644
--- a/src/components/Widgets/MarkdownControlElements/VisualEditor/parser.js
+++ b/src/components/Widgets/MarkdownControlElements/VisualEditor/parser.js
@@ -1,161 +1,212 @@
-/* eslint-disable */
-/*
-  Based closely on
-  https://github.com/ProseMirror/prosemirror-markdown/blob/master/src/from_markdown.js
-*/
-
 import unified from 'unified';
 import markdown from 'remark-parse';
 import { Mark } from 'prosemirror-model';
+import isEmpty from 'lodash/isEmpty';
 
 let schema;
 let plugins
 let activeMarks = Mark.none;
 let textsArray = [];
 
-const processMdastNode = node => {
-  if (node.type === "root") {
-    const content = node.children.map(childNode => processMdastNode(childNode));
-    return schema.node("doc", {}, content);
+/**
+ * A unified plugin for converting an MDAST to a ProseMirror tree.
+ * @returns {function} the `transform` transformer function
+ */
+function markdownToProseMirror() {
+  return transform;
+}
+
+/**
+ * The MDAST transformer function. Text nodes are collected via processText.
+ * @param {object} node an MDAST node
+ * @returns {Node|object|undefined} a ProseMirror Node for block nodes, the node itself if its type is unknown, else undefined
+ */
+function transform(node) {
+  if (node.type === 'text') {
+    processText(node.value);
+    return;
   }
 
-  /***
-   * Block nodes
-   ***/
-  // heading and paragraph nodes contain raw text so we need to collect
-  // the flat list of text nodes. Other node types contain paragraph nodes.
-  if (node.type === "heading") {
-    node.children.forEach(childNode => processMdastNode(childNode));
-    const pNode = schema.node("heading", { level: node.depth }, textsArray);
+  const nodeDef = getNodeDef(node);
+
+  if (!nodeDef) {
+    return node;
+  }
+
+  return (nodeDef.block ? processBlock : processInline)(nodeDef, node.children, node.value);
+}
+
+/**
+ * Provides the information required to convert an MDAST node into a ProseMirror
+ * node or mark. Returns undefined for MDAST types without a case below.
+ * @param {object} node - an MDAST node
+ * @returns {object} conversion data with the following shape:
+ *   {string} pmType - the equivalent node or mark type in the ProseMirror schema
+ *   {boolean} block - true if the node is block level, otherwise absent
+ *   {object} attrs - passed to ProseMirror's schema mark/node creation methods
+ *   {boolean} hasText - true if the node's content is collected from text nodes
+ *   {object} content - overrides `node.children` as node content
+ *   {Node} defaultContent - content to use if node has no children (default: null)
+ *   {boolean} canContainPlugins - true for nodes that may contain plugins
+ */
+function getNodeDef({ type, ordered, lang, value, depth, url, alt }) {
+  switch (type) {
+    case 'root':
+      return { pmType: 'doc', block: true, defaultContent: schema.node('paragraph') };
+    case 'heading':
+      return { pmType: type, attrs: { level: depth }, hasText: true, block: true };
+    case 'paragraph':
+      return { pmType: type, hasText: true, block: true, canContainPlugins: true };
+    case 'blockquote':
+      return { pmType: type, block: true };
+    case 'list':
+      return { pmType: ordered ? 'ordered_list' : 'bullet_list', attrs: { tight: true }, block: true };
+    case 'listItem':
+      return { pmType: 'list_item', block: true };
+    case 'thematicBreak':
+      return { pmType: 'horizontal_rule', block: true };
+    case 'break':
+      return { pmType: 'hard_break', block: true };
+    case 'image':
+      return { pmType: type, block: true, attrs: { src: url, alt } };
+    case 'code':
+      return { pmType: 'code_block', attrs: { params: lang },  content: schema.text(value), block: true };
+    case 'emphasis':
+      return { pmType: 'em' };
+    case 'strong':
+      return { pmType: type };
+    case 'link':
+      return { pmType: 'strong' }; // NOTE(review): links map to the 'strong' mark and no attrs (e.g. url) are passed - confirm intended
+    case 'inlineCode':
+      return { pmType: 'code' };
+  }
+}
+
+/**
+ * Derives content from block nodes. Block nodes containing raw text, such as
+ * headings and paragraphs, take their content from the text nodes collected in
+ * `textsArray`, which is then reset; other block nodes use their children.
+ * @param {array} children child nodes
+ * @param {boolean} hasText if true, the node contains raw text nodes
+ * @returns {array} collected text nodes if hasText, else processed child nodes
+ */
+function getBlockContent(children, hasText) {
+  // transform returns undefined for text and inline nodes, so we filter those out
+  const processedChildren = children.map(transform).filter(val => val);
+
+  if (hasText) {
+    const content = textsArray; // NOTE(review): processedChildren is discarded on this path - confirm intended
     textsArray = [];
-    return pNode;
-  } else if (node.type === "paragraph") {
-
-    // TODO: improve plugin handling
-
-    // Handle externally defined plugins (they'll be wrapped in paragraphs)
-    if (node.children.length === 1 && node.children[0].type === 'text') {
-      const value = node.children[0].value;
-      const plugin = plugins.find(plugin => plugin.get('pattern').test(value));
-      if (plugin) {
-        const nodeType = schema.nodes[`plugin_${plugin.get('id')}`];
-        const data = plugin.get('fromBlock').call(plugin, value.match(plugin.get('pattern')));
-        return nodeType.create(data);
-      }
-    }
-
-    // Handle the internally defined image plugin. At this point the token has
-    // already been parsed as an image by Remark, so we have to catch it by
-    // checking for the 'image' type.
-    if (node.children.length === 1 && node.children[0].type === 'image') {
-      const { url, alt } = node.children[0];
-
-      // Until we improve the editor components API for built in components,
-      // we'll mock the result of String.prototype.match to pass in to the image
-      // plugin's fromBlock method.
-      const matches = [ , alt, url ];
-      const plugin = plugins.find(plugin => plugin.id === 'image');
-      if (plugin) {
-        const nodeType = schema.nodes.plugin_image;
-        const data = plugin.get('fromBlock').call(plugin, matches);
-        return nodeType.create(data);
-      }
-    }
-
-    node.children.forEach(childNode => processMdastNode(childNode));
-    const pNode = schema.node("paragraph", {}, textsArray);
-    textsArray = [];
-    return pNode;
-  } else if (node.type === "blockquote") {
-    const content = node.children.map(childNode => processMdastNode(childNode));
-    return schema.node("blockquote", {}, content);
-  } else if (node.type === "list") {
-    const content = node.children.map(childNode => processMdastNode(childNode));
-    if (node.ordered) {
-      return schema.node("ordered_list", { tight: true, order: 1 }, content);
-    } else {
-      return schema.node("bullet_list", { tight: true }, content);
-    }
-  } else if (node.type === "listItem") {
-    const content = node.children.map(childNode => processMdastNode(childNode));
-    return schema.node("list_item", {}, content);
-  } else if (node.type === "thematicBreak") {
-    return schema.node("horizontal_rule");
-  } else if (node.type === "break") {
-    return schema.node("hard_break");
-  } else if (node.type === "image") {
-    return schema.node("image", { src: node.url, alt: node.alt });
-  } else if (node.type === "code") {
-    return schema.node(
-      "code_block",
-      {
-        params: node.lang,
-      },
-      schema.text(node.value)
-    );
-  }
-  /***
-   * End block items
-   ***/
-
-  // Inline
-  if (node.type === "text") {
-    textsArray.push(schema.text(node.value, activeMarks));
-    return;
-  } else if (node.type === "emphasis") {
-    const mark = schema.marks["em"].create();
-    activeMarks = mark.addToSet(activeMarks);
-    node.children.forEach(childNode => processMdastNode(childNode));
-    activeMarks = mark.removeFromSet(activeMarks);
-    return;
-  } else if (node.type === "strong") {
-    const mark = schema.marks["strong"].create();
-    activeMarks = mark.addToSet(activeMarks);
-    node.children.forEach(childNode => processMdastNode(childNode));
-    activeMarks = mark.removeFromSet(activeMarks);
-    return;
-  } else if (node.type === "link") {
-    const mark = schema.marks["strong"].create({
-      title: node.title,
-      href: node.url,
-    });
-    activeMarks = mark.addToSet(activeMarks);
-    node.children.forEach(childNode => processMdastNode(childNode));
-    activeMarks = mark.removeFromSet(activeMarks);
-    return;
-  } else if (node.type === "inlineCode") {
-    // Inline code is like a text node in that it can't have children
-    // so we add it to textsArray immediately.
-    const mark = schema.marks["code"].create();
-    activeMarks = mark.addToSet(activeMarks);
-    textsArray.push(schema.text(node.value, activeMarks));
-    activeMarks = mark.removeFromSet(activeMarks);
-    return;
+    return content;
   }
 
-  return node;
-};
+  return processedChildren;
+}
 
-const compileMarkdownToProseMirror = src => {
-  // Clear out any old state.
-  let activeMarks = Mark.none;
-  let textsArray = [];
+/**
+ * Processes text nodes by pushing a schema text node, carrying the currently active marks, onto `textsArray`.
+ * @param {string} value the node's text content
+ * @returns {undefined}
+ */
+function processText(value) {
+  textsArray.push(schema.text(value, activeMarks));
+  return;
+}
 
+/**
+ * Processes block nodes, returning a plugin node instead when the only child matches a plugin.
+ * @param {object} nodeDef the conversion data for this node type, via getNodeDef
+ * @param {array} children the node's child nodes
+ * @return {Node} a ProseMirror node
+ */
+function processBlock({ pmType, attrs, content, defaultContent = null, hasText, canContainPlugins }, children) {
+  // Plugins are just text shortcodes, so they're rendered as a text node within
+  // a paragraph node in the MDAST. We use a regex to determine if the text
+  // represents a plugin, so for performance reasons we only test text nodes that
+  // are the only child of a node that can contain plugins. Currently, only
+  // paragraphs may contain plugins.
+  //
+  // Additionally, images are handled via plugin. Because images already have a
+  // markdown pattern, they're represented as 'image' type in the MDAST. We
+  // check for those here, too.
+  if (canContainPlugins && children.length === 1 && ['text', 'image'].includes(children[0].type)) {
+    const processedPlugin = processPlugin(children[0]);
+    if (processedPlugin) {
+      return processedPlugin;
+    }
+  }
+
+  const nodeContent = content || (isEmpty(children) ? defaultContent : getBlockContent(children, hasText));
+  return schema.node(pmType, attrs, nodeContent);
+}
+
+/**
+ * Processes inline nodes by marking the text they contain. Returns undefined.
+ * @param {object} nodeDef the conversion data for this node type, via getNodeDef
+ * @param {array} children the node's child nodes
+ * @param {string} value raw text, used when the node has no children
+ */
+function processInline({ pmType, attrs }, children, value) {
+  const mark = schema.marks[pmType].create(attrs);
+  activeMarks = mark.addToSet(activeMarks);
+
+  if (isEmpty(children)) {
+    textsArray.push(schema.text(value, activeMarks));
+  } else {
+    children.forEach(childNode => transform(childNode));
+  }
+
+  activeMarks = mark.removeFromSet(activeMarks);
+  return;
+}
+
+/**
+ * Processes plugins, which are represented as user-defined text shortcodes.
+ *
+ * The built in image plugin is handled differently because it overrides
+ * remark/rehype's handling of a recognized markdown/html entity. Ideally, we
+ * would stop remark from parsing images at all, so that no special logic would
+ * be required, but overriding this way would require a plugin to indicate what
+ * entity it's overriding.
+ *
+ * @param {object} node a remark node representing a user defined plugin
+ * @return {Node} a ProseMirror Node, or undefined if no plugin matches
+ */
+function processPlugin({ type, value, alt, url }) {
+  const isImage = type === 'image';
+  const plugin = isImage ? plugins.get('image') : plugins.find(plugin => plugin.get('pattern').test(value));
+  if (plugin) {
+    const matches = isImage ? [ , alt, url ] : value.match(plugin.get('pattern'));
+    const nodeType = schema.nodes[`plugin_${plugin.get('id')}`];
+    const data = plugin.get('fromBlock').call(plugin, matches);
+    return nodeType.create(data);
+  }
+}
+
+/**
+ * Uses unified to parse markdown into an MDAST and run the ProseMirror transform.
+ * @param {string} src raw markdown
+ * @returns {Node} a ProseMirror Node
+ */
+function parser(src) {
   const result = unified()
     .use(markdown, { commonmark: true, footnotes: true, pedantic: true })
     .parse(src);
 
-  const output = unified()
-    .use(() => processMdastNode)
+  return unified()
+    .use(markdownToProseMirror)
     .runSync(result);
+}
 
-  return output;
-};
-
-const parser = (s, p) => {
+/**
+ * Stores the schema and plugins at top scope and returns the parser function.
+ * @param {Schema} s a ProseMirror schema
+ * @param {Map} p Immutable Map of registered plugins
+ */
+function parserGetter(s, p) {
   schema = s;
   plugins = p;
-  return compileMarkdownToProseMirror;
-};
+  return parser;
+}
 
-export default parser;
+export default parserGetter;