static-cms/src/components/Widgets/Markdown/serializers/slateRemark.js

import { get, isEmpty, concat, without, flatten, flatMap, initial } from 'lodash';
import u from 'unist-builder';

/**
 * Lookup table from Slate node type (key) to the MDAST/Remark node type
 * (value) it is serialized as.
 *
 * The mapping is many-to-one: all six Slate heading types share the MDAST
 * "heading" type, and both Slate list types share the MDAST "list" type.
 * Keys containing a dash have to be quoted; the rest are plain identifiers.
 */
const typeMap = {
  root: 'root',
  paragraph: 'paragraph',

  // Heading levels collapse into a single MDAST type.
  'heading-one': 'heading',
  'heading-two': 'heading',
  'heading-three': 'heading',
  'heading-four': 'heading',
  'heading-five': 'heading',
  'heading-six': 'heading',

  quote: 'blockquote',
  code: 'code',

  // Ordered and unordered lists collapse into a single MDAST type.
  'numbered-list': 'list',
  'bulleted-list': 'list',
  'list-item': 'listItem',

  table: 'table',
  'table-row': 'tableRow',
  'table-cell': 'tableCell',

  'thematic-break': 'thematicBreak',
  link: 'link',
  image: 'image',
};


/**
 * Lookup table from Slate mark type (key) to the MDAST/Remark node type
 * (value) used to express that mark.
 */
const markMap = {
  bold: 'strong',
  italic: 'emphasis',
  strikethrough: 'delete',

  // Unlike the other marks, "inlineCode" never wraps children; see
  // `processCodeMark` for how it is turned into the base text node instead.
  code: 'inlineCode',
};


/**
 * Separates the inline code mark from the remaining MDAST mark types of a
 * text range.
 *
 * Slate models inline code as an ordinary mark, but an MDAST "inlineCode"
 * node is a leaf: it carries a single text value and cannot have children.
 * It may be nested inside mark nodes such as "emphasis" and "strong", but it
 * can never contain them.
 *
 * So when "inlineCode" is present in `markTypes`, it is taken out of the list
 * of wrapping marks and used as the type of the base text node instead. When
 * it is absent, the base node type is "html" and `markTypes` is handed back
 * untouched (the very same array).
 *
 * Returns an object of the form `{ filteredMarkTypes, textNodeType }`.
 */
function processCodeMark(markTypes) {
  if (!markTypes.includes('inlineCode')) {
    return { filteredMarkTypes: markTypes, textNodeType: 'html' };
  }

  const isWrappingMark = markType => markType !== 'inlineCode';
  return {
    filteredMarkTypes: markTypes.filter(isWrappingMark),
    textNodeType: 'inlineCode',
  };
}


/**
 * Builds the MDAST nodes representing a piece of text. `type` is the MDAST
 * node type given to every text segment and defaults to "html".
 *
 * The text is cut at each "\n" character and a "break" node is placed between
 * consecutive segments, so text containing N newlines yields N + 1 text nodes
 * with N break nodes interleaved. Text without a newline yields an array
 * holding a single node.
 */
function createTextNodes(text, type = 'html') {
  const nodes = [];

  text.split('\n').forEach((segment, index) => {
    // Breaks only go between segments, never before the first one.
    if (index > 0) {
      nodes.push(u('break'));
    }
    nodes.push(u(type, segment));
  });

  return nodes;
}


/**
 * Nests a node inside one mark node per entry of `markTypes`.
 *
 * Each mark node receives the result so far as its only child, so the first
 * mark type ends up innermost (directly around `textNode`) and the last mark
 * type ends up outermost. With an empty `markTypes` array the original
 * `textNode` is returned as is.
 */
function wrapTextWithMarks(textNode, markTypes) {
  let wrapped = textNode;

  markTypes.forEach(markType => {
    wrapped = u(markType, [wrapped]);
  });

  return wrapped;
}

/**
 * Converts a Slate Raw text node to an array of MDAST nodes.
 *
 * A Slate text node comes in one of two shapes:
 *
 * - without a "ranges" property, its content is the "text" string;
 * - with a "ranges" property, its content is an array of ranges, each holding
 *   a "text" string and an optional "marks" array of objects whose "type" is
 *   a Slate mark type such as "bold" or "italic".
 *
 * MDAST has no marks. It expresses them as nesting instead: one node per mark,
 * wrapped around the node carrying the text. Each range is therefore handled
 * on its own: its mark types are mapped through `markMap`, a code mark is
 * turned into the base node type by `processCodeMark`, the base nodes are
 * created by `createTextNodes`, and every one of those nodes (break nodes
 * included) is wrapped by `wrapTextWithMarks`.
 *
 * For example, this Slate text node:
 *
 * {
 *   kind: 'text',
 *   ranges: [
 *     {
 *       text: 'test',
 *       marks: [{ type: 'bold' }, { type: 'italic' }]
 *     },
 *     {
 *       text: 'test two'
 *     }
 *   ]
 * }
 *
 * ...is converted to this MDAST structure (the first mark is innermost):
 *
 * [
 *   {
 *     type: 'emphasis',
 *     children: [{
 *       type: 'strong',
 *       children: [{
 *         type: 'html',
 *         value: 'test'
 *       }]
 *     }]
 *   },
 *   {
 *     type: 'html',
 *     value: 'test two'
 *   }
 * ]
 *
 * A single Slate text node can thus become several MDAST nodes, which is why
 * the result is always a flat array.
 */
function convertTextNode(node) {
  // Mark-free shape: the whole content lives in "text".
  if (!node.ranges) {
    return createTextNodes(node.text);
  }

  /**
   * Turns one range into its array of (possibly mark-wrapped) MDAST nodes.
   */
  const convertRange = ({ marks = [], text }) => {
    const mdastMarkTypes = marks.map(({ type }) => markMap[type]);

    // A code mark is not a wrapper; it decides the type of the base node.
    const { filteredMarkTypes, textNodeType } = processCodeMark(mdastMarkTypes);

    return createTextNodes(text, textNodeType)
      .map(baseNode => wrapTextWithMarks(baseNode, filteredMarkTypes));
  };

  // One array per range; merge them into a single list of sibling nodes.
  return flatten(node.ranges.map(convertRange));
}


/**
 * Collects the plain text held by a Slate Raw node. A text node exposes its
 * content either through a "text" string or through a "ranges" array whose
 * entries each carry a "text" string; any other node contributes the
 * concatenated text of its child nodes. A node with none of these yields an
 * empty string.
 */
function getSlateNodeText(node) {
  if (typeof node.text === 'string') {
    return node.text;
  }
  if (Array.isArray(node.ranges)) {
    return node.ranges.map(range => range.text || '').join('');
  }
  return (node.nodes || []).map(getSlateNodeText).join('');
}

/**
 * Convert a single non-text Slate Raw node to an MDAST node. Uses the
 * unist-builder `u` function to create MDAST nodes and serializes shortcodes.
 *
 * - `node`: the Slate Raw node; its "type" selects the conversion.
 * - `children`: the already converted MDAST children of `node`. Ignored for
 *   node types that cannot have children (shortcode, code, thematic break,
 *   image).
 * - `shortcodePlugins`: registry queried with `get(name)`; only read when
 *   `node` is a shortcode.
 *
 * Returns the MDAST node. Throws an Error when `node.type` has no conversion.
 */
function convertNode(node, children, shortcodePlugins) {
  switch (node.type) {

    /**
     * General
     *
     * Convert simple cases that only require a type and children, with no
     * additional properties.
     */
    case 'root':
    case 'paragraph':
    case 'quote':
    case 'list-item':
    case 'table':
    case 'table-row':
    case 'table-cell': {
      return u(typeMap[node.type], children);
    }

    /**
     * Shortcodes
     *
     * Shortcode nodes only exist in Slate's Raw AST if they were inserted
     * via the plugin toolbar in memory, so they should always have
     * shortcode data attached. The "shortcode" data property contains the
     * name of the registered shortcode plugin, and the "shortcodeData" data
     * property contains the data received from the shortcode plugin's
     * `fromBlock` method when the shortcode node was created.
     *
     * Here we get the shortcode plugin from the registry and use its
     * `toBlock` method to recreate the original markdown shortcode. We then
     * insert that text into a new "html" type node (a "text" type node
     * might get encoded or escaped by remark-stringify). Finally, we wrap
     * the "html" node in a "paragraph" type node, as shortcode nodes must
     * be alone in their own paragraph.
     */
    case 'shortcode': {
      const { data } = node;
      const plugin = shortcodePlugins.get(data.shortcode);
      const text = plugin.toBlock(data.shortcodeData);
      const textNode = u('html', text);
      return u('paragraph', { data }, [ textNode ]);
    }

    /**
     * Headings
     *
     * Slate schemas don't usually infer basic type info from data, so each
     * level of heading is a separately named type. The MDAST schema just
     * has a single "heading" type with the depth stored in a "depth"
     * property on the node. Here we derive the depth from the Slate node
     * type - e.g., for "heading-two", we need a depth value of 2.
     */
    case 'heading-one':
    case 'heading-two':
    case 'heading-three':
    case 'heading-four':
    case 'heading-five':
    case 'heading-six': {
      const depthMap = { one: 1, two: 2, three: 3, four: 4, five: 5, six: 6 };
      const depthText = node.type.split('-')[1];
      const depth = depthMap[depthText];
      return u(typeMap[node.type], { depth }, children);
    }

    /**
     * Code Blocks
     *
     * An MDAST code node has no children, only a string value, and may have
     * a code language, which Slate stores in the "lang" data property. The
     * value is gathered from the Slate text children directly rather than
     * from `children`, and covers text nodes in either shape ("text" or
     * "ranges") so that the content is not lost when marks are present.
     */
    case 'code': {
      const value = getSlateNodeText(node);
      const lang = get(node.data, 'lang');
      return u(typeMap[node.type], { lang }, value);
    }

    /**
     * Lists
     *
     * Our Slate schema has separate node types for ordered and unordered
     * lists, but the MDAST spec uses a single type with a boolean "ordered"
     * property to indicate whether the list is numbered. The MDAST spec also
     * allows for a "start" property to indicate the first number used for an
     * ordered list. Here we translate both values to MDAST; "start" falls
     * back to 1 when the Slate node carries no (or a falsy) start value.
     */
    case 'numbered-list':
    case 'bulleted-list': {
      const ordered = node.type === 'numbered-list';
      const props = { ordered, start: get(node.data, 'start') || 1 };
      return u(typeMap[node.type], props, children);
    }

    /**
     * Thematic Breaks
     *
     * Thematic breaks don't have children. We parse them separately for
     * clarity.
     */
    case 'thematic-break': {
      return u(typeMap[node.type]);
    }

    /**
     * Links
     *
     * The url and title attributes of link nodes are stored in the "data"
     * object of the Slate node and directly on the MDAST node.
     */
    case 'link': {
      const { url, title } = get(node, 'data', {});
      return u(typeMap[node.type], { url, title }, children);
    }

    /**
     * Images
     *
     * Like links, but without children and with an additional "alt"
     * attribute on the MDAST node.
     * NOTE(review): assumes the Slate image node keeps "url", "title" and
     * "alt" in its data object, mirroring link nodes - confirm against the
     * Remark to Slate serializer.
     */
    case 'image': {
      const { url, title, alt } = get(node, 'data', {});
      return u(typeMap[node.type], { url, title, alt });
    }

    /**
     * An unhandled type should never occur. Fail loudly and name the type,
     * instead of returning `undefined` and leaving a hole in the parent's
     * children that only surfaces later as an unrelated error.
     */
    default: {
      throw new Error(`Unable to convert Slate node of unknown type "${node.type}" to MDAST.`);
    }
  }
}


/**
 * Serializes a Slate Raw document to an MDAST tree.
 *
 * - `raw`: the Slate Raw document. It is treated as the root node and is left
 *   unmodified.
 * - `shortcodePlugins` (option): registry handed to `convertNode`, which reads
 *   it for shortcode nodes only. The options object may be omitted entirely
 *   when the document contains no shortcodes.
 *
 * Returns the MDAST node produced for the root.
 */
export default function slateToRemark(raw, { shortcodePlugins } = {}) {
  /**
   * The transform function mimics the approach of a Remark plugin for
   * conformity with the other serialization functions. This function converts
   * Slate nodes to MDAST nodes, and recursively calls itself to process child
   * nodes to arbitrary depth.
   */
  function transform(node) {
    if (node.kind === 'text') {
      return convertTextNode(node);
    }

    /**
     * Convert the child nodes first. A text child may turn into several MDAST
     * nodes, so the results are flattened into one list of siblings. A node
     * without children gets an empty array rather than `false`, which the
     * node builder would otherwise store as the string value "false".
     */
    const children = isEmpty(node.nodes) ? [] : flatten(node.nodes.map(transform));

    return convertNode(node, children, shortcodePlugins);
  }

  /**
   * The Slate Raw AST generally won't have a top level type, so we set it to
   * "root" for clarity. This is done on a shallow copy so that the caller's
   * object is not altered.
   */
  const root = Object.assign({}, raw, { type: 'root' });

  return transform(root);
}
add markdown editor soft break support 2017-08-31 11:03:08 -04:00			`import { get, isEmpty, concat, without, flatten, flatMap, initial } from 'lodash';`
refactor and document rte serializers 2017-07-31 16:41:40 -04:00			`import u from 'unist-builder';`

			`/**`
			`* Map of Slate node types to MDAST/Remark node types.`
			`*/`
			`const typeMap = {`
			`'root': 'root',`
			`'paragraph': 'paragraph',`
			`'heading-one': 'heading',`
			`'heading-two': 'heading',`
			`'heading-three': 'heading',`
			`'heading-four': 'heading',`
			`'heading-five': 'heading',`
			`'heading-six': 'heading',`
			`'quote': 'blockquote',`
			`'code': 'code',`
			`'numbered-list': 'list',`
			`'bulleted-list': 'list',`
			`'list-item': 'listItem',`
			`'table': 'table',`
			`'table-row': 'tableRow',`
			`'table-cell': 'tableCell',`
			`'thematic-break': 'thematicBreak',`
			`'link': 'link',`
			`'image': 'image',`
			`};`


			`/**`
			`* Map of Slate mark types to MDAST/Remark node types.`
			`*/`
			`const markMap = {`
			`bold: 'strong',`
			`italic: 'emphasis',`
			`strikethrough: 'delete',`
			`code: 'inlineCode',`
			`};`


			`/**`
			`* Slate treats inline code decoration as a standard mark, but MDAST does`
			`* not allow inline code nodes to contain children, only a single text`
			`* value. An MDAST inline code node can be nested within mark nodes such`
			`* as "emphasis" and "strong", but it cannot contain them.`
			`*`
			`* Because of this, if a "code" mark (translated to MDAST "inlineCode") is`
			`* in the markTypes array, we make the base text node an "inlineCode" type`
			`* instead of a standard text node.`
			`*/`
			`function processCodeMark(markTypes) {`
			`const isInlineCode = markTypes.includes('inlineCode');`
			`const filteredMarkTypes = isInlineCode ? without(markTypes, 'inlineCode') : markTypes;`
			`const textNodeType = isInlineCode ? 'inlineCode' : 'html';`
			`return { filteredMarkTypes, textNodeType };`
			`}`


add markdown editor soft break support 2017-08-31 11:03:08 -04:00			`/**`
			`* Returns an array of one or more MDAST text nodes of the given type, derived`
			`* from the text received. Certain transformations, such as line breaks, cause`
			`* multiple nodes to be returned.`
			`*/`
			`function createTextNodes(text, type = 'html') {`
			`/**`
			`* Split the text string at line breaks, then map each substring to an array`
			`* pair consisting of an MDAST text node followed by a break node. This will`
			* result in nested arrays, so we use `flatMap` to produce a flattened array,
			* and `initial` to leave off the superfluous trailing break.
			`*/`
			`const brokenText = text.split('\n');`
			`const toPair = str => [u(type, str), u('break')];`
			`return initial(flatMap(brokenText, toPair));`
			`}`


refactor and document rte serializers 2017-07-31 16:41:40 -04:00			`/**`
			`* Wraps a text node in one or more mark nodes by placing the text node in an`
			* array and using that as the `children` value of a mark node. The resulting
			`* mark node is then placed in an array and used as the child of a mark node for`
			* the next mark type in `markTypes`. This continues for each member of
			* `markTypes`. If `markTypes` is empty, the original text node is returned.
			`*/`
			`function wrapTextWithMarks(textNode, markTypes) {`
			`const wrapTextWithMark = (childNode, markType) => u(markType, [childNode]);`
			`return markTypes.reduce(wrapTextWithMark, textNode);`
			`}`

			`/**`
			`* Converts a Slate Raw text node to an MDAST text node.`
			`*`
			`* Slate text nodes without marks often simply have a "text" property with`
			`* the value. In this case the conversion to MDAST is simple. If a Slate`
			`* text node does not have a "text" property, it will instead have a`
			`* "ranges" property containing an array of objects, each with an array of`
			`* marks, such as "bold" or "italic", along with a "text" property.`
			`*`
			`* MDAST instead expresses such marks in a nested structure, with individual`
			`* nodes for each mark type nested until the deepest mark node, which will`
			`* contain the text node.`
			`*`
			`* To convert a Slate text node's marks to MDAST, we treat each "range" as a`
			`* separate text node, convert the text node itself to an MDAST text node,`
			`* and then recursively wrap the text node for each mark, collecting the results`
			`* of each range in a single array of child nodes.`
			`*`
			`* For example, this Slate text node:`
			`*`
			`* {`
			`* kind: 'text',`
			`* ranges: [`
			`* {`
			`* text: 'test',`
			`* marks: ['bold', 'italic']`
			`* },`
			`* {`
			`* text: 'test two'`
			`* }`
			`* ]`
			`* }`
			`*`
			`* ...would be converted to this MDAST nested structure:`
			`*`
			`* [`
			`* {`
			`* type: 'strong',`
			`* children: [{`
			`* type: 'emphasis',`
			`* children: [{`
			`* type: 'text',`
			`* value: 'test'`
			`* }]`
			`* }]`
			`* },`
			`* {`
			`* type: 'text',`
			`* value: 'test two'`
			`* }`
			`* ]`
			`*`
			`* This example also demonstrates how a single Slate node may need to be`
			`* replaced with multiple MDAST nodes, so the resulting array must be flattened.`
			`*/`
			`function convertTextNode(node) {`

			`/**`
			`* If the Slate text node has no "ranges" property, just return an equivalent`
			`* MDAST node.`
			`*/`
			`if (!node.ranges) {`
add markdown editor soft break support 2017-08-31 11:03:08 -04:00			`return createTextNodes(node.text);`
refactor and document rte serializers 2017-07-31 16:41:40 -04:00			`}`

			`/**`
			`* If there is no "text" property, convert the text range(s) to an array of`
			`* one or more nested MDAST nodes.`
			`*/`
			`const textNodes = node.ranges.map(range => {`
			`/**`
			`* Get an array of the mark types, converted to their MDAST equivalent`
			`* types.`
			`*/`
			`const { marks = [], text } = range;`
			`const markTypes = marks.map(mark => markMap[mark.type]);`

			`/**`
			`* Code marks must be removed from the marks array, and the presence of a`
			`* code mark changes the text node type that should be used.`
			`*/`
			`const { filteredMarkTypes, textNodeType } = processCodeMark(markTypes);`

			`/**`
			`* Create the base text node.`
			`*/`
add markdown editor soft break support 2017-08-31 11:03:08 -04:00			`const textNodes = createTextNodes(text, textNodeType);`
refactor and document rte serializers 2017-07-31 16:41:40 -04:00
			`/**`
			`* Recursively wrap the base text node in the individual mark nodes, if`
			`* any exist.`
			`*/`
add markdown editor soft break support 2017-08-31 11:03:08 -04:00			`return textNodes.map(textNode => wrapTextWithMarks(textNode, filteredMarkTypes));`
refactor and document rte serializers 2017-07-31 16:41:40 -04:00			`});`

			`/**`
			`* Since each range will be mapped into an array, we flatten the result to`
			`* return a single array of all nodes.`
			`*/`
			`return flatten(textNodes);`
			`}`


			`/**`
			* Convert a single Slate Raw node to an MDAST node. Uses the unist-builder `u`
			`* function to create MDAST nodes and parses shortcodes.`
			`*/`
			`function convertNode(node, children, shortcodePlugins) {`
			`switch (node.type) {`

			`/**`
			`* General`
			`*`
			`* Convert simple cases that only require a type and children, with no`
			`* additional properties.`
			`*/`
			`case 'root':`
			`case 'paragraph':`
			`case 'quote':`
			`case 'list-item':`
			`case 'table':`
			`case 'table-row':`
			`case 'table-cell': {`
			`return u(typeMap[node.type], children);`
			`}`

			`/**`
			`* Shortcodes`
			`*`
			`* Shortcode nodes only exist in Slate's Raw AST if they were inserted`
			`* via the plugin toolbar in memory, so they should always have`
			`* shortcode data attached. The "shortcode" data property contains the`
			`* name of the registered shortcode plugin, and the "shortcodeData" data`
			`* property contains the data received from the shortcode plugin's`
			* `fromBlock` method when the shortcode node was created.
			`*`
			`* Here we get the shortcode plugin from the registry and use it's`
			* `toBlock` method to recreate the original markdown shortcode. We then
			`* insert that text into a new "html" type node (a "text" type node`
			`* might get encoded or escaped by remark-stringify). Finally, we wrap`
			`* the "html" node in a "paragraph" type node, as shortcode nodes must`
			`* be alone in their own paragraph.`
			`*/`
			`case 'shortcode': {`
			`const { data } = node;`
			`const plugin = shortcodePlugins.get(data.shortcode);`
			`const text = plugin.toBlock(data.shortcodeData);`
			`const textNode = u('html', text);`
			`return u('paragraph', { data }, [ textNode ]);`
			`}`

			`/**`
			`* Headings`
			`*`
			`* Slate schemas don't usually infer basic type info from data, so each`
			`* level of heading is a separately named type. The MDAST schema just`
			`* has a single "heading" type with the depth stored in a "depth"`
			`* property on the node. Here we derive the depth from the Slate node`
			`* type - e.g., for "heading-two", we need a depth value of "2".`
			`*/`
			`case 'heading-one':`
			`case 'heading-two':`
			`case 'heading-three':`
			`case 'heading-four':`
			`case 'heading-five':`
			`case 'heading-six': {`
			`const depthMap = { one: 1, two: 2, three: 3, four: 4, five: 5, six: 6 };`
			`const depthText = node.type.split('-')[1];`
			`const depth = depthMap[depthText];`
			`return u(typeMap[node.type], { depth }, children);`
			`}`

			`/**`
			`* Code Blocks`
			`*`
			`* Code block nodes have a single text child, and may have a code language`
			`* stored in the "lang" data property. Here we transfer both the node`
			`* value and the "lang" data property to the new MDAST node.`
			`*/`
			`case 'code': {`
			`const value = get(node.nodes, [0, 'text']);`
			`const lang = get(node.data, 'lang');`
			`return u(typeMap[node.type], { lang }, value);`
			`}`

			`/**`
			`* Lists`
			`*`
			`* Our Slate schema has separate node types for ordered and unordered`
			`* lists, but the MDAST spec uses a single type with a boolean "ordered"`
			`* property to indicate whether the list is numbered. The MDAST spec also`
			`* allows for a "start" property to indicate the first number used for an`
			`* ordered list. Here we translate both values to our Slate schema.`
			`*/`
			`case 'numbered-list':`
			`case 'bulleted-list': {`
			`const ordered = node.type === 'numbered-list';`
			`const props = { ordered, start: get(node.data, 'start') \|\| 1 };`
			`return u(typeMap[node.type], props, children);`
			`}`

			`/**`
			`* Thematic Breaks`
			`*`
			`* Thematic breaks don't have children. We parse them separately for`
			`* clarity.`
			`*/`
			`case 'thematic-break': {`
			`return u(typeMap[node.type]);`
			`}`

			`/**`
			`* Links`
			`*`
			`* The url and title attributes of link nodes are stored in properties on`
			`* the node for both Slate and Remark schemas.`
			`*/`
			`case 'link': {`
			`const { url, title } = get(node, 'data', {});`
			`return u(typeMap[node.type], { url, title }, children);`
			`}`

			`/**`
			`* No default case is supplied because an unhandled case should never`
			`* occur. In the event that it does, let the error throw (for now).`
			`*/`
			`}`
			`}`


			`export default function slateToRemark(raw, { shortcodePlugins }) {`
			`/**`
			`* The transform function mimics the approach of a Remark plugin for`
			`* conformity with the other serialization functions. This function converts`
			`* Slate nodes to MDAST nodes, and recursively calls itself to process child`
			`* nodes to arbitrary depth.`
			`*/`
			`function transform(node) {`

			`/**`
			* Call `transform` recursively on child nodes, and flatten the resulting
			`* array.`
			`*/`
			`const children = !isEmpty(node.nodes) && flatten(node.nodes.map(transform));`

			`/**`
			`* Run individual nodes through conversion factories.`
			`*/`
			`return node.kind === 'text' ? convertTextNode(node) : convertNode(node, children, shortcodePlugins);`
			`}`

			`/**`
			`* The Slate Raw AST generally won't have a top level type, so we set it to`
			`* "root" for clarity.`
			`*/`
			`raw.type = 'root';`

			`const mdast = transform(raw);`
			`return mdast;`
			`}`