static-cms/packages/netlify-cms-widget-markdown/src/serializers/slateRemark.js

import { get, without, last, map, intersection, omit } from 'lodash';
import u from 'unist-builder';
import mdastToString from 'mdast-util-to-string';

/**
 * Map of Slate node types to MDAST/Remark node types.
 *
 * Keys are the `type` values found on Slate block and inline nodes; values
 * are the MDAST node `type` handed to `unist-builder` when that node is
 * converted. Several Slate types collapse into one MDAST type: all six
 * heading levels map to `heading` and both list kinds map to `list`. The
 * properties that tell them apart (`depth`, `ordered`) are added during
 * block conversion, not here.
 */
const typeMap = {
  root: 'root',
  paragraph: 'paragraph',
  'heading-one': 'heading',
  'heading-two': 'heading',
  'heading-three': 'heading',
  'heading-four': 'heading',
  'heading-five': 'heading',
  'heading-six': 'heading',
  quote: 'blockquote',
  'code-block': 'code',
  'numbered-list': 'list',
  'bulleted-list': 'list',
  'list-item': 'listItem',
  table: 'table',
  'table-row': 'tableRow',
  'table-cell': 'tableCell',
  break: 'break',
  'thematic-break': 'thematicBreak',
  link: 'link',
  image: 'image',
  shortcode: 'shortcode',
};

/**
 * Map of Slate mark types to MDAST/Remark node types.
 *
 * Slate nodes carry formatting as a list of marks, while MDAST expresses the
 * same formatting as nodes. `bold`, `italic` and `strikethrough` become
 * parent nodes wrapping the converted children; `code` becomes an
 * `inlineCode` node that holds the text as its value.
 */
const markMap = {
  bold: 'strong',
  italic: 'emphasis',
  strikethrough: 'delete',
  code: 'inlineCode',
};

// Matches a run of whitespace at the very start of a string together with the
// first non-whitespace character that follows it. A string consisting only of
// whitespace therefore does not match.
const leadingWhitespaceExp = /^\s+\S/;
// Matches the run of whitespace that ends a string; the match starts at the
// first character of that run. A whitespace-only string matches from index 0.
const trailingWhitespaceExp = /(?!\S)\s+$/;

export default function slateToRemark(raw, { voidCodeBlock }) {
  /**
   * The Slate Raw AST generally won't have a top level type, so we set it to
   * "root" for clarity.
   */
  raw.type = 'root';

  return transform(raw);

  /**
   * Recursively converts a Slate node, and everything beneath it, into an
   * MDAST node. The shape mirrors a Remark plugin transformer so that it
   * stays consistent with the other serialization functions.
   *
   * Whether a node holds block children is decided by looking at its first
   * child only. Returns whatever `convertBlockNode` produces for the node,
   * which may be `undefined`.
   */
  function transform(node) {
    const firstChild = node.nodes?.[0];
    let children;

    if (firstChild?.object === 'block') {
      // Block children are converted one by one; children that convert to
      // nothing are left out of the result.
      children = node.nodes.map(child => transform(child)).filter(Boolean);
    } else {
      // Text and inline children are converted as a group so that adjacent
      // nodes can share marks.
      children = convertInlineAndTextChildren(node.nodes);
    }

    return convertBlockNode(node, children);
  }

  /**
   * Returns copies of `nodes` with the mark `markType` removed; the input
   * nodes are not modified.
   *
   * - `link` nodes are processed recursively through their child `nodes`.
   * - `image` and `break` nodes keep their marks in `data.marks`. Only the
   *   requested mark is removed there, so any other marks survive for later
   *   passes. When no marks remain, the `marks` key is omitted from `data`.
   * - Every other node has the mark filtered out of its `marks` array. A node
   *   without a `marks` array is copied as is.
   */
  function removeMarkFromNodes(nodes, markType) {
    const isOtherMark = ({ type }) => type !== markType;

    return nodes.map(node => {
      switch (node.type) {
        case 'link':
          return { ...node, nodes: removeMarkFromNodes(node.nodes, markType) };

        case 'image':
        case 'break': {
          const remainingMarks = (node.data?.marks || []).filter(isOtherMark);
          const data = omit(node.data, 'marks');
          return remainingMarks.length > 0
            ? { ...node, data: { ...data, marks: remainingMarks } }
            : { ...node, data };
        }

        default:
          return {
            ...node,
            marks: node.marks ? node.marks.filter(isOtherMark) : node.marks,
          };
      }
    });
  }

  /**
   * Lists the mark types that apply to a node, as an array of strings.
   *
   * - `link`: the mark types common to all of its children, ignoring `code`.
   * - `break` / `image`: the types of the marks stored in `data.marks`.
   * - anything else: the types of the marks in the node's `marks` array.
   */
  function getNodeMarks(node) {
    const toMarkType = mark => mark.type;

    if (node.type === 'link') {
      // Code marks can't always be condensed together. When every child of a
      // link carries the same mark, that mark is reported for the link itself
      // and the link ends up nested inside of it. Code marks sometimes can't
      // be handled that way, e.g. when they wrap all of the link's text, so
      // they are stripped from the children before looking for common marks
      // and therefore stay where they are.
      const marksPerChild = node.nodes.map(child => {
        const marks = child.marks
          ? child.marks.filter(({ type }) => type !== 'code')
          : child.marks;
        return getNodeMarks({ ...child, marks });
      });
      return intersection(...marksPerChild);
    }

    if (node.type === 'break' || node.type === 'image') {
      return map(get(node, ['data', 'marks']), toMarkType);
    }

    return map(node.marks, toMarkType);
  }

  /**
   * Works out which of `marks` are also present on `node`.
   *
   * Normally this is the plain intersection of `marks` with the node's own
   * marks. The exception is when `code` is the first shared mark: then the
   * incoming `marks` array is returned unchanged if `code` is the node's only
   * mark, and an empty array is returned otherwise.
   */
  function getSharedMarks(marks, node) {
    const nodeMarks = getNodeMarks(node);
    const common = intersection(marks, nodeMarks);

    if (common[0] !== 'code') {
      return common;
    }

    if (nodeMarks.length === 1) {
      return marks;
    }

    return [];
  }

  /**
   * Splits `nodes` into a leading group of nodes that share a mark, and the
   * nodes that follow the group.
   *
   * The group always contains the first node. If that node has marks, the
   * group is extended over the following nodes for as long as they share at
   * least one mark with it, narrowing the candidate marks on the way. Falsy
   * entries in `nodes` are skipped over without ending the group.
   *
   * Returns `[markType, groupNodes, remainingNodes]`, where `markType` is the
   * first mark left in common (or `undefined` if the first node has no
   * marks), `groupNodes` are copies of the grouped nodes with that mark
   * removed (or the nodes themselves when there is no mark), and
   * `remainingNodes` is everything after the group.
   */
  function extractFirstMark(nodes) {
    let firstGroupMarks = getNodeMarks(nodes[0]) || [];

    // If the code mark comes first but other marks are present too, move it
    // to the end so that the other marks are extracted first. Note that the
    // marks array itself must be passed to `without`; anything that is not an
    // array makes it return an empty list, which would discard every mark
    // except code.
    if (firstGroupMarks[0] === 'code' && firstGroupMarks.length > 1) {
      firstGroupMarks = [...without(firstGroupMarks, 'code'), 'code'];
    }

    let splitIndex = 1;

    if (firstGroupMarks.length > 0) {
      while (splitIndex < nodes.length) {
        if (nodes[splitIndex]) {
          const sharedMarks = getSharedMarks(firstGroupMarks, nodes[splitIndex]);
          if (sharedMarks.length === 0) {
            break;
          }
          firstGroupMarks = sharedMarks;
        }
        splitIndex += 1;
      }
    }

    const markType = firstGroupMarks[0];
    const childNodes = nodes.slice(0, splitIndex);
    const updatedChildNodes = markType ? removeMarkFromNodes(childNodes, markType) : childNodes;
    const remainingNodes = nodes.slice(splitIndex);

    return [markType, updatedChildNodes, remainingNodes];
  }

  /**
   * Splits the whitespace off one end of a node's text.
   *
   * By default the whole run of leading whitespace is split off; pass
   * `{ trailing: true }` to split off the trailing run instead. Leading
   * whitespace is only split when it is followed by a non-whitespace
   * character, so whitespace-only text is left alone in that mode.
   *
   * Returns `{ whitespace, trimmedNode }`, where `trimmedNode` is a copy of
   * `node` whose `text` no longer includes the removed whitespace. When the
   * node has no text, or nothing matches, the result is
   * `{ trimmedNode: node }` with the original node and no `whitespace` key.
   */
  function splitWhitespace(node, { trailing } = {}) {
    if (!node.text) {
      return { trimmedNode: node };
    }

    let splitIndex;
    if (trailing) {
      // The match starts where the trailing whitespace run starts.
      splitIndex = node.text.search(trailingWhitespaceExp);
    } else {
      // The match covers the whitespace run plus one non-whitespace
      // character, so the run ends one character before the match does.
      const match = node.text.match(leadingWhitespaceExp);
      splitIndex = match ? match[0].length - 1 : -1;
    }

    if (splitIndex === -1) {
      return { trimmedNode: node };
    }

    const head = node.text.slice(0, splitIndex);
    const tail = node.text.slice(splitIndex);

    return trailing
      ? { whitespace: tail, trimmedNode: { ...node, text: head } }
      : { whitespace: head, trimmedNode: { ...node, text: tail } };
  }

  /**
   * Rebuilds a node list with its first and last entries swapped for
   * `leadingNode` and `trailingNode` respectively.
   *
   * An empty list stays empty. For a single-entry list only `trailingNode`
   * is returned.
   */
  function collectCenterNodes(nodes, leadingNode, trailingNode) {
    const count = nodes.length;

    if (count === 0) {
      return [];
    }

    if (count === 1) {
      return [trailingNode];
    }

    // For exactly two nodes the middle slice is empty.
    const middleNodes = nodes.slice(1, -1);
    return [leadingNode, ...middleNodes, trailingNode];
  }

  /**
   * Separates the whitespace at the outer edges of a group of nodes from the
   * nodes themselves.
   *
   * Returns `{ leadingWhitespace, centerNodes, trailingWhitespace }`:
   * the whitespace split off the start of the first node, the nodes with
   * that whitespace removed (falsy entries dropped), and the whitespace
   * split off the end of the last node. Either whitespace value is
   * `undefined` when there was nothing to split.
   */
  function normalizeFlankingWhitespace(nodes) {
    const leading = splitWhitespace(nodes[0]);

    // With a single node, the trailing split must run on the node that has
    // already had its leading whitespace removed, so both ends get trimmed.
    const trailingSource = nodes.length > 1 ? last(nodes) : leading.trimmedNode;
    const trailing = splitWhitespace(trailingSource, { trailing: true });

    const centerNodes = collectCenterNodes(
      nodes,
      leading.trimmedNode,
      trailing.trimmedNode,
    ).filter(Boolean);

    return {
      leadingWhitespace: leading.whitespace,
      centerNodes,
      trailingWhitespace: trailing.whitespace,
    };
  }

  /**
   * Converts a list of Slate text and inline nodes into MDAST nodes.
   *
   * The list is consumed from the front. Unmarked text becomes an `html`
   * node holding the text. Inline nodes and marked text are handed to
   * `extractFirstMark`, which returns the leading group of nodes that share
   * a mark; the group is converted recursively and wrapped in the MDAST node
   * for that mark.
   *
   * The `nodes` array passed in is never modified.
   *
   * Returns the converted MDAST nodes in document order.
   */
  function convertInlineAndTextChildren(nodes = []) {
    const convertedNodes = [];
    const createText = text => text && u('html', text);
    let remainingNodes = nodes;

    while (remainingNodes.length > 0) {
      const nextNode = remainingNodes[0];
      const isInlineOrMarked =
        nextNode.object === 'inline' || (nextNode.marks && nextNode.marks.length > 0);

      if (!isInlineOrMarked) {
        convertedNodes.push(u('html', nextNode.text));
        // Advance with a copy: `remainingNodes` may still be the caller's
        // array, which must not be emptied as a side effect of serializing.
        remainingNodes = remainingNodes.slice(1);
        continue;
      }

      const [markType, markNodes, remainder] = extractFirstMark(remainingNodes);
      remainingNodes = remainder;

      if (markType === 'code') {
        /**
         * A node with a code mark will be a text node, and will not be adjacent
         * to a sibling code node as the Slate schema requires them to be
         * merged. Markdown also requires at least a space between inline code
         * nodes.
         */
        const node = markNodes[0];
        convertedNodes.push(u(markMap[markType], node.data, node.text));
      } else if (!markType && markNodes.length === 1 && markNodes[0].object === 'inline') {
        // An inline node without marks of its own, e.g. a link or an image.
        const node = markNodes[0];
        convertedNodes.push(convertInlineNode(node, convertInlineAndTextChildren(node.nodes)));
      } else {
        // Whitespace at the edges of the group is moved outside of the mark.
        const {
          leadingWhitespace,
          trailingWhitespace,
          centerNodes,
        } = normalizeFlankingWhitespace(markNodes);
        const children = convertInlineAndTextChildren(centerNodes);
        const markNode = u(markMap[markType], children);

        // Filter out empty marks, otherwise they're output literally by
        // remark-stringify, e.g. an empty bold node becomes "****". The
        // flanking whitespace of an empty mark is dropped along with it.
        if (mdastToString(markNode) !== '') {
          const normalizedNodes = [
            createText(leadingWhitespace),
            markNode,
            createText(trailingWhitespace),
          ].filter(val => val);
          convertedNodes.push(...normalizedNodes);
        }
      }
    }

    return convertedNodes;
  }

  /**
   * Builds the MDAST node for a Slate block node.
   *
   * `children` are the already converted MDAST children of `node`. Returns
   * `undefined` for headings without any text and for node types that are
   * not handled below.
   */
  function convertBlockNode(node, children) {
    switch (node.type) {
      /**
       * General
       *
       * Convert simple cases that only require a type and children, with no
       * additional properties.
       */
      case 'root':
      case 'paragraph':
      case 'quote':
      case 'list-item':
      case 'table':
      case 'table-row':
      case 'table-cell': {
        return u(typeMap[node.type], children);
      }

      /**
       * Shortcodes
       *
       * Shortcode nodes only exist in Slate's Raw AST if they were inserted
       * via the plugin toolbar in memory, so they should always have
       * shortcode data attached. The "shortcode" data property contains the
       * name of the registered shortcode plugin, and the "shortcodeData" data
       * property contains the data received from the shortcode plugin's
       * `fromBlock` method when the shortcode node was created.
       *
       * Here we create a `shortcode` MDAST node that contains only the shortcode
       * data.
       */
      case 'shortcode': {
        const { data } = node;
        return u(typeMap[node.type], { data });
      }

      /**
       * Headings
       *
       * Slate schemas don't usually infer basic type info from data, so each
       * level of heading is a separately named type. The MDAST schema just
       * has a single "heading" type with the depth stored in a "depth"
       * property on the node. Here we derive the depth from the Slate node
       * type - e.g., for "heading-two", we need a depth value of "2".
       *
       * Headings that contain no text are omitted.
       */
      case 'heading-one':
      case 'heading-two':
      case 'heading-three':
      case 'heading-four':
      case 'heading-five':
      case 'heading-six': {
        const depthMap = { one: 1, two: 2, three: 3, four: 4, five: 5, six: 6 };
        const depthText = node.type.split('-')[1];
        const depth = depthMap[depthText];
        const mdastNode = u(typeMap[node.type], { depth }, children);
        return mdastToString(mdastNode) ? mdastNode : undefined;
      }

      /**
       * Code Blocks
       *
       * Code block nodes may have a single text child, or instead be void and
       * store their value in `data.code`. They also may have a code language
       * stored in the "lang" data property. Here we transfer both the node value
       * and the "lang" data property to the new MDAST node, and spread any
       * remaining data as `data`.
       */
      case 'code-block': {
        const { lang, code, ...data } = get(node, 'data', {});
        const value = voidCodeBlock ? code : children[0]?.value;
        return u(typeMap[node.type], { lang, data }, value || '');
      }

      /**
       * Lists
       *
       * Our Slate schema has separate node types for ordered and unordered
       * lists, but the MDAST spec uses a single type with a boolean "ordered"
       * property to indicate whether the list is numbered. The MDAST spec also
       * allows for a "start" property to indicate the first number used for an
       * ordered list. Here we translate both values from our Slate schema to
       * MDAST.
       *
       * Only a missing start value falls back to 1. A start of 0 is a valid
       * first number and must be kept.
       */
      case 'numbered-list':
      case 'bulleted-list': {
        const ordered = node.type === 'numbered-list';
        const props = { ordered, start: get(node.data, 'start') ?? 1 };
        return u(typeMap[node.type], props, children);
      }

      /**
       * Thematic Break
       *
       * Thematic break is a block level break. They cannot have children.
       */
      case 'thematic-break': {
        return u(typeMap[node.type]);
      }

      default:
        return undefined;
    }
  }

  /**
   * Builds the MDAST node for a Slate inline node (`break`, `link` or
   * `image`). `children` are the already converted children and are only
   * used for links. Any other node type yields `undefined`.
   */
  function convertInlineNode(node, children) {
    const mdastType = typeMap[node.type];

    // Breaks are phrasing level breaks. They cannot have children.
    if (node.type === 'break') {
      return u(mdastType);
    }

    // The url and title of a link are properties on the node itself in both
    // the Slate and the Remark schema; whatever else is stored in the Slate
    // data is passed along as `data`.
    if (node.type === 'link') {
      const { url, title, ...data } = get(node, 'data', {});
      return u(mdastType, { url, title, data }, children);
    }

    // Images are handled almost like links, except that they have no child
    // nodes and carry an additional `alt` attribute.
    if (node.type === 'image') {
      const { url, title, alt, ...data } = get(node, 'data', {});
      return u(mdastType, { url, title, alt, data });
    }

    return undefined;
  }
}