2019-12-16 12:17:37 -05:00
|
|
|
import { get, without, last, map, intersection, omit } from 'lodash';
|
2017-07-31 16:41:40 -04:00
|
|
|
import u from 'unist-builder';
|
2019-12-16 12:17:37 -05:00
|
|
|
import mdastToString from 'mdast-util-to-string';
|
2017-07-31 16:41:40 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Map of Slate node types to MDAST/Remark node types.
|
|
|
|
*/
|
|
|
|
const typeMap = {
|
2018-08-07 14:46:54 -06:00
|
|
|
root: 'root',
|
|
|
|
paragraph: 'paragraph',
|
2017-07-31 16:41:40 -04:00
|
|
|
'heading-one': 'heading',
|
|
|
|
'heading-two': 'heading',
|
|
|
|
'heading-three': 'heading',
|
|
|
|
'heading-four': 'heading',
|
|
|
|
'heading-five': 'heading',
|
|
|
|
'heading-six': 'heading',
|
2018-08-07 14:46:54 -06:00
|
|
|
quote: 'blockquote',
|
2019-12-16 12:17:37 -05:00
|
|
|
'code-block': 'code',
|
2017-07-31 16:41:40 -04:00
|
|
|
'numbered-list': 'list',
|
|
|
|
'bulleted-list': 'list',
|
|
|
|
'list-item': 'listItem',
|
2018-08-07 14:46:54 -06:00
|
|
|
table: 'table',
|
2017-07-31 16:41:40 -04:00
|
|
|
'table-row': 'tableRow',
|
|
|
|
'table-cell': 'tableCell',
|
2018-08-07 14:46:54 -06:00
|
|
|
break: 'break',
|
2017-07-31 16:41:40 -04:00
|
|
|
'thematic-break': 'thematicBreak',
|
2018-08-07 14:46:54 -06:00
|
|
|
link: 'link',
|
|
|
|
image: 'image',
|
2019-04-10 15:23:53 -04:00
|
|
|
shortcode: 'shortcode',
|
2017-07-31 16:41:40 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Map of Slate mark types to MDAST/Remark node types.
|
|
|
|
*/
|
|
|
|
const markMap = {
|
|
|
|
bold: 'strong',
|
|
|
|
italic: 'emphasis',
|
|
|
|
strikethrough: 'delete',
|
|
|
|
code: 'inlineCode',
|
|
|
|
};
|
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
const leadingWhitespaceExp = /^\s+\S/;
|
|
|
|
const trailingWhitespaceExp = /(?!\S)\s+$/;
|
|
|
|
|
|
|
|
export default function slateToRemark(raw, { voidCodeBlock }) {
|
2017-09-14 17:50:08 -04:00
|
|
|
/**
|
|
|
|
* The Slate Raw AST generally won't have a top level type, so we set it to
|
|
|
|
* "root" for clarity.
|
|
|
|
*/
|
|
|
|
raw.type = 'root';
|
|
|
|
|
|
|
|
return transform(raw);
|
|
|
|
|
|
|
|
/**
|
2019-12-16 12:17:37 -05:00
|
|
|
* The transform function mimics the approach of a Remark plugin for
|
|
|
|
* conformity with the other serialization functions. This function converts
|
|
|
|
* Slate nodes to MDAST nodes, and recursively calls itself to process child
|
|
|
|
* nodes to arbitrary depth.
|
2017-09-14 17:50:08 -04:00
|
|
|
*/
|
2019-12-16 12:17:37 -05:00
|
|
|
function transform(node) {
|
|
|
|
/**
|
|
|
|
* Combine adjacent text and inline nodes before processing so they can
|
|
|
|
* share marks.
|
|
|
|
*/
|
|
|
|
const hasBlockChildren = node.nodes && node.nodes[0] && node.nodes[0].object === 'block';
|
|
|
|
const children = hasBlockChildren
|
|
|
|
? node.nodes.map(transform).filter(v => v)
|
|
|
|
: convertInlineAndTextChildren(node.nodes);
|
|
|
|
|
|
|
|
const output = convertBlockNode(node, children);
|
|
|
|
//console.log(JSON.stringify(output, null, 2));
|
|
|
|
return output;
|
|
|
|
}
|
2017-09-14 17:50:08 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
function removeMarkFromNodes(nodes, markType) {
|
|
|
|
return nodes.map(node => {
|
|
|
|
switch (node.type) {
|
|
|
|
case 'link': {
|
|
|
|
const updatedNodes = removeMarkFromNodes(node.nodes, markType);
|
|
|
|
return {
|
|
|
|
...node,
|
|
|
|
nodes: updatedNodes,
|
|
|
|
};
|
|
|
|
}
|
2017-09-14 17:50:08 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
case 'image':
|
|
|
|
case 'break': {
|
|
|
|
const data = omit(node.data, 'marks');
|
|
|
|
return { ...node, data };
|
|
|
|
}
|
2017-09-14 17:50:08 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
default:
|
|
|
|
return {
|
|
|
|
...node,
|
|
|
|
marks: node.marks.filter(({ type }) => type !== markType),
|
|
|
|
};
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
2017-09-14 17:50:08 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
function getNodeMarks(node) {
|
|
|
|
switch (node.type) {
|
|
|
|
case 'link': {
|
|
|
|
// Code marks can't always be condensed together. If all text in a link
|
|
|
|
// is wrapped in a mark, this function returns that mark and the node
|
|
|
|
// ends up nested inside of that mark. Code marks sometimes can't do
|
|
|
|
// that, like when they wrap all of the text content of a link. Here we
|
|
|
|
// remove code marks before processing so that they stay put.
|
|
|
|
const nodesWithoutCode = node.nodes.map(n => ({
|
|
|
|
...n,
|
|
|
|
marks: n.marks ? n.marks.filter(({ type }) => type !== 'code') : n.marks,
|
|
|
|
}));
|
|
|
|
const childMarks = map(nodesWithoutCode, getNodeMarks);
|
|
|
|
return intersection(...childMarks);
|
|
|
|
}
|
2017-09-14 17:50:08 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
case 'break':
|
|
|
|
case 'image':
|
|
|
|
return map(get(node, ['data', 'marks']), mark => mark.type);
|
2017-09-14 17:50:08 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
default:
|
|
|
|
return map(node.marks, mark => mark.type);
|
2017-09-25 18:17:44 -04:00
|
|
|
}
|
2019-12-16 12:17:37 -05:00
|
|
|
}
|
2017-09-25 18:17:44 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
function getSharedMarks(marks, node) {
|
|
|
|
const nodeMarks = getNodeMarks(node);
|
|
|
|
const sharedMarks = intersection(marks, nodeMarks);
|
|
|
|
if (sharedMarks[0] === 'code') {
|
|
|
|
return nodeMarks.length === 1 ? marks : [];
|
2017-09-14 17:50:08 -04:00
|
|
|
}
|
2019-12-16 12:17:37 -05:00
|
|
|
return sharedMarks;
|
|
|
|
}
|
2017-09-14 17:50:08 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
function extractFirstMark(nodes) {
|
|
|
|
let firstGroupMarks = getNodeMarks(nodes[0]) || [];
|
2017-09-14 17:50:08 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
// If code mark is present, but there are other marks, process others first.
|
|
|
|
// If only the code mark is present, don't allow it to be shared with other
|
|
|
|
// nodes.
|
|
|
|
if (firstGroupMarks[0] === 'code' && firstGroupMarks.length > 1) {
|
|
|
|
firstGroupMarks = [...without('firstGroupMarks', 'code'), 'code'];
|
|
|
|
}
|
2017-07-31 16:41:40 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
let splitIndex = 1;
|
|
|
|
|
|
|
|
if (firstGroupMarks.length > 0) {
|
|
|
|
while (splitIndex < nodes.length) {
|
|
|
|
if (nodes[splitIndex]) {
|
|
|
|
const sharedMarks = getSharedMarks(firstGroupMarks, nodes[splitIndex]);
|
|
|
|
if (sharedMarks.length > 0) {
|
|
|
|
firstGroupMarks = sharedMarks;
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
2019-02-02 02:39:12 +01:00
|
|
|
}
|
2019-12-16 12:17:37 -05:00
|
|
|
splitIndex += 1;
|
2019-02-02 02:39:12 +01:00
|
|
|
}
|
|
|
|
}
|
2017-09-14 17:50:08 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
const markType = firstGroupMarks[0];
|
|
|
|
const childNodes = nodes.slice(0, splitIndex);
|
|
|
|
const updatedChildNodes = markType ? removeMarkFromNodes(childNodes, markType) : childNodes;
|
|
|
|
const remainingNodes = nodes.slice(splitIndex);
|
2017-07-31 16:41:40 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
return [markType, updatedChildNodes, remainingNodes];
|
2017-09-14 17:50:08 -04:00
|
|
|
}
|
2017-09-12 11:48:14 -04:00
|
|
|
|
|
|
|
/**
|
2019-12-16 12:17:37 -05:00
|
|
|
* Converts the strings returned from `splitToNamedParts` to Slate nodes.
|
2017-09-12 11:48:14 -04:00
|
|
|
*/
|
2019-12-16 12:17:37 -05:00
|
|
|
function splitWhitespace(node, { trailing } = {}) {
|
|
|
|
if (!node.text) {
|
|
|
|
return { trimmedNode: node };
|
|
|
|
}
|
|
|
|
const exp = trailing ? trailingWhitespaceExp : leadingWhitespaceExp;
|
|
|
|
const index = node.text.search(exp);
|
|
|
|
if (index > -1) {
|
|
|
|
const substringIndex = trailing ? index : index + 1;
|
|
|
|
const firstSplit = node.text.substring(0, substringIndex);
|
|
|
|
const secondSplit = node.text.substring(substringIndex);
|
|
|
|
const whitespace = trailing ? secondSplit : firstSplit;
|
|
|
|
const text = trailing ? firstSplit : secondSplit;
|
|
|
|
return { whitespace, trimmedNode: { ...node, text } };
|
|
|
|
}
|
|
|
|
return { trimmedNode: node };
|
2017-09-14 17:50:08 -04:00
|
|
|
}
|
2017-09-12 11:48:14 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
function collectCenterNodes(nodes, leadingNode, trailingNode) {
|
|
|
|
switch (nodes.length) {
|
|
|
|
case 0:
|
|
|
|
return [];
|
|
|
|
case 1:
|
|
|
|
return [trailingNode];
|
|
|
|
case 2:
|
|
|
|
return [leadingNode, trailingNode];
|
|
|
|
default:
|
|
|
|
return [leadingNode, ...nodes.slice(1, -1), trailingNode];
|
|
|
|
}
|
2017-09-14 17:50:08 -04:00
|
|
|
}
|
2017-09-12 11:48:14 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
function normalizeFlankingWhitespace(nodes) {
|
|
|
|
const { whitespace: leadingWhitespace, trimmedNode: leadingNode } = splitWhitespace(nodes[0]);
|
|
|
|
const lastNode = nodes.length > 1 ? last(nodes) : leadingNode;
|
|
|
|
const trailingSplitResult = splitWhitespace(lastNode, { trailing: true });
|
|
|
|
const { whitespace: trailingWhitespace, trimmedNode: trailingNode } = trailingSplitResult;
|
|
|
|
const centerNodes = collectCenterNodes(nodes, leadingNode, trailingNode).filter(val => val);
|
|
|
|
return { leadingWhitespace, centerNodes, trailingWhitespace };
|
|
|
|
}
|
2017-07-31 16:41:40 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
function convertInlineAndTextChildren(nodes = []) {
|
|
|
|
const convertedNodes = [];
|
|
|
|
let remainingNodes = nodes;
|
|
|
|
|
|
|
|
while (remainingNodes.length > 0) {
|
|
|
|
const nextNode = remainingNodes[0];
|
|
|
|
if (nextNode.object === 'inline' || (nextNode.marks && nextNode.marks.length > 0)) {
|
|
|
|
const [markType, markNodes, remainder] = extractFirstMark(remainingNodes);
|
|
|
|
/**
|
|
|
|
* A node with a code mark will be a text node, and will not be adjacent
|
|
|
|
* to a sibling code node as the Slate schema requires them to be
|
|
|
|
* merged. Markdown also requires at least a space between inline code
|
|
|
|
* nodes.
|
|
|
|
*/
|
|
|
|
if (markType === 'code') {
|
|
|
|
const node = markNodes[0];
|
|
|
|
convertedNodes.push(u(markMap[markType], node.data, node.text));
|
|
|
|
} else if (!markType && markNodes.length === 1 && markNodes[0].object === 'inline') {
|
|
|
|
const node = markNodes[0];
|
|
|
|
convertedNodes.push(convertInlineNode(node, convertInlineAndTextChildren(node.nodes)));
|
|
|
|
} else {
|
|
|
|
const {
|
|
|
|
leadingWhitespace,
|
|
|
|
trailingWhitespace,
|
|
|
|
centerNodes,
|
|
|
|
} = normalizeFlankingWhitespace(markNodes);
|
|
|
|
const children = convertInlineAndTextChildren(centerNodes);
|
|
|
|
const markNode = u(markMap[markType], children);
|
|
|
|
|
|
|
|
// Filter out empty marks, otherwise their output literally by
|
|
|
|
// remark-stringify, eg. an empty bold node becomes "****"
|
|
|
|
if (mdastToString(markNode) === '') {
|
|
|
|
remainingNodes = remainder;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
const createText = text => text && u('html', text);
|
|
|
|
const normalizedNodes = [
|
|
|
|
createText(leadingWhitespace),
|
|
|
|
markNode,
|
|
|
|
createText(trailingWhitespace),
|
|
|
|
].filter(val => val);
|
|
|
|
convertedNodes.push(...normalizedNodes);
|
|
|
|
}
|
|
|
|
remainingNodes = remainder;
|
|
|
|
} else {
|
|
|
|
remainingNodes.shift();
|
|
|
|
convertedNodes.push(u('html', nextNode.text));
|
|
|
|
}
|
|
|
|
}
|
2017-09-12 11:48:14 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
return convertedNodes;
|
2018-08-07 14:46:54 -06:00
|
|
|
}
|
2017-07-31 16:41:40 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
function convertBlockNode(node, children) {
|
|
|
|
switch (node.type) {
|
|
|
|
/**
|
|
|
|
* General
|
|
|
|
*
|
|
|
|
* Convert simple cases that only require a type and children, with no
|
|
|
|
* additional properties.
|
|
|
|
*/
|
|
|
|
case 'root':
|
|
|
|
case 'paragraph':
|
|
|
|
case 'quote':
|
|
|
|
case 'list-item':
|
|
|
|
case 'table':
|
|
|
|
case 'table-row':
|
|
|
|
case 'table-cell': {
|
|
|
|
return u(typeMap[node.type], children);
|
|
|
|
}
|
2017-07-31 16:41:40 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
/**
|
|
|
|
* Shortcodes
|
|
|
|
*
|
|
|
|
* Shortcode nodes only exist in Slate's Raw AST if they were inserted
|
|
|
|
* via the plugin toolbar in memory, so they should always have
|
|
|
|
* shortcode data attached. The "shortcode" data property contains the
|
|
|
|
* name of the registered shortcode plugin, and the "shortcodeData" data
|
|
|
|
* property contains the data received from the shortcode plugin's
|
|
|
|
* `fromBlock` method when the shortcode node was created.
|
|
|
|
*
|
|
|
|
* Here we create a `shortcode` MDAST node that contains only the shortcode
|
|
|
|
* data.
|
|
|
|
*/
|
|
|
|
case 'shortcode': {
|
|
|
|
const { data } = node;
|
|
|
|
return u(typeMap[node.type], { data });
|
|
|
|
}
|
2017-07-31 16:41:40 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
/**
|
|
|
|
* Headings
|
|
|
|
*
|
|
|
|
* Slate schemas don't usually infer basic type info from data, so each
|
|
|
|
* level of heading is a separately named type. The MDAST schema just
|
|
|
|
* has a single "heading" type with the depth stored in a "depth"
|
|
|
|
* property on the node. Here we derive the depth from the Slate node
|
|
|
|
* type - e.g., for "heading-two", we need a depth value of "2".
|
|
|
|
*/
|
|
|
|
case 'heading-one':
|
|
|
|
case 'heading-two':
|
|
|
|
case 'heading-three':
|
|
|
|
case 'heading-four':
|
|
|
|
case 'heading-five':
|
|
|
|
case 'heading-six': {
|
|
|
|
const depthMap = { one: 1, two: 2, three: 3, four: 4, five: 5, six: 6 };
|
|
|
|
const depthText = node.type.split('-')[1];
|
|
|
|
const depth = depthMap[depthText];
|
|
|
|
const mdastNode = u(typeMap[node.type], { depth }, children);
|
|
|
|
if (mdastToString(mdastNode)) {
|
|
|
|
return mdastNode;
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
2017-07-31 16:41:40 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
/**
|
|
|
|
* Code Blocks
|
|
|
|
*
|
|
|
|
* Code block nodes may have a single text child, or instead be void and
|
|
|
|
* store their value in `data.code`. They also may have a code language
|
|
|
|
* stored in the "lang" data property. Here we transfer both the node value
|
|
|
|
* and the "lang" data property to the new MDAST node, and spread any
|
|
|
|
* remaining data as `data`.
|
|
|
|
*/
|
|
|
|
case 'code-block': {
|
|
|
|
const { lang, code, ...data } = get(node, 'data', {});
|
|
|
|
const value = voidCodeBlock ? code : children[0]?.value;
|
|
|
|
return u(typeMap[node.type], { lang, data }, value || '');
|
|
|
|
}
|
2017-07-31 16:41:40 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
/**
|
|
|
|
* Lists
|
|
|
|
*
|
|
|
|
* Our Slate schema has separate node types for ordered and unordered
|
|
|
|
* lists, but the MDAST spec uses a single type with a boolean "ordered"
|
|
|
|
* property to indicate whether the list is numbered. The MDAST spec also
|
|
|
|
* allows for a "start" property to indicate the first number used for an
|
|
|
|
* ordered list. Here we translate both values to our Slate schema.
|
|
|
|
*/
|
|
|
|
case 'numbered-list':
|
|
|
|
case 'bulleted-list': {
|
|
|
|
const ordered = node.type === 'numbered-list';
|
|
|
|
const props = { ordered, start: get(node.data, 'start') || 1 };
|
|
|
|
return u(typeMap[node.type], props, children);
|
|
|
|
}
|
2017-07-31 16:41:40 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
/**
|
|
|
|
* Thematic Break
|
|
|
|
*
|
|
|
|
* Thematic break is a block level break. They cannot have children.
|
|
|
|
*/
|
|
|
|
case 'thematic-break': {
|
|
|
|
return u(typeMap[node.type]);
|
|
|
|
}
|
2017-07-31 16:41:40 -04:00
|
|
|
}
|
2019-12-16 12:17:37 -05:00
|
|
|
}
|
2017-07-31 16:41:40 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
function convertInlineNode(node, children) {
|
|
|
|
switch (node.type) {
|
|
|
|
/**
|
|
|
|
* Break
|
|
|
|
*
|
|
|
|
* Breaks are phrasing level breaks. They cannot have children.
|
|
|
|
*/
|
|
|
|
case 'break': {
|
|
|
|
return u(typeMap[node.type]);
|
|
|
|
}
|
2017-07-31 16:41:40 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
/**
|
|
|
|
* Links
|
|
|
|
*
|
|
|
|
* The url and title attributes of link nodes are stored in properties on
|
|
|
|
* the node for both Slate and Remark schemas.
|
|
|
|
*/
|
|
|
|
case 'link': {
|
|
|
|
const { url, title, ...data } = get(node, 'data', {});
|
|
|
|
return u(typeMap[node.type], { url, title, data }, children);
|
|
|
|
}
|
2017-09-19 16:49:45 -04:00
|
|
|
|
2019-12-16 12:17:37 -05:00
|
|
|
/**
|
|
|
|
* Images
|
|
|
|
*
|
|
|
|
* This transformation is almost identical to that of links, except for the
|
|
|
|
* lack of child nodes and addition of `alt` attribute data.
|
|
|
|
*/
|
|
|
|
case 'image': {
|
|
|
|
const { url, title, alt, ...data } = get(node, 'data', {});
|
|
|
|
return u(typeMap[node.type], { url, title, alt, data });
|
|
|
|
}
|
|
|
|
}
|
2017-07-31 16:41:40 -04:00
|
|
|
}
|
|
|
|
}
|