refactor and document rte serializers

This commit is contained in:
Shawn Erquhart 2017-07-31 16:41:40 -04:00
parent 406ae57d3e
commit cf2b7be25f
12 changed files with 898 additions and 389 deletions

View File

@ -1,4 +1,4 @@
import { get, isEmpty, reduce } from 'lodash'; import { get, isEmpty, reduce, pull } from 'lodash';
import unified from 'unified'; import unified from 'unified';
import u from 'unist-builder'; import u from 'unist-builder';
import markdownToRemarkPlugin from 'remark-parse'; import markdownToRemarkPlugin from 'remark-parse';
@ -7,51 +7,133 @@ import remarkToRehype from 'remark-rehype';
import rehypeToHtml from 'rehype-stringify'; import rehypeToHtml from 'rehype-stringify';
import htmlToRehype from 'rehype-parse'; import htmlToRehype from 'rehype-parse';
import rehypeToRemark from 'rehype-remark'; import rehypeToRemark from 'rehype-remark';
import rehypeMinifyWhitespace from 'rehype-minify-whitespace'; import remarkToRehypeShortcodes from './remarkRehypeShortcodes';
import remarkToRehypeShortcodes from './remark-rehype-shortcodes'; import rehypePaperEmoji from './rehypePaperEmoji';
import rehypeRemoveEmpty from './rehype-remove-empty'; import remarkWrapHtml from './remarkWrapHtml';
import rehypePaperEmoji from './rehype-paper-emoji'; import remarkToSlatePlugin from './remarkSlate';
import remarkNestedList from './remark-nested-list'; import remarkSquashReferences from './remarkSquashReferences';
import remarkToSlatePlugin from './remark-slate'; import remarkImagesToText from './remarkImagesToText';
import remarkImagesToText from './remark-images-to-text'; import remarkShortcodes from './remarkShortcodes';
import remarkShortcodes from './remark-shortcodes'; import slateToRemarkParser from './slateRemark';
import registry from '../../../../lib/registry'; import registry from '../../../../lib/registry';
export const remarkToHtml = (mdast, getAsset) => { /**
const result = unified() * This module contains all serializers for the Markdown widget.
.use(remarkToRehypeShortcodes, { plugins: registry.getEditorComponents(), getAsset }) *
.use(remarkToRehype, { allowDangerousHTML: true }) * The value of a Markdown widget is transformed to various formats during
.runSync(mdast); * editing, and these formats are referenced throughout serializer source
* documentation. Below is a brief glossary of the formats used.
*
* - Markdown {string}
* The stringified Markdown value. The value of the field is persisted
* (stored) in this format, and the stringified value is also used when the
* editor is in "raw" Markdown mode.
*
* - MDAST {object}
* Also loosely referred to as "Remark". MDAST stands for MarkDown AST
* (Abstract Syntax Tree), and is an object representation of a Markdown
* document. Underneath, it's a Unist tree with a Markdown-specific schema. An
* MDAST is used as the source of truth for any Markdown field within the CMS
* once the Markdown string value is loaded. MDAST syntax is a part of the
* Unified ecosystem, and powers the Remark processor, so Remark plugins may
* be used.
*
* - HAST {object}
* Also loosely referred to as "Rehype". HAST, similar to MDAST, is an object
* representation of an HTML document. The field value takes this format
* temporarily before the document is stringified to HTML.
*
* - HTML {string}
* The field value is stringified to HTML for preview purposes - the HTML value
* is never parsed, it is output only.
*
* - Slate Raw AST {object}
* Slate's Raw AST is a very simple and unopinionated object representation of
* a document in a Slate editor. We define our own Markdown-specific schema
* for serialization to/from Slate's Raw AST and MDAST.
*
* Overview of the Markdown widget serialization life cycle:
*
* - Entry Load
* When an entry is loaded, all Markdown widget values are serialized to
* MDAST within the entry draft.
*
* - Visual Editor Render
* When a Markdown widget using the visual editor renders, it converts the
* MDAST value from the entry draft to Slate's Raw AST, and renders that.
*
* - Visual Editor Update
* When the value of a Markdown field is changed in the visual editor, the
* resulting Slate Raw AST is converted back to MDAST, and the MDAST value is
* set as the new state of the field in the entry draft.
*
* - Visual Editor Paste
* When a value is pasted to the visual editor, the pasted value is checked
* for HTML data. If HTML is found, the value is deserialized to an HAST, then
* to MDAST, and finally to Slate's Raw AST. If no HTML is found, the plain
* text value of the paste is serialized to Slate's Raw AST via the Slate
* Plain serializer. The deserialized fragment is then inserted to the Slate
* document.
*
* - Raw Editor Render
* When a Markdown widget renders using the raw editor (Markdown switch activated),
* it stringifies the MDAST from the entry draft to Markdown, and runs the
* stringified Markdown through Slate's Plain serializer, which outputs a
* Slate Raw AST of the plain text, which is then rendered in the editor.
*
* - Raw Editor Update
* When the value of a Markdown field is changed in the raw editor, the
* resulting Slate Raw AST is stringified back to a string, and the string
* value is then parsed as Markdown into an MDAST. The MDAST value is
* set as the new state of the field in the entry draft.
*
* - Raw Editor Paste
* When a value is pasted to the raw editor, the text value of the paste is
* serialized to Slate's Raw AST via the Slate Plain serializer. The
* deserialized fragment is then inserted to the Slate document.
*
* - Preview Pane Render
* When the preview pane renders the value of a Markdown widget, it first
* converts the MDAST value to HAST, stringifies the HAST to HTML, and
* renders that.
*
* - Entry Persist (Save)
* On persist, the MDAST value in the entry draft is stringified back to
* a Markdown string for storage.
*/
const output = unified()
.use(rehypeToHtml, { allowDangerousHTML: true, allowDangerousCharacters: true })
.stringify(result);
return output
}
/**
 * Deserialize an HTML string to Slate's Raw AST.
 *
 * The HTML is parsed as a fragment into a HAST, and a single processor then
 * cleans the HAST, converts it to MDAST, and converts that to Slate's Raw AST.
 */
export const htmlToSlate = html => {
  /**
   * Parse the HTML string as a fragment rather than as a full document.
   */
  const htmlParser = unified().use(htmlToRehype, { fragment: true });
  const parsedHtml = htmlParser.parse(html);

  /**
   * Plugin order matters: HAST plugins run first, then the HAST to MDAST
   * bridge, then MDAST plugins, and finally the MDAST to Slate conversion.
   */
  const hastToSlate = unified()
    .use(rehypeRemoveEmpty)
    .use(rehypeMinifyWhitespace)
    .use(rehypePaperEmoji)
    .use(rehypeToRemark)
    .use(remarkNestedList)
    .use(remarkToSlatePlugin);

  return hastToSlate.runSync(parsedHtml);
};
/**
* Deserialize a Markdown string to an MDAST.
*/
export const markdownToRemark = markdown => { export const markdownToRemark = markdown => {
/**
* Disabling tokenizers allows us to turn off features within the Remark
* parser.
*/
function disableTokenizers() {
/**
* Turn off soft breaks until we can properly support them across both
* editors.
*/
pull(this.Parser.prototype.inlineMethods, 'break');
}
/**
* Parse the Markdown string input to an MDAST.
*/
const parsed = unified() const parsed = unified()
.use(markdownToRemarkPlugin, { fences: true, pedantic: true, footnotes: true, commonmark: true }) .use(markdownToRemarkPlugin, { fences: true, pedantic: true, commonmark: true })
.use(disableTokenizers)
.parse(markdown); .parse(markdown);
/**
* Further transform the MDAST with plugins.
*/
const result = unified() const result = unified()
.use(remarkSquashReferences)
.use(remarkImagesToText) .use(remarkImagesToText)
.use(remarkShortcodes, { plugins: registry.getEditorComponents() }) .use(remarkShortcodes, { plugins: registry.getEditorComponents() })
.runSync(parsed); .runSync(parsed);
@ -59,6 +141,10 @@ export const markdownToRemark = markdown => {
return result; return result;
}; };
/**
* Serialize an MDAST to a Markdown string.
*/
export const remarkToMarkdown = obj => { export const remarkToMarkdown = obj => {
/** /**
* Rewrite the remark-stringify text visitor to simply return the text value, * Rewrite the remark-stringify text visitor to simply return the text value,
@ -71,133 +157,84 @@ export const remarkToMarkdown = obj => {
visitors.text = node => node.value; visitors.text = node => node.value;
}; };
/**
* Provide an empty MDAST if no value is provided.
*/
const mdast = obj || u('root', [u('paragraph', [u('text', '')])]); const mdast = obj || u('root', [u('paragraph', [u('text', '')])]);
const result = unified()
const markdown = unified()
.use(remarkToMarkdownPlugin, { listItemIndent: '1', fences: true, pedantic: true, commonmark: true }) .use(remarkToMarkdownPlugin, { listItemIndent: '1', fences: true, pedantic: true, commonmark: true })
.use(remarkAllowAllText) .use(remarkAllowAllText)
.stringify(mdast); .stringify(mdast);
return result;
return markdown;
}; };
/**
 * Render an MDAST as an HTML string.
 *
 * The MDAST is first run through the shortcode preparation plugin and the
 * MDAST to HAST bridge, and the resulting HAST is then stringified. Dangerous
 * HTML is allowed through at both stages.
 *
 * `getAsset` is forwarded untouched to the shortcode plugin.
 */
export const remarkToHtml = (mdast, getAsset) => {
  const mdastToHast = unified()
    .use(remarkToRehypeShortcodes, { plugins: registry.getEditorComponents(), getAsset })
    .use(remarkToRehype, { allowDangerousHTML: true });
  const hastTree = mdastToHast.runSync(mdast);

  const hastStringifier = unified()
    .use(rehypeToHtml, { allowDangerousHTML: true, allowDangerousCharacters: true });

  return hastStringifier.stringify(hastTree);
};
/**
 * Deserialize an HTML string to Slate's Raw AST. Currently used for HTML
 * pastes.
 *
 * The conversion happens in three hops, each with its own processor:
 * HTML string -> HAST -> MDAST -> Slate Raw AST.
 */
export const htmlToSlate = html => {
  /**
   * Parse the HTML string as a fragment rather than as a full document.
   */
  const htmlParser = unified().use(htmlToRehype, { fragment: true });
  const hastTree = htmlParser.parse(html);

  /**
   * Clean up the HAST and convert it to an MDAST.
   */
  const hastToMdast = unified()
    .use(rehypePaperEmoji)
    .use(rehypeToRemark);
  const mdastTree = hastToMdast.runSync(hastTree);

  /**
   * Apply the MDAST plugins, then convert to Slate's Raw AST.
   */
  const mdastToSlate = unified()
    .use(remarkImagesToText)
    .use(remarkShortcodes, { plugins: registry.getEditorComponents() })
    .use(remarkWrapHtml)
    .use(remarkToSlatePlugin);

  return mdastToSlate.runSync(mdastTree);
};
/**
* Convert an MDAST to Slate's Raw AST.
*/
export const remarkToSlate = mdast => { export const remarkToSlate = mdast => {
const result = unified() const result = unified()
.use(remarkWrapHtml)
.use(remarkToSlatePlugin) .use(remarkToSlatePlugin)
.runSync(mdast); .runSync(mdast);
return result; return result;
}; };
export const slateToRemark = (raw, shortcodePlugins) => {
const typeMap = {
'paragraph': 'paragraph',
'heading-one': 'heading',
'heading-two': 'heading',
'heading-three': 'heading',
'heading-four': 'heading',
'heading-five': 'heading',
'heading-six': 'heading',
'quote': 'blockquote',
'code': 'code',
'numbered-list': 'list',
'bulleted-list': 'list',
'list-item': 'listItem',
'table': 'table',
'table-row': 'tableRow',
'table-cell': 'tableCell',
'thematic-break': 'thematicBreak',
'link': 'link',
'image': 'image',
};
const markMap = {
bold: 'strong',
italic: 'emphasis',
strikethrough: 'delete',
code: 'inlineCode',
};
const transform = node => {
const children = isEmpty(node.nodes) ? node.nodes : node.nodes.reduce((acc, childNode) => {
if (childNode.kind !== 'text') {
acc.push(transform(childNode));
return acc;
}
if (childNode.ranges) {
childNode.ranges.forEach(range => {
const { marks = [], text } = range;
const markTypes = marks.map(mark => markMap[mark.type]);
if (markTypes.includes('inlineCode')) {
acc.push(u('inlineCode', text));
} else {
const textNode = u('html', text);
const nestedText = !markTypes.length ? textNode : markTypes.reduce((acc, markType) => {
const nested = u(markType, [acc]);
return nested;
}, textNode);
acc.push(nestedText);
}
});
} else {
acc.push(u('html', childNode.text)); /**
} * Convert a Slate Raw AST to MDAST.
return acc; *
}, []); * Requires shortcode plugins to parse shortcode nodes back to text.
*
if (node.type === 'root') { * Note that Unified is not utilized for the conversion from Slate's Raw AST to
return u('root', children); * MDAST. The conversion is manual because Unified can only operate on Unist
} * trees.
*/
if (node.type === 'shortcode') { export const slateToRemark = (raw) => {
const { data } = node; const mdast = slateToRemarkParser(raw, { shortcodePlugins: registry.getEditorComponents() });
const plugin = shortcodePlugins.get(data.shortcode); return mdast;
const text = plugin.toBlock(data.shortcodeData);
const textNode = u('html', text);
return u('paragraph', { data }, [ textNode ]);
}
if (node.type.startsWith('heading')) {
const depths = { one: 1, two: 2, three: 3, four: 4, five: 5, six: 6 };
const depth = node.type.split('-')[1];
const props = { depth: depths[depth] };
return u(typeMap[node.type], props, children);
}
if (['paragraph', 'quote', 'list-item', 'table', 'table-row', 'table-cell'].includes(node.type)) {
return u(typeMap[node.type], children);
}
if (node.type === 'code') {
const value = get(node.nodes, [0, 'text']);
const props = { lang: get(node.data, 'lang') };
return u(typeMap[node.type], props, value);
}
if (['numbered-list', 'bulleted-list'].includes(node.type)) {
const ordered = node.type === 'numbered-list';
const props = { ordered, start: get(node.data, 'start') || 1 };
return u(typeMap[node.type], props, children);
}
if (node.type === 'thematic-break') {
return u(typeMap[node.type]);
}
if (node.type === 'link') {
const data = get(node, 'data', {});
const { url, title } = data;
return u(typeMap[node.type], data, children);
}
if (node.type === 'image') {
const data = get(node, 'data', {});
const { url, title, alt } = data;
return u(typeMap[node.type], data);
}
}
raw.type = 'root';
const mdast = transform(raw);
const result = unified()
.use(remarkShortcodes, { plugins: registry.getEditorComponents() })
.runSync(mdast);
return result;
}; };

View File

@ -1,32 +0,0 @@
import { find, capitalize } from 'lodash';
/**
 * Remove empty nodes, including the top level parents of deeply nested empty
 * nodes.
 *
 * A node is kept when it is a void element (`img`, `hr`, `br`), a `text` or
 * `raw` leaf with a non-empty value, a shortcode element, or when any of its
 * descendants is one of those.
 *
 * NOTE(review): this plugin previously read `shortcodeAttributePrefix` as a
 * free identifier that this module neither declares nor imports. It is now
 * read from the plugin options. Callers that need shortcode elements preserved
 * must pass it, e.g. `.use(rehypeRemoveEmpty, { shortcodeAttributePrefix })`.
 * When it is omitted, no node is treated as a shortcode.
 *
 * @param {Object} [options]
 * @param {string} [options.shortcodeAttributePrefix] Prefix of the `data-*`
 * attribute that marks an element as a shortcode.
 * @returns {Function} Transformer that prunes the tree in place and returns it.
 */
export default function rehypeRemoveEmpty(options) {
  const { shortcodeAttributePrefix } = options || {};

  /**
   * HAST property name of the shortcode marker attribute: the prefix with its
   * first character upper cased and the rest lower cased, after `data`.
   */
  const shortcodeProperty = shortcodeAttributePrefix
    ? `data${shortcodeAttributePrefix.charAt(0).toUpperCase()}${shortcodeAttributePrefix.slice(1).toLowerCase()}`
    : null;

  const isVoidElement = node => ['img', 'hr', 'br'].includes(node.tagName);
  const isNonEmptyLeaf = node => ['text', 'raw'].includes(node.type) && Boolean(node.value);
  const isShortcode = node =>
    Boolean(shortcodeProperty && node.properties && node.properties[shortcodeProperty]);

  /**
   * Nodes that are kept untouched regardless of their children.
   */
  const isKeptAsIs = node => isVoidElement(node) || isNonEmptyLeaf(node) || isShortcode(node);

  function hasNonEmptyChild(node) {
    return Array.isArray(node.children) && node.children.some(isNonEmptyNode);
  }

  function isNonEmptyNode(node) {
    return isKeptAsIs(node) || hasNonEmptyChild(node);
  }

  const transform = node => {
    if (isKeptAsIs(node)) {
      return node;
    }
    if (node.children) {
      /**
       * Each child is tested on its own merits. Testing the parent for the
       * shortcode marker here would drop shortcode elements that have no
       * children of their own.
       */
      node.children = node.children
        .filter(isNonEmptyNode)
        .map(childNode => transform(childNode));
    }
    return node;
  };

  return transform;
}

View File

@ -1,33 +0,0 @@
/**
 * If the first child of a list item is a list, include it in the previous list
 * item. Otherwise it translates to markdown as having two bullets. When
 * rehype-remark processes a list and finds children that are not list items, it
 * wraps them in list items, which leads to the condition this plugin addresses.
 * Dropbox Paper currently outputs this kind of HTML, which is invalid. We have
 * a support issue open for it, and this plugin can potentially be removed when
 * that's resolved.
 *
 * The tree is modified in place: `children` arrays are replaced, and a nested
 * list is shifted out of its wrapper item and pushed onto the previous item.
 *
 * @returns {Function} Transformer that receives a node and returns that node.
 */
export default function remarkNestedList() {
  const transform = node => {
    if (node.type === 'list' && node.children && node.children.length > 1) {
      node.children = node.children.reduce((acc, childNode, index) => {
        /**
         * A list item may have no children at all, in which case there is no
         * first child to inspect and the item is passed through unchanged.
         */
        const firstChild = childNode.children && childNode.children[0];

        /**
         * The first item (index 0) has no previous item to merge into.
         */
        if (index && firstChild && firstChild.type === 'list') {
          acc[acc.length - 1].children.push(transform(childNode.children.shift()));

          /**
           * Keep the wrapper item only if it still holds other content.
           */
          if (childNode.children.length) {
            acc.push(transform(childNode));
          }
        } else {
          acc.push(transform(childNode));
        }
        return acc;
      }, []);
      return node;
    }
    if (node.children) {
      node.children = node.children.map(childNode => transform(childNode));
    }
    return node;
  };
  return transform;
}

View File

@ -1,172 +0,0 @@
import { get, isEmpty } from 'lodash';
import u from 'unist-builder';
import mdastDefinitions from 'mdast-util-definitions';
import modifyChildren from 'unist-util-modify-children';
/**
 * A Remark plugin for converting an MDAST to Slate's Raw AST.
 *
 * The returned transformer walks the tree recursively. Nodes that have no
 * Slate equivalent (for example link/image definitions) convert to `undefined`
 * and are left out of their parent's `nodes` array.
 *
 * Link and image references are resolved against the definitions found in the
 * root node, so the Slate output only contains plain links and images.
 */
export default function remarkToSlatePlugin() {
  /**
   * Map of MDAST node types to Slate node types.
   */
  const typeMap = {
    paragraph: 'paragraph',
    blockquote: 'quote',
    code: 'code',
    listItem: 'list-item',
    table: 'table',
    tableRow: 'table-row',
    tableCell: 'table-cell',
    thematicBreak: 'thematic-break',
    link: 'link',
    image: 'image',
  };

  /**
   * Map of MDAST node types to Slate mark types.
   */
  const markMap = {
    strong: 'bold',
    emphasis: 'italic',
    delete: 'strikethrough',
    inlineCode: 'code',
  };

  const toTextNode = (text, data) => ({ kind: 'text', text, data });

  /**
   * Child modifier that wraps a text or html node in a paragraph, in place.
   */
  const wrapText = (node, index, parent) => {
    if (['text', 'html'].includes(node.type)) {
      parent.children.splice(index, 1, u('paragraph', [node]));
    }
  };

  /**
   * Definition getter for link and image references. Assigned when the root
   * node is visited, which happens before any reference node is reached.
   */
  let getDefinition;

  /**
   * Title and url of the definition a reference points to, or an empty object
   * when the definition cannot be found.
   */
  const getDefinitionData = identifier => {
    const definition = getDefinition(identifier);
    return definition ? { title: definition.title, url: definition.url } : {};
  };

  /**
   * Flatten a nested mark node into an array of Slate ranges, each carrying
   * every mark collected on the way down.
   */
  const remarkToSlateMarks = (markNode, parentMarks = []) => {
    const marks = [...parentMarks, { type: markMap[markNode.type] }];
    const ranges = [];
    markNode.children.forEach(childNode => {
      if (['html', 'text'].includes(childNode.type)) {
        ranges.push({ text: childNode.value, marks });
        return;
      }
      const nestedRanges = remarkToSlateMarks(childNode, marks);
      ranges.push(...nestedRanges);
    });
    return ranges;
  };

  const transform = node => {
    let nodes;

    if (node.type === 'root') {
      // Create definition getter for link and image references
      getDefinition = mdastDefinitions(node);

      // Ensure top level text nodes are wrapped in paragraphs
      modifyChildren(wrapText)(node);
    }

    if (isEmpty(node.children)) {
      nodes = node.children;
    } else {
      // If a node returns a falsey value, exclude it. Some nodes do not
      // translate from MDAST to Slate, such as definitions for link/image
      // references or footnotes.
      //
      // Consider using unist-util-remove instead for this.
      nodes = node.children.reduce((acc, childNode) => {
        const transformed = transform(childNode);
        if (transformed) {
          acc.push(transformed);
        }
        return acc;
      }, []);
    }

    if (node.type === 'root') {
      return { nodes };
    }

    /**
     * Convert MDAST shortcode nodes to Slate 'shortcode' type nodes.
     */
    if (get(node, ['data', 'shortcode'])) {
      const { data } = node;
      return { kind: 'block', type: 'shortcode', data, isVoid: true, nodes: [toTextNode('')] };
    }

    // Process raw html as text, since it's valid markdown
    if (['text', 'html'].includes(node.type)) {
      return toTextNode(node.value, node.data);
    }

    if (node.type === 'inlineCode') {
      return { kind: 'text', ranges: [{ text: node.value, marks: [{ type: 'code' }] }] };
    }

    if (['strong', 'emphasis', 'delete'].includes(node.type)) {
      return { kind: 'text', ranges: remarkToSlateMarks(node) };
    }

    if (node.type === 'heading') {
      const depths = { 1: 'one', 2: 'two', 3: 'three', 4: 'four', 5: 'five', 6: 'six' };
      return { kind: 'block', type: `heading-${depths[node.depth]}`, nodes };
    }

    if (['paragraph', 'blockquote', 'tableRow', 'tableCell'].includes(node.type)) {
      return { kind: 'block', type: typeMap[node.type], nodes };
    }

    if (node.type === 'code') {
      const data = { lang: node.lang };
      return { kind: 'block', type: typeMap[node.type], data, nodes: [toTextNode(node.value)] };
    }

    if (node.type === 'list') {
      const slateType = node.ordered ? 'numbered-list' : 'bulleted-list';
      const data = { start: node.start };
      return { kind: 'block', type: slateType, data, nodes };
    }

    if (node.type === 'listItem') {
      const data = { checked: node.checked };
      return { kind: 'block', type: typeMap[node.type], data, nodes };
    }

    if (node.type === 'table') {
      const data = { align: node.align };
      return { kind: 'block', type: typeMap[node.type], data, nodes };
    }

    if (node.type === 'thematicBreak') {
      return { kind: 'block', type: typeMap[node.type], isVoid: true };
    }

    if (node.type === 'link') {
      const { title, url } = node;
      const data = { title, url };
      return { kind: 'inline', type: typeMap[node.type], data, nodes };
    }

    if (node.type === 'linkReference') {
      const data = getDefinitionData(node.identifier);
      return { kind: 'inline', type: typeMap['link'], data, nodes };
    }

    if (node.type === 'image') {
      const { title, url, alt } = node;
      const data = { title, url, alt };
      return { kind: 'block', type: typeMap[node.type], data };
    }

    if (node.type === 'imageReference') {
      /**
       * The alt text lives on the reference node itself, not on the
       * definition, so it is carried over just like for a plain image.
       */
      const data = { ...getDefinitionData(node.identifier), alt: node.alt };
      return { kind: 'block', type: typeMap['image'], data };
    }

    // Any other node type has no Slate equivalent.
    return undefined;
  };

  // `transform` is recursive, so expose a wrapper that only forwards the tree.
  return node => transform(node);
}

View File

@ -0,0 +1,293 @@
import { get, isEmpty, isArray } from 'lodash';
import u from 'unist-builder';
import modifyChildren from 'unist-util-modify-children';
/**
 * Map of MDAST node types to Slate node types.
 *
 * Only MDAST types with a fixed Slate name are listed. `heading` and `list`
 * are absent because their Slate type depends on the node's `depth` and
 * `ordered` properties, and is derived in `convertNode`.
 */
const typeMap = {
root: 'root',
paragraph: 'paragraph',
blockquote: 'quote',
code: 'code',
listItem: 'list-item',
table: 'table',
tableRow: 'table-row',
tableCell: 'table-cell',
thematicBreak: 'thematic-break',
link: 'link',
image: 'image',
shortcode: 'shortcode',
};
/**
 * Map of MDAST node types to Slate mark types. These MDAST types are nodes in
 * their own right, whereas in the Slate schema they are marks attached to
 * ranges of text.
 */
const markMap = {
strong: 'bold',
emphasis: 'italic',
delete: 'strikethrough',
inlineCode: 'code',
};
/**
 * Create a Slate Block node.
 *
 * Two call forms are supported:
 *
 *   createBlock(type, nodes, props)  - block with children and extra properties
 *   createBlock(type, props)         - childless block, e.g. a void node
 *
 * @param {string} type Slate block type.
 * @param {Array|Object} [nodes] Array of child nodes, or the props object when
 * using the two argument form. Any other value means "no children".
 * @param {Object} [props] Extra properties spread onto the resulting node.
 * @returns {Object} Slate Raw block node. `nodes` is `undefined` when no array
 * of children was given.
 */
function createBlock(type, nodes, props = {}) {
  const hasChildren = Array.isArray(nodes);

  /**
   * Only an object in the `nodes` position selects the two argument form. A
   * falsey placeholder for missing children must not replace `props`, or the
   * properties passed as the third argument would be silently dropped.
   */
  const isPropsShorthand = !hasChildren && nodes !== null && typeof nodes === 'object';
  const extraProps = isPropsShorthand ? nodes : props;

  return { kind: 'block', type, nodes: hasChildren ? nodes : undefined, ...extraProps };
}
/**
 * Create a Slate Inline node.
 *
 * `props` is merged onto the node last, so it may add further properties such
 * as `data`.
 */
function createInline(type, nodes, props = {}) {
  const inlineNode = { kind: 'inline', type, nodes };
  return Object.assign(inlineNode, props);
}
/**
 * Create a Slate Raw text node.
 *
 * An array `value` is stored as the node's `ranges`. Any other value is
 * stored as its `text`.
 */
function createText(value, data) {
  const valueKey = isArray(value) ? 'ranges' : 'text';
  return { kind: 'text', data, [valueKey]: value };
}
/**
 * Flatten an MDAST mark node (`strong`, `emphasis`, `delete`) and everything
 * nested inside it into a flat array of Slate ranges.
 *
 * @param {Object} node MDAST node to flatten.
 * @param {Array} [parentMarks] Marks collected from enclosing mark nodes.
 * @returns {Array} Ranges of the shape `{ text, marks }`, suitable as the
 * `ranges` value of a single Slate Raw text node.
 */
function convertMarkNode(node, parentMarks = []) {
  /**
   * Add the current node's mark type to the marks collected from parent
   * mark nodes, if any. Node types without a mark equivalent contribute no
   * mark of their own, so their text still inherits the parent marks.
   */
  const markType = markMap[node.type];
  const marks = markType ? [...parentMarks, { type: markType }] : [...parentMarks];

  /**
   * Literal nodes such as inline code carry a `value` and have no children.
   * They become a single range. Literals without a string value have no text
   * to contribute.
   */
  if (!Array.isArray(node.children)) {
    return typeof node.value === 'string' ? [{ text: node.value, marks }] : [];
  }

  /**
   * Set an array to collect sections of text.
   */
  const ranges = [];

  node.children.forEach(childNode => {
    /**
     * If a text node is a direct child of the current node, it should be
     * set aside as a range, and all marks that have been collected in the
     * `marks` array should apply to that specific range.
     */
    if (['html', 'text'].includes(childNode.type)) {
      ranges.push({ text: childNode.value, marks });
      return;
    }

    /**
     * Any non-text child node should be processed as a parent node. The
     * recursive results should be pushed into the ranges array. This way,
     * every MDAST nested text structure becomes a flat array of ranges
     * that can serve as the value of a single Slate Raw text node.
     */
    ranges.push(...convertMarkNode(childNode, marks));
  });

  return ranges;
}
/**
 * Convert a single MDAST node to a Slate Raw node, using the local node
 * factories (`createBlock`, `createInline`, `createText`).
 *
 * `nodes` holds the already converted children of `node`. Node types without
 * a Slate equivalent yield `undefined`.
 */
function convertNode(node, nodes) {
  /**
   * Unified/Remark processors use mutable operations, so the node's type is
   * never changed in place for conversion purposes, as that tends to cause
   * unexpected errors. A shortcode is recognised through its data instead.
   */
  const type = get(node, ['data', 'shortcode']) ? 'shortcode' : node.type;
  const slateType = typeMap[type];

  /**
   * General
   *
   * Simple cases that only require a type and children, with no additional
   * properties.
   */
  const simpleBlockTypes = ['root', 'paragraph', 'listItem', 'blockquote', 'tableRow', 'tableCell'];
  if (simpleBlockTypes.includes(type)) {
    return createBlock(slateType, nodes);
  }

  /**
   * Shortcodes
   *
   * Represented as "void" blocks that keep the MDAST node's data. Slate void
   * blocks must contain a blank text node.
   */
  if (type === 'shortcode') {
    return createBlock(slateType, [createText('')], { data: node.data, isVoid: true });
  }

  /**
   * Text
   *
   * Text and HTML nodes both render as text. HTML is treated as text because
   * we never want to escape or encode it.
   */
  if (type === 'text' || type === 'html') {
    return createText(node.value, node.data);
  }

  /**
   * Inline Code
   *
   * Becomes a text node holding one range that carries a "code" mark.
   */
  if (type === 'inlineCode') {
    const codeRange = { text: node.value, marks: [{ type: 'code' }] };
    return createText([codeRange]);
  }

  /**
   * Marks
   *
   * MDAST mark nodes may contain other nodes. That hierarchy is flattened
   * into ranges, each with its own set of marks.
   */
  if (type === 'strong' || type === 'emphasis' || type === 'delete') {
    return createText(convertMarkNode(node));
  }

  /**
   * Headings
   *
   * MDAST has one heading type with a "depth" property, while the Slate schema
   * has one node type per heading level.
   */
  if (type === 'heading') {
    const levelNames = { 1: 'one', 2: 'two', 3: 'three', 4: 'four', 5: 'five', 6: 'six' };
    return createBlock(`heading-${levelNames[node.depth]}`, nodes);
  }

  /**
   * Code Blocks
   *
   * The code's text value becomes a nested child text node, and "lang" is
   * carried over in the data object.
   */
  if (type === 'code') {
    const codeData = { lang: node.lang };
    return createBlock(slateType, [createText(node.value)], { data: codeData });
  }

  /**
   * Lists
   *
   * The "ordered" property selects between the two Slate list types. "start"
   * is carried over in the data object.
   */
  if (type === 'list') {
    const listType = node.ordered ? 'numbered-list' : 'bulleted-list';
    return createBlock(listType, nodes, { data: { start: node.start } });
  }

  /**
   * Thematic Breaks
   *
   * Thematic breaks are void nodes in the Slate schema.
   */
  if (type === 'thematicBreak') {
    return createBlock(slateType, { isVoid: true });
  }

  /**
   * Links
   *
   * MDAST stores the link attributes directly on the node, while our Slate
   * schema references them in the data object.
   */
  if (type === 'link') {
    const linkData = { title: node.title, url: node.url };
    return createInline(slateType, nodes, { data: linkData });
  }

  /**
   * Tables
   *
   * Handled separately because of the "align" property, which is passed on
   * in the data object.
   */
  if (type === 'table') {
    return createBlock(slateType, nodes, { data: { align: node.align } });
  }

  return undefined;
}
/**
 * A Remark plugin for converting an MDAST to Slate Raw AST. Remark plugins
 * return a `transform` function that receives the MDAST as its first argument.
 */
export default function remarkToSlatePlugin() {
  const transform = node => {
    /**
     * Convert child nodes first, depth first. `false` stands in for "no
     * children" when the node has none.
     *
     * Falsey conversion results are filtered out. Some nodes do not translate
     * from MDAST to Slate, such as definitions for link/image references or
     * footnotes.
     */
    let children = false;
    if (!isEmpty(node.children)) {
      children = node.children
        .map(childNode => transform(childNode))
        .filter(Boolean);
    }

    /**
     * Run the node itself through the conversion factory.
     */
    return convertNode(node, children);
  };

  return transform;
}

View File

@ -0,0 +1,65 @@
import { without } from 'lodash';
import u from 'unist-builder';
import mdastDefinitions from 'mdast-util-definitions';
/**
 * Raw markdown may contain image references or link references. Because there
 * is no way to maintain these references within the Slate AST, we convert image
 * and link references to standard images and links by putting their url's
 * inline. The definitions are then removed from the document.
 *
 * For example, the following markdown:
 *
 * ```
 * ![alpha][bravo]
 *
 * [bravo]: http://example.com/example.jpg
 * ```
 *
 * Yields:
 *
 * ```
 * ![alpha](http://example.com/example.jpg)
 * ```
 *
 * The transformer does not modify the tree it receives. It returns a new tree
 * made of shallow copies of the original nodes.
 */
export default function remarkSquashReferences() {
  return getTransform;

  /**
   * Build the definition getter from the whole tree once, then convert.
   */
  function getTransform(node) {
    const getDefinition = mdastDefinitions(node);
    return transform(getDefinition, node);
  }

  function transform(getDefinition, node) {
    /**
     * Definition nodes are dropped. The `null` is filtered out of the
     * parent's children below.
     */
    if (node.type === 'definition') {
      return null;
    }

    /**
     * Recursively convert child nodes, if there are any. Nodes without a
     * `children` array are left without one, so that literal nodes such as
     * text do not gain an empty `children` property.
     */
    const children = Array.isArray(node.children)
      ? node.children
        .map(childNode => transform(getDefinition, childNode))
        .filter(childNode => childNode !== null)
      : undefined;

    /**
     * Combine reference and definition nodes into standard image and link
     * nodes. `title` and `url` are undefined when no definition is found.
     */
    if (['imageReference', 'linkReference'].includes(node.type)) {
      const type = node.type === 'imageReference' ? 'image' : 'link';
      const { title, url } = getDefinition(node.identifier) || {};
      return u(type, { title, url, alt: node.alt }, children);
    }

    return children ? { ...node, children } : { ...node };
  }
}

View File

@ -0,0 +1,21 @@
import u from 'unist-builder';
/**
 * Ensure that top level 'html' type nodes are wrapped in paragraphs. Html nodes
 * are used for text nodes that we don't want Remark or Rehype to parse.
 *
 * Only the direct children of the tree are inspected. The tree's `children`
 * array is replaced in place and the same tree object is returned.
 */
export default function remarkWrapHtml() {
  const wrapIfHtml = node => (node.type === 'html' ? u('paragraph', [node]) : node);

  return function transform(tree) {
    tree.children = tree.children.map(node => wrapIfHtml(node));
    return tree;
  };
}

View File

@ -0,0 +1,330 @@
import { get, isEmpty, concat, without, flatten } from 'lodash';
import u from 'unist-builder';
/**
 * Map of Slate node types to MDAST/Remark node types.
 *
 * The mapping is many-to-one: all six Slate heading types map to the single
 * MDAST `heading` type, and both Slate list types map to `list`.
 */
const typeMap = {
'root': 'root',
'paragraph': 'paragraph',
'heading-one': 'heading',
'heading-two': 'heading',
'heading-three': 'heading',
'heading-four': 'heading',
'heading-five': 'heading',
'heading-six': 'heading',
'quote': 'blockquote',
'code': 'code',
'numbered-list': 'list',
'bulleted-list': 'list',
'list-item': 'listItem',
'table': 'table',
'table-row': 'tableRow',
'table-cell': 'tableCell',
'thematic-break': 'thematicBreak',
'link': 'link',
'image': 'image',
};
/**
 * Map of Slate mark types to MDAST/Remark node types. Note that the Slate
 * `code` mark maps to the MDAST `inlineCode` node type, which cannot contain
 * children.
 */
const markMap = {
bold: 'strong',
italic: 'emphasis',
strikethrough: 'delete',
code: 'inlineCode',
};
/**
 * Decide which MDAST node type the base text node should have, and which mark
 * types are left to wrap it with.
 *
 * Slate treats inline code as an ordinary mark, but an MDAST "inlineCode" node
 * holds a single text value and cannot contain children. It can be nested
 * within mark nodes such as "emphasis" and "strong", but it cannot contain
 * them.
 *
 * So when "inlineCode" is among the mark types, it is taken out of the list and
 * the base text node itself becomes an "inlineCode" node. Otherwise the list is
 * passed through untouched and the base text node is an "html" node.
 */
function processCodeMark(markTypes) {
  if (!markTypes.includes('inlineCode')) {
    return { filteredMarkTypes: markTypes, textNodeType: 'html' };
  }
  return {
    filteredMarkTypes: without(markTypes, 'inlineCode'),
    textNodeType: 'inlineCode',
  };
}
/**
 * Wrap a text node in one mark node per entry of `markTypes`.
 *
 * The text node becomes the only child of a node of the first mark type, that
 * node becomes the only child of a node of the second mark type, and so on.
 * The first mark type therefore ends up innermost and the last one outermost.
 * If `markTypes` is empty, the original text node is returned.
 */
function wrapTextWithMarks(textNode, markTypes) {
  let wrappedNode = textNode;
  markTypes.forEach(markType => {
    wrappedNode = u(markType, [wrappedNode]);
  });
  return wrappedNode;
}
/**
 * Convert a Slate Raw text node into one or more MDAST nodes.
 *
 * A Slate text node comes in two shapes:
 *
 * - With only a "text" property. It becomes a single MDAST node holding
 *   that text.
 * - With a "ranges" array. Each range has its own "text" and an optional
 *   "marks" array of objects whose "type" names a Slate mark ("bold",
 *   "italic", ...).
 *
 * MDAST expresses marks as nesting instead: one wrapper node per mark around
 * the node that holds the text. Each range is therefore converted on its
 * own, and the results are collected in an array. The first mark of a range
 * ends up innermost.
 *
 * For example, this Slate text node:
 *
 *   {
 *     kind: 'text',
 *     ranges: [
 *       { text: 'test', marks: [{ type: 'bold' }, { type: 'italic' }] },
 *       { text: 'test two' }
 *     ]
 *   }
 *
 * ...is converted to:
 *
 *   [
 *     {
 *       type: 'emphasis',
 *       children: [{
 *         type: 'strong',
 *         children: [{ type: 'html', value: 'test' }]
 *       }]
 *     },
 *     { type: 'html', value: 'test two' }
 *   ]
 *
 * Because one Slate node can turn into several MDAST nodes, callers must
 * flatten the result into the parent's list of children.
 *
 * @param {Object} node - Slate Raw node with `kind: 'text'`.
 * @returns {Object|Object[]} A single node when `node.ranges` is absent,
 *   otherwise an array with one entry per range.
 */
function convertTextNode(node) {
  const { ranges } = node;

  // Shorthand form: no ranges, so no marks - emit the text directly.
  if (!ranges) {
    return u('html', node.text);
  }

  const convertRange = ({ marks = [], text }) => {
    // Translate Slate mark names to MDAST node types.
    const mdastMarkTypes = marks.map(({ type }) => markMap[type]);

    // A code mark is not a wrapper: it changes the type of the innermost
    // node and is dropped from the list of wrappers.
    const { filteredMarkTypes, textNodeType } = processCodeMark(mdastMarkTypes);

    // Build the innermost node, then nest it in the remaining marks.
    return wrapTextWithMarks(u(textNodeType, text), filteredMarkTypes);
  };

  // `flatten` keeps the result a single flat array of nodes.
  return flatten(ranges.map(convertRange));
}
/**
 * Convert a single Slate Raw node (anything that is not a text node) to an
 * MDAST node, using the unist-builder `u` function to create the node.
 *
 * @param {Object} node - Slate Raw node. `node.type` selects the conversion;
 *   `node.data` and `node.nodes` are read by the cases that need them.
 * @param {Array} children - The node's children, already converted to MDAST
 *   by the caller. Ignored by cases that produce childless nodes.
 * @param {Object} shortcodePlugins - Collection with a `get(name)` method
 *   returning a shortcode plugin; only used for "shortcode" nodes.
 * @returns {Object|undefined} The MDAST node, or `undefined` when
 *   `node.type` matches none of the cases below.
 */
function convertNode(node, children, shortcodePlugins) {
  switch (node.type) {
    /**
     * General
     *
     * Convert simple cases that only require a type and children, with no
     * additional properties.
     */
    case 'root':
    case 'paragraph':
    case 'quote':
    case 'list-item':
    case 'table':
    case 'table-row':
    case 'table-cell': {
      return u(typeMap[node.type], children);
    }

    /**
     * Shortcodes
     *
     * Shortcode nodes only exist in Slate's Raw AST if they were inserted
     * via the plugin toolbar in memory, so they should always have
     * shortcode data attached. The "shortcode" data property contains the
     * name of the registered shortcode plugin, and the "shortcodeData" data
     * property contains the data received from the shortcode plugin's
     * `fromBlock` method when the shortcode node was created.
     *
     * Here we get the shortcode plugin from the registry and use its
     * `toBlock` method to recreate the original markdown shortcode. We then
     * insert that text into a new "html" type node (a "text" type node
     * might get encoded or escaped by remark-stringify). Finally, we wrap
     * the "html" node in a "paragraph" type node, as shortcode nodes must
     * be alone in their own paragraph.
     */
    case 'shortcode': {
      const { data } = node;
      const plugin = shortcodePlugins.get(data.shortcode);
      const text = plugin.toBlock(data.shortcodeData);
      const textNode = u('html', text);
      return u('paragraph', { data }, [textNode]);
    }

    /**
     * Headings
     *
     * Slate schemas don't usually infer basic type info from data, so each
     * level of heading is a separately named type. The MDAST schema just
     * has a single "heading" type with the depth stored in a "depth"
     * property on the node. Here we derive the depth from the Slate node
     * type - e.g., for "heading-two", we need a depth value of 2.
     */
    case 'heading-one':
    case 'heading-two':
    case 'heading-three':
    case 'heading-four':
    case 'heading-five':
    case 'heading-six': {
      const depthMap = { one: 1, two: 2, three: 3, four: 4, five: 5, six: 6 };
      const depthText = node.type.split('-')[1];
      const depth = depthMap[depthText];
      return u(typeMap[node.type], { depth }, children);
    }

    /**
     * Code Blocks
     *
     * Code block nodes have a single text child, and may have a code language
     * stored in the "lang" data property. Here we transfer both the node
     * value and the "lang" data property to the new MDAST node.
     */
    case 'code': {
      const value = get(node.nodes, [0, 'text']);
      const lang = get(node.data, 'lang');
      return u(typeMap[node.type], { lang }, value);
    }

    /**
     * Lists
     *
     * Our Slate schema has separate node types for ordered and unordered
     * lists, but the MDAST spec uses a single type with a boolean "ordered"
     * property to indicate whether the list is numbered. The MDAST spec also
     * allows for a "start" property to indicate the first number used for an
     * ordered list. Here we translate both values from our Slate schema to
     * MDAST.
     */
    case 'numbered-list':
    case 'bulleted-list': {
      const ordered = node.type === 'numbered-list';
      const storedStart = get(node.data, 'start');
      // A list may legitimately start at 0 ("0. item"), so 0 must survive;
      // every other falsy value (missing, null, ...) falls back to 1.
      const start = storedStart === 0 ? 0 : storedStart || 1;
      return u(typeMap[node.type], { ordered, start }, children);
    }

    /**
     * Thematic Breaks
     *
     * Thematic breaks don't have children. We parse them separately for
     * clarity.
     */
    case 'thematic-break': {
      return u(typeMap[node.type]);
    }

    /**
     * Links
     *
     * The url and title of a link are read from the Slate node's "data"
     * object and set as properties directly on the MDAST node.
     */
    case 'link': {
      const { url, title } = get(node, 'data', {});
      return u(typeMap[node.type], { url, title }, children);
    }

    /**
     * No default case is supplied because an unhandled case should never
     * occur. If one does, this function returns `undefined` and the problem
     * surfaces wherever the result is consumed.
     *
     * NOTE(review): `typeMap` declares an "image" entry, but no case here
     * handles it, so an "image" node would fall through - confirm whether
     * image nodes can reach this serializer.
     */
  }
}
/**
 * Convert a Slate Raw AST to an MDAST tree.
 *
 * The top level of a Slate Raw AST generally has no type, so it is treated
 * as a "root" node. This is done on a shallow copy: the `raw` object passed
 * in by the caller is not modified.
 *
 * @param {Object} raw - Slate Raw AST; its `nodes` array holds the document's
 *   top level nodes.
 * @param {Object} [options]
 * @param {Object} [options.shortcodePlugins] - Collection with a `get(name)`
 *   method returning a shortcode plugin. Required only when the document
 *   contains "shortcode" nodes.
 * @returns {Object} The MDAST root node.
 */
export default function slateToRemark(raw, { shortcodePlugins } = {}) {
  /**
   * The transform function mimics the approach of a Remark plugin for
   * conformity with the other serialization functions. It converts one Slate
   * node to MDAST, calling itself on child nodes to arbitrary depth.
   */
  function transform(node) {
    // Text nodes are leaves and have their own converter.
    if (node.kind === 'text') {
      return convertTextNode(node);
    }

    /**
     * Convert the children first. A text node may convert to several MDAST
     * nodes, so the mapped result is flattened. A node without children gets
     * an empty array: a boolean `false` here would be taken by the
     * unist-builder `u` function as the node's text value.
     */
    const children = isEmpty(node.nodes) ? [] : flatten(node.nodes.map(transform));

    return convertNode(node, children, shortcodePlugins);
  }

  return transform(Object.assign({}, raw, { type: 'root' }));
}