From cf2b7be25f8f4824d6e7e3a130a892e95db6b378 Mon Sep 17 00:00:00 2001 From: Shawn Erquhart Date: Mon, 31 Jul 2017 16:41:40 -0400 Subject: [PATCH] refactor and document rte serializers --- .../Widgets/Markdown/serializers/index.js | 341 ++++++++++-------- .../serializers/rehype-remove-empty.js | 32 -- ...ype-paper-emoji.js => rehypePaperEmoji.js} | 0 .../serializers/remark-nested-list.js | 33 -- .../Markdown/serializers/remark-slate.js | 172 --------- ...mages-to-text.js => remarkImagesToText.js} | 0 ...hortcodes.js => remarkRehypeShortcodes.js} | 0 ...mark-shortcodes.js => remarkShortcodes.js} | 0 .../Markdown/serializers/remarkSlate.js | 293 +++++++++++++++ .../serializers/remarkSquashReferences.js | 65 ++++ .../Markdown/serializers/remarkWrapHtml.js | 21 ++ .../Markdown/serializers/slateRemark.js | 330 +++++++++++++++++ 12 files changed, 898 insertions(+), 389 deletions(-) delete mode 100644 src/components/Widgets/Markdown/serializers/rehype-remove-empty.js rename src/components/Widgets/Markdown/serializers/{rehype-paper-emoji.js => rehypePaperEmoji.js} (100%) delete mode 100644 src/components/Widgets/Markdown/serializers/remark-nested-list.js delete mode 100644 src/components/Widgets/Markdown/serializers/remark-slate.js rename src/components/Widgets/Markdown/serializers/{remark-images-to-text.js => remarkImagesToText.js} (100%) rename src/components/Widgets/Markdown/serializers/{remark-rehype-shortcodes.js => remarkRehypeShortcodes.js} (100%) rename src/components/Widgets/Markdown/serializers/{remark-shortcodes.js => remarkShortcodes.js} (100%) create mode 100644 src/components/Widgets/Markdown/serializers/remarkSlate.js create mode 100644 src/components/Widgets/Markdown/serializers/remarkSquashReferences.js create mode 100644 src/components/Widgets/Markdown/serializers/remarkWrapHtml.js create mode 100644 src/components/Widgets/Markdown/serializers/slateRemark.js diff --git a/src/components/Widgets/Markdown/serializers/index.js 
b/src/components/Widgets/Markdown/serializers/index.js index b2b596bc..c66fc890 100644 --- a/src/components/Widgets/Markdown/serializers/index.js +++ b/src/components/Widgets/Markdown/serializers/index.js @@ -1,4 +1,4 @@ -import { get, isEmpty, reduce } from 'lodash'; +import { get, isEmpty, reduce, pull } from 'lodash'; import unified from 'unified'; import u from 'unist-builder'; import markdownToRemarkPlugin from 'remark-parse'; @@ -7,51 +7,133 @@ import remarkToRehype from 'remark-rehype'; import rehypeToHtml from 'rehype-stringify'; import htmlToRehype from 'rehype-parse'; import rehypeToRemark from 'rehype-remark'; -import rehypeMinifyWhitespace from 'rehype-minify-whitespace'; -import remarkToRehypeShortcodes from './remark-rehype-shortcodes'; -import rehypeRemoveEmpty from './rehype-remove-empty'; -import rehypePaperEmoji from './rehype-paper-emoji'; -import remarkNestedList from './remark-nested-list'; -import remarkToSlatePlugin from './remark-slate'; -import remarkImagesToText from './remark-images-to-text'; -import remarkShortcodes from './remark-shortcodes'; +import remarkToRehypeShortcodes from './remarkRehypeShortcodes'; +import rehypePaperEmoji from './rehypePaperEmoji'; +import remarkWrapHtml from './remarkWrapHtml'; +import remarkToSlatePlugin from './remarkSlate'; +import remarkSquashReferences from './remarkSquashReferences'; +import remarkImagesToText from './remarkImagesToText'; +import remarkShortcodes from './remarkShortcodes'; +import slateToRemarkParser from './slateRemark'; import registry from '../../../../lib/registry'; -export const remarkToHtml = (mdast, getAsset) => { - const result = unified() - .use(remarkToRehypeShortcodes, { plugins: registry.getEditorComponents(), getAsset }) - .use(remarkToRehype, { allowDangerousHTML: true }) - .runSync(mdast); +/** + * This module contains all serializers for the Markdown widget. 
+ * + * The value of a Markdown widget is transformed to various formats during + * editing, and these formats are referenced throughout serializer source + * documentation. Below is a brief glossary of the formats used. + * + * - Markdown {string} + * The stringified Markdown value. The value of the field is persisted + * (stored) in this format, and the stringified value is also used when the + * editor is in "raw" Markdown mode. + * + * - MDAST {object} + * Also loosely referred to as "Remark". MDAST stands for MarkDown AST + * (Abstract Syntax Tree), and is an object representation of a Markdown + * document. Underneath, it's a Unist tree with a Markdown-specific schema. An + * MDAST is used as the source of truth for any Markdown field within the CMS + * once the Markdown string value is loaded. MDAST syntax is a part of the + * Unified ecosystem, and powers the Remark processor, so Remark plugins may + * be used. + * + * - HAST {object} + * Also loosely referred to as "Rehype". HAST, similar to MDAST, is an object + * representation of an HTML document. The field value takes this format + * temporarily before the document is stringified to HTML. + * + * - HTML {string} + * The field value is stringified to HTML for preview purposes - the HTML value + * is never parsed, it is output only. + * + * - Slate Raw AST {object} + * Slate's Raw AST is a very simple and unopinionated object representation of + * a document in a Slate editor. We define our own Markdown-specific schema + * for serialization to/from Slate's Raw AST and MDAST. + * + * Overview of the Markdown widget serialization life cycle: + * + * - Entry Load + * When an entry is loaded, all Markdown widget values are serialized to + * MDAST within the entry draft. + * + * - Visual Editor Render + * When a Markdown widget using the visual editor renders, it converts the + * MDAST value from the entry draft to Slate's Raw AST, and renders that. 
+ * + * - Visual Editor Update + * When the value of a Markdown field is changed in the visual editor, the + * resulting Slate Raw AST is converted back to MDAST, and the MDAST value is + * set as the new state of the field in the entry draft. + * + * - Visual Editor Paste + * When a value is pasted to the visual editor, the pasted value is checked + * for HTML data. If HTML is found, the value is deserialized to an HAST, then + * to MDAST, and finally to Slate's Raw AST. If no HTML is found, the plain + * text value of the paste is serialized to Slate's Raw AST via the Slate + * Plain serializer. The deserialized fragment is then inserted to the Slate + * document. + * + * - Raw Editor Render + * When a Markdown widget using the raw editor (Markdown switch activated) renders, + * it stringifies the MDAST from the entry draft to Markdown, and runs the + * stringified Markdown through Slate's Plain serializer, which outputs a + * Slate Raw AST of the plain text, which is then rendered in the editor. + * + * - Raw Editor Update + * When the value of a Markdown field is changed in the raw editor, the + * resulting Slate Raw AST is stringified back to a string, and the string + * value is then parsed as Markdown into an MDAST. The MDAST value is + * set as the new state of the field in the entry draft. + * + * - Raw Editor Paste + * When a value is pasted to the raw editor, the text value of the paste is + * serialized to Slate's Raw AST via the Slate Plain serializer. The + * deserialized fragment is then inserted to the Slate document. + * + * - Preview Pane Render + * When the preview pane renders the value of a Markdown widget, it first + * converts the MDAST value to HAST, stringifies the HAST to HTML, and + * renders that. + * + * - Entry Persist (Save) + * On persist, the MDAST value in the entry draft is stringified back to + * a Markdown string for storage. 
+ */ - const output = unified() - .use(rehypeToHtml, { allowDangerousHTML: true, allowDangerousCharacters: true }) - .stringify(result); - return output -} - -export const htmlToSlate = html => { - const hast = unified() - .use(htmlToRehype, { fragment: true }) - .parse(html); - - const result = unified() - .use(rehypeRemoveEmpty) - .use(rehypeMinifyWhitespace) - .use(rehypePaperEmoji) - .use(rehypeToRemark) - .use(remarkNestedList) - .use(remarkToSlatePlugin) - .runSync(hast); - - return result; -}; +/** + * Deserialize a Markdown string to an MDAST. + */ export const markdownToRemark = markdown => { + + /** + * Disabling tokenizers allows us to turn off features within the Remark + * parser. + */ + function disableTokenizers() { + + /** + * Turn off soft breaks until we can properly support them across both + * editors. + */ + pull(this.Parser.prototype.inlineMethods, 'break'); + } + + /** + * Parse the Markdown string input to an MDAST. + */ const parsed = unified() - .use(markdownToRemarkPlugin, { fences: true, pedantic: true, footnotes: true, commonmark: true }) + .use(markdownToRemarkPlugin, { fences: true, pedantic: true, commonmark: true }) + .use(disableTokenizers) .parse(markdown); + /** + * Further transform the MDAST with plugins. + */ const result = unified() + .use(remarkSquashReferences) .use(remarkImagesToText) .use(remarkShortcodes, { plugins: registry.getEditorComponents() }) .runSync(parsed); @@ -59,6 +141,10 @@ export const markdownToRemark = markdown => { return result; }; + +/** + * Serialize an MDAST to a Markdown string. + */ export const remarkToMarkdown = obj => { /** * Rewrite the remark-stringify text visitor to simply return the text value, @@ -71,133 +157,84 @@ export const remarkToMarkdown = obj => { visitors.text = node => node.value; }; + /** + * Provide an empty MDAST if no value is provided. 
+ */ const mdast = obj || u('root', [u('paragraph', [u('text', '')])]); - const result = unified() + + const markdown = unified() .use(remarkToMarkdownPlugin, { listItemIndent: '1', fences: true, pedantic: true, commonmark: true }) .use(remarkAllowAllText) .stringify(mdast); - return result; + + return markdown; }; + +/** + * Convert an MDAST to an HTML string. + */ +export const remarkToHtml = (mdast, getAsset) => { + const hast = unified() + .use(remarkToRehypeShortcodes, { plugins: registry.getEditorComponents(), getAsset }) + .use(remarkToRehype, { allowDangerousHTML: true }) + .runSync(mdast); + + const html = unified() + .use(rehypeToHtml, { allowDangerousHTML: true, allowDangerousCharacters: true }) + .stringify(hast); + + return html; +} + + +/** + * Deserialize an HTML string to Slate's Raw AST. Currently used for HTML + * pastes. + */ +export const htmlToSlate = html => { + const hast = unified() + .use(htmlToRehype, { fragment: true }) + .parse(html); + + const mdast = unified() + .use(rehypePaperEmoji) + .use(rehypeToRemark) + .runSync(hast); + + const slateRaw = unified() + .use(remarkImagesToText) + .use(remarkShortcodes, { plugins: registry.getEditorComponents() }) + .use(remarkWrapHtml) + .use(remarkToSlatePlugin) + .runSync(mdast); + + return slateRaw; +}; + + +/** + * Convert an MDAST to Slate's Raw AST. 
+ */ export const remarkToSlate = mdast => { const result = unified() + .use(remarkWrapHtml) .use(remarkToSlatePlugin) .runSync(mdast); return result; }; -export const slateToRemark = (raw, shortcodePlugins) => { - const typeMap = { - 'paragraph': 'paragraph', - 'heading-one': 'heading', - 'heading-two': 'heading', - 'heading-three': 'heading', - 'heading-four': 'heading', - 'heading-five': 'heading', - 'heading-six': 'heading', - 'quote': 'blockquote', - 'code': 'code', - 'numbered-list': 'list', - 'bulleted-list': 'list', - 'list-item': 'listItem', - 'table': 'table', - 'table-row': 'tableRow', - 'table-cell': 'tableCell', - 'thematic-break': 'thematicBreak', - 'link': 'link', - 'image': 'image', - }; - const markMap = { - bold: 'strong', - italic: 'emphasis', - strikethrough: 'delete', - code: 'inlineCode', - }; - const transform = node => { - const children = isEmpty(node.nodes) ? node.nodes : node.nodes.reduce((acc, childNode) => { - if (childNode.kind !== 'text') { - acc.push(transform(childNode)); - return acc; - } - if (childNode.ranges) { - childNode.ranges.forEach(range => { - const { marks = [], text } = range; - const markTypes = marks.map(mark => markMap[mark.type]); - if (markTypes.includes('inlineCode')) { - acc.push(u('inlineCode', text)); - } else { - const textNode = u('html', text); - const nestedText = !markTypes.length ? 
textNode : markTypes.reduce((acc, markType) => { - const nested = u(markType, [acc]); - return nested; - }, textNode); - acc.push(nestedText); - } - }); - } else { - acc.push(u('html', childNode.text)); - } - return acc; - }, []); - - if (node.type === 'root') { - return u('root', children); - } - - if (node.type === 'shortcode') { - const { data } = node; - const plugin = shortcodePlugins.get(data.shortcode); - const text = plugin.toBlock(data.shortcodeData); - const textNode = u('html', text); - return u('paragraph', { data }, [ textNode ]); - } - - if (node.type.startsWith('heading')) { - const depths = { one: 1, two: 2, three: 3, four: 4, five: 5, six: 6 }; - const depth = node.type.split('-')[1]; - const props = { depth: depths[depth] }; - return u(typeMap[node.type], props, children); - } - - if (['paragraph', 'quote', 'list-item', 'table', 'table-row', 'table-cell'].includes(node.type)) { - return u(typeMap[node.type], children); - } - - if (node.type === 'code') { - const value = get(node.nodes, [0, 'text']); - const props = { lang: get(node.data, 'lang') }; - return u(typeMap[node.type], props, value); - } - - if (['numbered-list', 'bulleted-list'].includes(node.type)) { - const ordered = node.type === 'numbered-list'; - const props = { ordered, start: get(node.data, 'start') || 1 }; - return u(typeMap[node.type], props, children); - } - - if (node.type === 'thematic-break') { - return u(typeMap[node.type]); - } - - if (node.type === 'link') { - const data = get(node, 'data', {}); - const { url, title } = data; - return u(typeMap[node.type], data, children); - } - - if (node.type === 'image') { - const data = get(node, 'data', {}); - const { url, title, alt } = data; - return u(typeMap[node.type], data); - } - } - raw.type = 'root'; - const mdast = transform(raw); - - const result = unified() - .use(remarkShortcodes, { plugins: registry.getEditorComponents() }) - .runSync(mdast); - - return result; +/** + * Convert a Slate Raw AST to MDAST. 
+ * + * Requires shortcode plugins to parse shortcode nodes back to text. + * + * Note that Unified is not utilized for the conversion from Slate's Raw AST to + * MDAST. The conversion is manual because Unified can only operate on Unist + * trees. + */ +export const slateToRemark = (raw) => { + const mdast = slateToRemarkParser(raw, { shortcodePlugins: registry.getEditorComponents() }); + return mdast; }; diff --git a/src/components/Widgets/Markdown/serializers/rehype-remove-empty.js b/src/components/Widgets/Markdown/serializers/rehype-remove-empty.js deleted file mode 100644 index 4d59b6a5..00000000 --- a/src/components/Widgets/Markdown/serializers/rehype-remove-empty.js +++ /dev/null @@ -1,32 +0,0 @@ -import { find, capitalize } from 'lodash'; - -/** - * Remove empty nodes, including the top level parents of deeply nested empty nodes. - */ -export default function rehypeRemoveEmpty() { - const isVoidElement = node => ['img', 'hr', 'br'].includes(node.tagName); - const isNonEmptyLeaf = node => ['text', 'raw'].includes(node.type) && node.value; - const isShortcode = node => node.properties && node.properties[`data${capitalize(shortcodeAttributePrefix)}`]; - const isNonEmptyNode = node => { - return isVoidElement(node) - || isNonEmptyLeaf(node) - || isShortcode(node) - || find(node.children, isNonEmptyNode); - }; - - const transform = node => { - if (isVoidElement(node) || isNonEmptyLeaf(node) || isShortcode(node)) { - return node; - } - if (node.children) { - node.children = node.children.reduce((acc, childNode) => { - if (isVoidElement(childNode) || isNonEmptyLeaf(childNode) || isShortcode(node)) { - return acc.concat(childNode); - } - return find(childNode.children, isNonEmptyNode) ? 
acc.concat(transform(childNode)) : acc; - }, []); - } - return node; - }; - return transform; -} diff --git a/src/components/Widgets/Markdown/serializers/rehype-paper-emoji.js b/src/components/Widgets/Markdown/serializers/rehypePaperEmoji.js similarity index 100% rename from src/components/Widgets/Markdown/serializers/rehype-paper-emoji.js rename to src/components/Widgets/Markdown/serializers/rehypePaperEmoji.js diff --git a/src/components/Widgets/Markdown/serializers/remark-nested-list.js b/src/components/Widgets/Markdown/serializers/remark-nested-list.js deleted file mode 100644 index 930daeb7..00000000 --- a/src/components/Widgets/Markdown/serializers/remark-nested-list.js +++ /dev/null @@ -1,33 +0,0 @@ -/** - * If the first child of a list item is a list, include it in the previous list - * item. Otherwise it translates to markdown as having two bullets. When - * rehype-remark processes a list and finds children that are not list items, it - * wraps them in list items, which leads to the condition this plugin addresses. - * Dropbox Paper currently outputs this kind of HTML, which is invalid. We have - * a support issue open for it, and this plugin can potentially be removed when - * that's resolved. 
- */ - -export default function remarkNestedList() { - const transform = node => { - if (node.type === 'list' && node.children && node.children.length > 1) { - node.children = node.children.reduce((acc, childNode, index) => { - if (index && childNode.children && childNode.children[0].type === 'list') { - acc[acc.length - 1].children.push(transform(childNode.children.shift())) - if (childNode.children.length) { - acc.push(transform(childNode)); - } - } else { - acc.push(transform(childNode)); - } - return acc; - }, []); - return node; - } - if (node.children) { - node.children = node.children.map(transform); - } - return node; - }; - return transform; -} diff --git a/src/components/Widgets/Markdown/serializers/remark-slate.js b/src/components/Widgets/Markdown/serializers/remark-slate.js deleted file mode 100644 index f979916b..00000000 --- a/src/components/Widgets/Markdown/serializers/remark-slate.js +++ /dev/null @@ -1,172 +0,0 @@ -import { get, isEmpty } from 'lodash'; -import u from 'unist-builder'; -import mdastDefinitions from 'mdast-util-definitions'; -import modifyChildren from 'unist-util-modify-children'; - -export default function remarkToSlatePlugin() { - const typeMap = { - paragraph: 'paragraph', - blockquote: 'quote', - code: 'code', - listItem: 'list-item', - table: 'table', - tableRow: 'table-row', - tableCell: 'table-cell', - thematicBreak: 'thematic-break', - link: 'link', - image: 'image', - }; - const markMap = { - strong: 'bold', - emphasis: 'italic', - delete: 'strikethrough', - inlineCode: 'code', - }; - const toTextNode = (text, data) => ({ kind: 'text', text, data }); - const wrapText = (node, index, parent) => { - if (['text', 'html'].includes(node.type)) { - parent.children.splice(index, 1, u('paragraph', [node])); - } - }; - - let getDefinition; - const transform = (node, index, siblings, parent) => { - let nodes; - - if (node.type === 'root') { - // Create definition getter for link and image references - getDefinition = 
mdastDefinitions(node); - // Ensure top level text nodes are wrapped in paragraphs - modifyChildren(wrapText)(node); - } - - if (isEmpty(node.children)) { - nodes = node.children; - } else { - // If a node returns a falsey value, exclude it. Some nodes do not - // translate from MDAST to Slate, such as definitions for link/image - // references or footnotes. - // - // Consider using unist-util-remove instead for this. - nodes = node.children.reduce((acc, childNode, idx, sibs) => { - const transformed = transform(childNode, idx, sibs, node); - if (transformed) { - acc.push(transformed); - } - return acc; - }, []); - } - - if (node.type === 'root') { - return { nodes }; - } - - /** - * Convert MDAST shortcode nodes to Slate 'shortcode' type nodes. - */ - if (get(node, ['data', 'shortcode'])) { - const { data } = node; - const nodes = [ toTextNode('') ]; - return { kind: 'block', type: 'shortcode', data, isVoid: true, nodes }; - } - - // Process raw html as text, since it's valid markdown - if (['text', 'html'].includes(node.type)) { - return toTextNode(node.value, node.data); - } - - if (node.type === 'inlineCode') { - return { kind: 'text', ranges: [{ text: node.value, marks: [{ type: 'code' }] }] }; - } - - if (['strong', 'emphasis', 'delete'].includes(node.type)) { - const remarkToSlateMarks = (markNode, parentMarks = []) => { - const marks = [...parentMarks, { type: markMap[markNode.type] }]; - const ranges = []; - markNode.children.forEach(childNode => { - if (['html', 'text'].includes(childNode.type)) { - ranges.push({ text: childNode.value, marks }); - return; - } - const nestedRanges = remarkToSlateMarks(childNode, marks); - ranges.push(...nestedRanges); - }); - return ranges; - }; - - return { kind: 'text', ranges: remarkToSlateMarks(node) }; - } - - if (node.type === 'heading') { - const depths = { 1: 'one', 2: 'two', 3: 'three', 4: 'four', 5: 'five', 6: 'six' }; - return { kind: 'block', type: `heading-${depths[node.depth]}`, nodes }; - } - - if 
(['paragraph', 'blockquote', 'tableRow', 'tableCell'].includes(node.type)) { - return { kind: 'block', type: typeMap[node.type], nodes }; - } - - if (node.type === 'code') { - const data = { lang: node.lang }; - const text = toTextNode(node.value); - const nodes = [text]; - return { kind: 'block', type: typeMap[node.type], data, nodes }; - } - - if (node.type === 'list') { - const slateType = node.ordered ? 'numbered-list' : 'bulleted-list'; - const data = { start: node.start }; - return { kind: 'block', type: slateType, data, nodes }; - } - - if (node.type === 'listItem') { - const data = { checked: node.checked }; - return { kind: 'block', type: typeMap[node.type], data, nodes }; - } - - if (node.type === 'table') { - const data = { align: node.align }; - return { kind: 'block', type: typeMap[node.type], data, nodes }; - } - - if (node.type === 'thematicBreak') { - return { kind: 'block', type: typeMap[node.type], isVoid: true }; - } - - if (node.type === 'link') { - const { title, url } = node; - const data = { title, url }; - return { kind: 'inline', type: typeMap[node.type], data, nodes }; - } - - if (node.type === 'linkReference') { - const definition = getDefinition(node.identifier); - const data = {}; - if (definition) { - data.title = definition.title; - data.url = definition.url; - } - return { kind: 'inline', type: typeMap['link'], data, nodes }; - } - - if (node.type === 'image') { - const { title, url, alt } = node; - const data = { title, url, alt }; - return { kind: 'block', type: typeMap[node.type], data }; - } - - if (node.type === 'imageReference') { - const definition = getDefinition(node.identifier); - const data = {}; - if (definition) { - data.title = definition.title; - data.url = definition.url; - } - return { kind: 'block', type: typeMap['image'], data }; - } - }; - - // Since `transform` is used for recursive child mapping, ensure that only the - // first argument is supplied on the initial call. 
- return node => transform(node); -} diff --git a/src/components/Widgets/Markdown/serializers/remark-images-to-text.js b/src/components/Widgets/Markdown/serializers/remarkImagesToText.js similarity index 100% rename from src/components/Widgets/Markdown/serializers/remark-images-to-text.js rename to src/components/Widgets/Markdown/serializers/remarkImagesToText.js diff --git a/src/components/Widgets/Markdown/serializers/remark-rehype-shortcodes.js b/src/components/Widgets/Markdown/serializers/remarkRehypeShortcodes.js similarity index 100% rename from src/components/Widgets/Markdown/serializers/remark-rehype-shortcodes.js rename to src/components/Widgets/Markdown/serializers/remarkRehypeShortcodes.js diff --git a/src/components/Widgets/Markdown/serializers/remark-shortcodes.js b/src/components/Widgets/Markdown/serializers/remarkShortcodes.js similarity index 100% rename from src/components/Widgets/Markdown/serializers/remark-shortcodes.js rename to src/components/Widgets/Markdown/serializers/remarkShortcodes.js diff --git a/src/components/Widgets/Markdown/serializers/remarkSlate.js b/src/components/Widgets/Markdown/serializers/remarkSlate.js new file mode 100644 index 00000000..bc53ac66 --- /dev/null +++ b/src/components/Widgets/Markdown/serializers/remarkSlate.js @@ -0,0 +1,293 @@ +import { get, isEmpty, isArray } from 'lodash'; +import u from 'unist-builder'; +import modifyChildren from 'unist-util-modify-children'; + +/** + * Map of MDAST node types to Slate node types. + */ +const typeMap = { + root: 'root', + paragraph: 'paragraph', + blockquote: 'quote', + code: 'code', + listItem: 'list-item', + table: 'table', + tableRow: 'table-row', + tableCell: 'table-cell', + thematicBreak: 'thematic-break', + link: 'link', + image: 'image', + shortcode: 'shortcode', +}; + + +/** + * Map of MDAST node types to Slate mark types. 
+ */ +const markMap = { + strong: 'bold', + emphasis: 'italic', + delete: 'strikethrough', + inlineCode: 'code', +}; + + +/** + * Create a Slate Block node. + */ +function createBlock(type, nodes, props = {}) { + if (!isArray(nodes)) { + props = nodes; + nodes = undefined; + } + + return { kind: 'block', type, nodes, ...props }; +} + + +/** + * Create a Slate Inline node. + */ +function createInline(type, nodes, props = {}) { + return { kind: 'inline', type, nodes, ...props }; +} + + +/** + * Create a Slate Raw text node. + */ +function createText(value, data) { + const node = { kind: 'text', data }; + if (isArray(value)) { + return { ...node, ranges: value }; + } + return {...node, text: value }; +} + +function convertMarkNode(node, parentMarks = []) { + + /** + * Add the current node's mark type to the marks collected from parent + * mark nodes, if any. + */ + const marks = [...parentMarks, { type: markMap[node.type] }]; + + /** + * Set an array to collect sections of text. + */ + const ranges = []; + + node.children.forEach(childNode => { + + /** + * If a text node is a direct child of the current node, it should be + * set aside as a range, and all marks that have been collected in the + * `marks` array should apply to that specific range. + */ + if (['html', 'text'].includes(childNode.type)) { + ranges.push({ text: childNode.value, marks }); + return; + } + + /** + * Any non-text child node should be processed as a parent node. The + * recursive results should be pushed into the ranges array. This way, + * every MDAST nested text structure becomes a flat array of ranges + * that can serve as the value of a single Slate Raw text node. + */ + const nestedRanges = convertMarkNode(childNode, marks); + ranges.push(...nestedRanges); + }); + + return ranges; +} + +/** + * Convert a single MDAST node to a Slate Raw node. Uses local node factories + * that mimic the unist-builder function utilized in the slateRemark + * transformer. 
+ */ +function convertNode(node, nodes) { + + /** + * Unified/Remark processors use mutable operations, so we don't want to + * change a node's type directly for conversion purposes, as that tends to + * cause unexpected errors. + */ + const type = get(node, ['data', 'shortcode']) ? 'shortcode' : node.type; + + switch (type) { + + /** + * General + * + * Convert simple cases that only require a type and children, with no + * additional properties. + */ + case 'root': + case 'paragraph': + case 'listItem': + case 'blockquote': + case 'tableRow': + case 'tableCell': { + return createBlock(typeMap[type], nodes); + } + + + /** + * Shortcodes + * + * Shortcode nodes are represented as "void" blocks in the Slate AST. They + * maintain the same data as MDAST shortcode nodes. Slate void blocks must + * contain a blank text node. + */ + case 'shortcode': { + const { data } = node; + const nodes = [ createText('') ]; + return createBlock(typeMap[type], nodes, { data, isVoid: true }); + } + + /** + * Text + * + * Text and HTML nodes are both used to render text, and should be treated + * the same. HTML is treated as text because we never want to escape or + * encode it. + */ + case 'text': + case 'html': { + return createText(node.value, node.data); + } + + /** + * Inline Code + * + * Inline code nodes from an MDAST are represented in our Slate schema as + * text nodes with a "code" mark. We manually create the "range" containing + * the inline code value and a "code" mark, and place it in an array for use + * as a Slate text node's ranges array. + */ + case 'inlineCode': { + const range = { + text: node.value, + marks: [{ type: 'code' }], + }; + return createText([ range ]); + } + + /** + * Marks + * + * Marks are typically decorative sub-types that apply to text nodes. In an + * MDAST, marks are nodes that can contain other nodes. This nested + * hierarchy has to be flattened and split into distinct text nodes with + * their own set of marks. 
+ */ + case 'strong': + case 'emphasis': + case 'delete': { + return createText(convertMarkNode(node)); + } + + /** + * Headings + * + * MDAST headings use a single type with a separate "depth" property to + * indicate the heading level, while the Slate schema uses a separate node + * type for each heading level. Here we get the proper Slate node name based + * on the MDAST node depth. + */ + case 'heading': { + const depthMap = { 1: 'one', 2: 'two', 3: 'three', 4: 'four', 5: 'five', 6: 'six' }; + const slateType = `heading-${depthMap[node.depth]}`; + return createBlock(slateType, nodes); + } + + /** + * Code Blocks + * + * MDAST code blocks are a distinct node type with a simple text value. We + * convert that value into a nested child text node for Slate. We also carry + * over the "lang" data property if it's defined. + */ + case 'code': { + const data = { lang: node.lang }; + const text = createText(node.value); + const nodes = [text]; + return createBlock(typeMap[type], nodes, { data }); + } + + /** + * Lists + * + * MDAST has a single list type and an "ordered" property. We derive that + * information into the Slate schema's distinct list node types. We also + * include the "start" property, which indicates the number an ordered list + * starts at, if defined. + */ + case 'list': { + const slateType = node.ordered ? 'numbered-list' : 'bulleted-list'; + const data = { start: node.start }; + return createBlock(slateType, nodes, { data }); + } + + + /** + * Thematic Breaks + * + * Thematic breaks are void nodes in the Slate schema. + */ + case 'thematicBreak': { + return createBlock(typeMap[type], { isVoid: true }); + } + + /** + * Links + * + * MDAST stores the link attributes directly on the node, while our Slate + * schema references them in the data object. 
+ */ + case 'link': { + const { title, url } = node; + const data = { title, url }; + return createInline(typeMap[type], nodes, { data }); + } + + /** + * Tables + * + * Tables are parsed separately because they may include an "align" + * property, which should be passed to the Slate node. + */ + case 'table': { + const data = { align: node.align }; + return createBlock(typeMap[type], nodes, { data }); + } + } +} + + +/** + * A Remark plugin for converting an MDAST to Slate Raw AST. Remark plugins + * return a `transform` function that receives the MDAST as its first argument. + */ +export default function remarkToSlatePlugin() { + function transform(node) { + + /** + * Call `transform` recursively on child nodes. + * + * If a node returns a falsey value, filter it out. Some nodes do not + * translate from MDAST to Slate, such as definitions for link/image + * references or footnotes. + */ + const children = !isEmpty(node.children) && node.children.map(transform).filter(val => val); + + /** + * Run individual nodes through the conversion factory. + */ + return convertNode(node, children); + } + + return transform; +} diff --git a/src/components/Widgets/Markdown/serializers/remarkSquashReferences.js b/src/components/Widgets/Markdown/serializers/remarkSquashReferences.js new file mode 100644 index 00000000..53762255 --- /dev/null +++ b/src/components/Widgets/Markdown/serializers/remarkSquashReferences.js @@ -0,0 +1,65 @@ +import { without } from 'lodash'; +import u from 'unist-builder'; +import mdastDefinitions from 'mdast-util-definitions'; + +/** + * Raw markdown may contain image references or link references. Because there + * is no way to maintain these references within the Slate AST, we convert image + * and link references to standard images and links by putting their URLs + * inline. The definitions are then removed from the document. 
+ *
+ * For example, the following markdown:
+ *
+ * ```
+ * ![alpha][bravo]
+ *
+ * [bravo]: http://example.com/example.jpg
+ * ```
+ *
+ * Yields:
+ *
+ * ```
+ * ![alpha](http://example.com/example.jpg)
+ * ```
+ *
+ * The input tree is not mutated: references are replaced with newly built
+ * nodes, definitions are dropped, and every other node is returned as a
+ * shallow copy.
+ */
+export default function remarkSquashReferences() {
+  return getTransform;
+
+  /**
+   * Build the definition lookup for the received tree once, then transform
+   * the tree recursively starting from the received node.
+   */
+  function getTransform(node) {
+    const getDefinition = mdastDefinitions(node);
+    return transform(getDefinition, node);
+  }
+
+  /**
+   * Returns the transformed copy of `node`, or `null` if the node is a
+   * definition and must be removed from its parent.
+   */
+  function transform(getDefinition, node) {
+
+    /**
+     * Remove definition nodes. The parent filters the resulting null values
+     * from its children array below.
+     */
+    if (node.type === 'definition') {
+      return null;
+    }
+
+    /**
+     * Recursively transform child nodes. Nodes without a `children` array
+     * are left without one.
+     */
+    const children = node.children
+      ? node.children.map(child => transform(getDefinition, child))
+      : undefined;
+
+    /**
+     * Combine reference and definition nodes into standard image and link
+     * nodes. If no matching definition exists, `title` and `url` are left
+     * undefined.
+     */
+    if (['imageReference', 'linkReference'].includes(node.type)) {
+      const type = node.type === 'imageReference' ? 'image' : 'link';
+      const { title, url } = getDefinition(node.identifier) || {};
+      return u(type, { title, url, alt: node.alt }, children);
+    }
+
+    /**
+     * Nodes that carry no `children` array are copied as they are. Attaching
+     * an empty `children` array to them would turn literal nodes such as
+     * "text" or "inlineCode" into malformed literal/parent hybrids.
+     */
+    if (!children) {
+      return { ...node };
+    }
+
+    /**
+     * Parent nodes receive their transformed children, minus the null values
+     * left behind by removed definitions.
+     */
+    return { ...node, children: without(children, null) };
+  }
+}
diff --git a/src/components/Widgets/Markdown/serializers/remarkWrapHtml.js b/src/components/Widgets/Markdown/serializers/remarkWrapHtml.js
new file mode 100644
index 00000000..baee06bb
--- /dev/null
+++ b/src/components/Widgets/Markdown/serializers/remarkWrapHtml.js
@@ -0,0 +1,21 @@
+import u from 'unist-builder';
+
+/**
+ * Ensure that top level 'html' type nodes are wrapped in paragraphs. Html nodes
+ * are used for text nodes that we don't want Remark or Rehype to parse.
+ */
+export default function remarkWrapHtml() {
+
+  /**
+   * Wrap a single top level node in a paragraph if it is an 'html' node,
+   * otherwise hand it back untouched.
+   */
+  const wrapIfHtml = node => (node.type === 'html' ? u('paragraph', [node]) : node);
+
+  /**
+   * Replaces the `children` array of the received tree in place and returns
+   * the same tree object.
+   */
+  return function transform(tree) {
+    tree.children = tree.children.map(child => wrapIfHtml(child));
+    return tree;
+  };
+}
diff --git a/src/components/Widgets/Markdown/serializers/slateRemark.js b/src/components/Widgets/Markdown/serializers/slateRemark.js
new file mode 100644
index 00000000..21853abc
--- /dev/null
+++ b/src/components/Widgets/Markdown/serializers/slateRemark.js
@@ -0,0 +1,330 @@
+import { get, isEmpty, concat, without, flatten } from 'lodash';
+import u from 'unist-builder';
+
+/**
+ * Map of Slate node types to MDAST/Remark node types.
+ */
+const typeMap = {
+  'root': 'root',
+  'paragraph': 'paragraph',
+  'heading-one': 'heading',
+  'heading-two': 'heading',
+  'heading-three': 'heading',
+  'heading-four': 'heading',
+  'heading-five': 'heading',
+  'heading-six': 'heading',
+  'quote': 'blockquote',
+  'code': 'code',
+  'numbered-list': 'list',
+  'bulleted-list': 'list',
+  'list-item': 'listItem',
+  'table': 'table',
+  'table-row': 'tableRow',
+  'table-cell': 'tableCell',
+  'thematic-break': 'thematicBreak',
+  'link': 'link',
+  'image': 'image',
+};
+
+
+/**
+ * Map of Slate mark types to MDAST/Remark node types.
+ */
+const markMap = {
+  bold: 'strong',
+  italic: 'emphasis',
+  strikethrough: 'delete',
+  code: 'inlineCode',
+};
+
+
+/**
+ * Slate treats inline code decoration as a standard mark, but MDAST does
+ * not allow inline code nodes to contain children, only a single text
+ * value. An MDAST inline code node can be nested within mark nodes such
+ * as "emphasis" and "strong", but it cannot contain them.
+ *
+ * Because of this, if a "code" mark (translated to MDAST "inlineCode") is
+ * in the markTypes array, we make the base text node an "inlineCode" type
+ * instead of a standard text node.
+ */
+function processCodeMark(markTypes) {
+
+  /**
+   * Without a code mark the mark list is passed through untouched and the
+   * base node is an "html" node.
+   */
+  if (!markTypes.includes('inlineCode')) {
+    return { filteredMarkTypes: markTypes, textNodeType: 'html' };
+  }
+
+  /**
+   * With a code mark, the mark is stripped from the list and expressed
+   * through the base node type instead.
+   */
+  return {
+    filteredMarkTypes: without(markTypes, 'inlineCode'),
+    textNodeType: 'inlineCode',
+  };
+}
+
+
+/**
+ * Nests a text node inside one mark node per entry of `markTypes`. The first
+ * mark type wraps the text node directly (the text node becomes the only
+ * member of its `children` array), the second mark type wraps the result of
+ * the first, and so on, so the last mark type ends up outermost. With an
+ * empty `markTypes` array the text node itself is returned.
+ */
+function wrapTextWithMarks(textNode, markTypes) {
+  let wrapped = textNode;
+  markTypes.forEach(markType => {
+    wrapped = u(markType, [wrapped]);
+  });
+  return wrapped;
+}
+
+/**
+ * Converts a Slate Raw text node to an MDAST text node.
+ *
+ * Slate text nodes without marks often simply have a "text" property with
+ * the value. In this case the conversion to MDAST is simple. If a Slate
+ * text node does not have a "text" property, it will instead have a
+ * "ranges" property containing an array of objects, each with an array of
+ * marks, such as "bold" or "italic", along with a "text" property.
+ *
+ * MDAST instead expresses such marks in a nested structure, with individual
+ * nodes for each mark type nested until the deepest mark node, which will
+ * contain the text node.
+ *
+ * To convert a Slate text node's marks to MDAST, we treat each "range" as a
+ * separate text node, convert the text node itself to an MDAST text node,
+ * and then recursively wrap the text node for each mark, collecting the results
+ * of each range in a single array of child nodes.
+ *
+ * For example, this Slate text node:
+ *
+ * {
+ *   kind: 'text',
+ *   ranges: [
+ *     {
+ *       text: 'test',
+ *       marks: [{ type: 'bold' }, { type: 'italic' }]
+ *     },
+ *     {
+ *       text: 'test two'
+ *     }
+ *   ]
+ * }
+ *
+ * ...would be converted to this MDAST nested structure (the first mark wraps
+ * the text directly, later marks wrap the result; the base node is an "html"
+ * node unless a code mark makes it an "inlineCode" node):
+ *
+ * [
+ *   {
+ *     type: 'emphasis',
+ *     children: [{
+ *       type: 'strong',
+ *       children: [{
+ *         type: 'html',
+ *         value: 'test'
+ *       }]
+ *     }]
+ *   },
+ *   {
+ *     type: 'html',
+ *     value: 'test two'
+ *   }
+ * ]
+ *
+ * This example also demonstrates how a single Slate node may need to be
+ * replaced with multiple MDAST nodes, which is why a text node with ranges
+ * yields an array rather than a single node.
+ */
+function convertTextNode(node) {
+  const { ranges } = node;
+
+  /**
+   * A Slate text node without a "ranges" property maps directly onto a single
+   * equivalent MDAST node.
+   */
+  if (!ranges) {
+    return u('html', node.text);
+  }
+
+  /**
+   * Converts one range into its (possibly nested) MDAST node.
+   */
+  const convertRange = ({ marks = [], text }) => {
+
+    /**
+     * Translate the Slate marks to their MDAST node types.
+     */
+    const markTypes = marks.map(mark => markMap[mark.type]);
+
+    /**
+     * A code mark is not applied as a wrapper. It is taken out of the mark
+     * list and decides the type of the base node instead.
+     */
+    const { filteredMarkTypes, textNodeType } = processCodeMark(markTypes);
+
+    /**
+     * Build the base node and nest it inside the remaining mark nodes, if
+     * there are any.
+     */
+    return wrapTextWithMarks(u(textNodeType, text), filteredMarkTypes);
+  };
+
+  /**
+   * Convert every range and flatten the result one level, so that a single
+   * flat array of nodes is returned.
+   */
+  return flatten(ranges.map(range => convertRange(range)));
+}
+
+
+/**
+ * Convert a single Slate Raw node to an MDAST node. Uses the unist-builder `u`
+ * function to create MDAST nodes and parses shortcodes.
+ */
+function convertNode(node, children, shortcodePlugins) {
+  switch (node.type) {
+
+    /**
+     * General
+     *
+     * Convert simple cases that only require a type and children, with no
+     * additional properties.
+     */
+    case 'root':
+    case 'paragraph':
+    case 'quote':
+    case 'list-item':
+    case 'table-row':
+    case 'table-cell': {
+      return u(typeMap[node.type], children);
+    }
+
+    /**
+     * Tables
+     *
+     * The MDAST to Slate conversion stores the column alignment of a table in
+     * the "align" data property of the Slate node. Here we carry it back onto
+     * the MDAST node so that alignment survives a round trip. The property is
+     * only set when the Slate node actually has alignment data.
+     */
+    case 'table': {
+      const align = get(node.data, 'align');
+      const props = align ? { align } : {};
+      return u(typeMap[node.type], props, children);
+    }
+
+    /**
+     * Shortcodes
+     *
+     * Shortcode nodes only exist in Slate's Raw AST if they were inserted
+     * via the plugin toolbar in memory, so they should always have
+     * shortcode data attached. The "shortcode" data property contains the
+     * name of the registered shortcode plugin, and the "shortcodeData" data
+     * property contains the data received from the shortcode plugin's
+     * `fromBlock` method when the shortcode node was created.
+     *
+     * Here we get the shortcode plugin from the registry and use its
+     * `toBlock` method to recreate the original markdown shortcode. We then
+     * insert that text into a new "html" type node (a "text" type node
+     * might get encoded or escaped by remark-stringify). Finally, we wrap
+     * the "html" node in a "paragraph" type node, as shortcode nodes must
+     * be alone in their own paragraph.
+     */
+    case 'shortcode': {
+      const { data } = node;
+      const plugin = shortcodePlugins.get(data.shortcode);
+      const text = plugin.toBlock(data.shortcodeData);
+      const textNode = u('html', text);
+      return u('paragraph', { data }, [ textNode ]);
+    }
+
+    /**
+     * Headings
+     *
+     * Slate schemas don't usually infer basic type info from data, so each
+     * level of heading is a separately named type. The MDAST schema just
+     * has a single "heading" type with the depth stored in a "depth"
+     * property on the node. Here we derive the depth from the Slate node
+     * type - e.g., for "heading-two", we need a depth value of "2".
+     */
+    case 'heading-one':
+    case 'heading-two':
+    case 'heading-three':
+    case 'heading-four':
+    case 'heading-five':
+    case 'heading-six': {
+      const depthMap = { one: 1, two: 2, three: 3, four: 4, five: 5, six: 6 };
+      const depthText = node.type.split('-')[1];
+      const depth = depthMap[depthText];
+      return u(typeMap[node.type], { depth }, children);
+    }
+
+    /**
+     * Code Blocks
+     *
+     * Code block nodes have a single text child, and may have a code language
+     * stored in the "lang" data property. Here we transfer both the node
+     * value and the "lang" data property to the new MDAST node.
+     */
+    case 'code': {
+      const value = get(node.nodes, [0, 'text']);
+      const lang = get(node.data, 'lang');
+      return u(typeMap[node.type], { lang }, value);
+    }
+
+    /**
+     * Lists
+     *
+     * Our Slate schema has separate node types for ordered and unordered
+     * lists, but the MDAST spec uses a single type with a boolean "ordered"
+     * property to indicate whether the list is numbered. The MDAST spec also
+     * allows for a "start" property to indicate the first number used for an
+     * ordered list. Here we translate both values from our Slate schema to
+     * the MDAST node. Note that "start" falls back to 1 when the Slate node
+     * has no start data, and is set for bulleted lists as well.
+     */
+    case 'numbered-list':
+    case 'bulleted-list': {
+      const ordered = node.type === 'numbered-list';
+      const props = { ordered, start: get(node.data, 'start') || 1 };
+      return u(typeMap[node.type], props, children);
+    }
+
+    /**
+     * Thematic Breaks
+     *
+     * Thematic breaks don't have children. We parse them separately for
+     * clarity.
+     */
+    case 'thematic-break': {
+      return u(typeMap[node.type]);
+    }
+
+    /**
+     * Links
+     *
+     * The url and title attributes of a link are read from the data object
+     * of the Slate node, while MDAST stores them in properties directly on
+     * the node.
+     */
+    case 'link': {
+      const { url, title } = get(node, 'data', {});
+      return u(typeMap[node.type], { url, title }, children);
+    }
+
+    /**
+     * No default case is supplied because an unhandled case should never
+     * occur. In the event that it does, the switch falls through and
+     * `undefined` is returned for the node (for now).
+     */
+  }
+}
+
+
+/**
+ * Convert a Slate Raw AST to an MDAST tree.
+ *
+ * `raw` is the Slate Raw node to convert; its child nodes are read from the
+ * "nodes" property. It is treated as the root of the document and is not
+ * modified. `shortcodePlugins` is the shortcode plugin registry, which is
+ * passed through to the node conversion so shortcode nodes can be serialized.
+ *
+ * Returns the MDAST "root" node.
+ */
+export default function slateToRemark(raw, { shortcodePlugins }) {
+  /**
+   * The transform function mimics the approach of a Remark plugin for
+   * conformity with the other serialization functions. This function converts
+   * Slate nodes to MDAST nodes, and recursively calls itself to process child
+   * nodes to arbitrary depth.
+   */
+  function transform(node) {
+
+    /**
+     * Text nodes have no child nodes and go through their own conversion,
+     * which may yield either a single MDAST node or an array of them.
+     */
+    if (node.kind === 'text') {
+      return convertTextNode(node);
+    }
+
+    /**
+     * Call `transform` recursively on child nodes, and flatten the resulting
+     * array. A node without child nodes gets an empty array rather than
+     * `false`, because the `u` builder would otherwise turn `false` into a
+     * literal "false" value on the MDAST node.
+     */
+    const children = isEmpty(node.nodes) ? [] : flatten(node.nodes.map(transform));
+
+    /**
+     * Run the node itself through the conversion factory.
+     */
+    return convertNode(node, children, shortcodePlugins);
+  }
+
+  /**
+   * The Slate Raw AST generally won't have a top level type, so we set it to
+   * "root" for clarity. This is done on a shallow copy so that the object
+   * owned by the caller is left untouched.
+   */
+  const root = Object.assign({}, raw, { type: 'root' });
+
+  return transform(root);
+}