diff --git a/package.json b/package.json
index 38cf8b04..9d025019 100644
--- a/package.json
+++ b/package.json
@@ -159,6 +159,7 @@
     "slug": "^0.9.1",
     "unified": "^6.1.4",
     "unist-builder": "^1.0.2",
+    "unist-util-visit-parents": "^1.1.1",
     "uuid": "^2.0.3",
     "whatwg-fetch": "^1.0.0"
   },
diff --git a/src/components/Widgets/Markdown/serializers/__tests__/remarkAssertParents.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/remarkAssertParents.spec.js
new file mode 100644
index 00000000..afccd2ed
--- /dev/null
+++ b/src/components/Widgets/Markdown/serializers/__tests__/remarkAssertParents.spec.js
@@ -0,0 +1,198 @@
+import u from 'unist-builder';
+import remarkAssertParents from '../remarkAssertParents';
+
+const transform = remarkAssertParents();
+
+describe('remarkAssertParents', () => {
+  it('should unnest invalidly nested blocks', () => {
+    const input = u('root', [
+      u('paragraph', [
+        u('paragraph', [ u('text', 'Paragraph text.') ]),
+        u('heading', { depth: 1 }, [ u('text', 'Heading text.') ]),
+        u('code', 'someCode()'),
+        u('blockquote', [ u('text', 'Quote text.') ]),
+        u('list', [ u('listItem', [ u('text', 'A list item.') ]) ]),
+        u('table', [ u('tableRow', [ u('tableCell', [ u('text', 'Text in a table cell.') ]) ]) ]),
+        u('thematicBreak'),
+      ]),
+    ]);
+
+    const output = u('root', [
+      u('paragraph', [ u('text', 'Paragraph text.') ]),
+      u('heading', { depth: 1 }, [ u('text', 'Heading text.') ]),
+      u('code', 'someCode()'),
+      u('blockquote', [ u('text', 'Quote text.') ]),
+      u('list', [ u('listItem', [ u('text', 'A list item.') ]) ]),
+      u('table', [ u('tableRow', [ u('tableCell', [ u('text', 'Text in a table cell.') ]) ]) ]),
+      u('thematicBreak'),
+    ]);
+
+    expect(transform(input)).toEqual(output);
+  });
+
+  it('should unnest deeply nested blocks', () => {
+    const input = u('root', [
+      u('paragraph', [
+        u('paragraph', [
+          u('paragraph', [
+            u('paragraph', [ u('text', 'Paragraph text.') ]),
+            u('heading', { depth: 1 }, [ u('text', 'Heading text.') ]),
+            u('code', 'someCode()'),
+            u('blockquote', [
+              u('paragraph', [
+                u('strong', [
+                  u('heading', [
+                    u('text', 'Quote text.'),
+                  ]),
+                ]),
+              ]),
+            ]),
+            u('list', [ u('listItem', [ u('text', 'A list item.') ]) ]),
+            u('table', [ u('tableRow', [ u('tableCell', [ u('text', 'Text in a table cell.') ]) ]) ]),
+            u('thematicBreak'),
+          ]),
+        ]),
+      ]),
+    ]);
+
+    const output = u('root', [
+      u('paragraph', [ u('text', 'Paragraph text.') ]),
+      u('heading', { depth: 1 }, [ u('text', 'Heading text.') ]),
+      u('code', 'someCode()'),
+      u('blockquote', [
+        u('heading', [
+          u('text', 'Quote text.'),
+        ]),
+      ]),
+      u('list', [ u('listItem', [ u('text', 'A list item.') ]) ]),
+      u('table', [ u('tableRow', [ u('tableCell', [ u('text', 'Text in a table cell.') ]) ]) ]),
+      u('thematicBreak'),
+    ]);
+
+    expect(transform(input)).toEqual(output);
+  });
+
+  it('should remove blocks that are emptied as a result of denesting', () => {
+    const input = u('root', [
+      u('paragraph', [
+        u('heading', { depth: 1 }, [ u('text', 'Heading text.') ]),
+      ]),
+    ]);
+
+    const output = u('root', [
+      u('heading', { depth: 1 }, [ u('text', 'Heading text.') ]),
+    ]);
+
+    expect(transform(input)).toEqual(output);
+  });
+
+  it('should handle asymmetrical splits', () => {
+    const input = u('root', [
+      u('paragraph', [
+        u('text', 'Leading text.'),
+        u('heading', { depth: 1 }, [ u('text', 'First heading text.') ]),
+      ]),
+      u('paragraph', [
+        u('heading', { depth: 2 }, [ u('text', 'Second heading text.') ]),
+        u('text', 'Trailing text.'),
+      ]),
+    ]);
+
+    const output = u('root', [
+      u('paragraph', [ u('text', 'Leading text.') ]),
+      u('heading', { depth: 1 }, [ u('text', 'First heading text.') ]),
+      u('heading', { depth: 2 }, [ u('text', 'Second heading text.') ]),
+      u('paragraph', [ u('text', 'Trailing text.') ]),
+    ]);
+
+    expect(transform(input)).toEqual(output);
+  });
+
+  it('should nest invalidly nested blocks in the nearest valid ancestor', () => {
+    const input = u('root', [
+      u('paragraph', [
+        u('blockquote', [
+          u('strong', [
+            u('heading', { depth: 1 }, [ u('text', 'Heading text.') ]),
+          ]),
+        ]),
+      ]),
+    ]);
+
+    const output = u('root', [
+      u('blockquote', [
+        u('heading', { depth: 1 }, [ u('text', 'Heading text.') ]),
+      ]),
+    ]);
+
+    expect(transform(input)).toEqual(output);
+  });
+
+  it('should preserve validly nested siblings of invalidly nested blocks', () => {
+    const input = u('root', [
+      u('paragraph', [
+        u('blockquote', [
+          u('strong', [
+            u('text', 'Deep validly nested text a.'),
+            u('heading', { depth: 1 }, [ u('text', 'Heading text.') ]),
+            u('text', 'Deep validly nested text b.'),
+          ]),
+        ]),
+        u('text', 'Validly nested text.'),
+      ]),
+    ]);
+
+    const output = u('root', [
+      u('blockquote', [
+        u('strong', [
+          u('text', 'Deep validly nested text a.'),
+        ]),
+        u('heading', { depth: 1 }, [ u('text', 'Heading text.') ]),
+        u('strong', [
+          u('text', 'Deep validly nested text b.'),
+        ]),
+      ]),
+      u('paragraph', [
+        u('text', 'Validly nested text.'),
+      ]),
+    ]);
+
+    expect(transform(input)).toEqual(output);
+  });
+
+  it('should allow intermediate parents like list and table to contain required block children', () => {
+    const input = u('root', [
+      u('blockquote', [
+        u('list', [
+          u('listItem', [
+            u('table', [
+              u('tableRow', [
+                u('tableCell', [
+                  u('heading', { depth: 1 }, [ u('text', 'Validly nested heading text.') ]),
+                ]),
+              ]),
+            ]),
+          ]),
+        ]),
+      ]),
+    ]);
+
+    const output = u('root', [
+      u('blockquote', [
+        u('list', [
+          u('listItem', [
+            u('table', [
+              u('tableRow', [
+                u('tableCell', [
+                  u('heading', { depth: 1 }, [ u('text', 'Validly nested heading text.') ]),
+                ]),
+              ]),
+            ]),
+          ]),
+        ]),
+      ]),
+    ]);
+
+    expect(transform(input)).toEqual(output);
+  });
+});
diff --git a/src/components/Widgets/Markdown/serializers/index.js b/src/components/Widgets/Markdown/serializers/index.js
index c66fc890..d7663450 100644
--- a/src/components/Widgets/Markdown/serializers/index.js
+++ b/src/components/Widgets/Markdown/serializers/index.js
@@ -9,6 +9,7
@@ import htmlToRehype from 'rehype-parse';
 import rehypeToRemark from 'rehype-remark';
 import remarkToRehypeShortcodes from './remarkRehypeShortcodes';
 import rehypePaperEmoji from './rehypePaperEmoji';
+import remarkAssertParents from './remarkAssertParents';
 import remarkWrapHtml from './remarkWrapHtml';
 import remarkToSlatePlugin from './remarkSlate';
 import remarkSquashReferences from './remarkSquashReferences';
@@ -199,10 +200,11 @@ export const htmlToSlate = html => {
 
   const mdast = unified()
     .use(rehypePaperEmoji)
-    .use(rehypeToRemark)
+    .use(rehypeToRemark, { minify: false })
     .runSync(hast);
 
   const slateRaw = unified()
+    .use(remarkAssertParents)
     .use(remarkImagesToText)
     .use(remarkShortcodes, { plugins: registry.getEditorComponents() })
     .use(remarkWrapHtml)
diff --git a/src/components/Widgets/Markdown/serializers/remarkAssertParents.js b/src/components/Widgets/Markdown/serializers/remarkAssertParents.js
new file mode 100644
index 00000000..afbc20b8
--- /dev/null
+++ b/src/components/Widgets/Markdown/serializers/remarkAssertParents.js
@@ -0,0 +1,103 @@
+import { concat, last, nth, isEmpty, set } from 'lodash';
+import visitParents from 'unist-util-visit-parents';
+
+/**
+ * Node types that are considered "blocks".
+ */
+const blocks = ['paragraph', 'heading', 'code', 'blockquote', 'list', 'table', 'thematicBreak'];
+
+/**
+ * Node types that can contain "block" nodes as direct children.
+ */
+const canContainBlocks = ['root', 'blockquote', 'listItem', 'tableCell'];
+
+/**
+ * remarkUnwrapInvalidNest
+ *
+ * Some MDAST node types can only be nested within specific node types - for
+ * example, a paragraph can't be nested within another paragraph, and a heading
+ * can't be nested in a "strong" type node. This kind of invalid MDAST can be
+ * generated by rehype-remark from invalid HTML.
+ *
+ * This plugin finds instances of invalid nesting, and unwraps the invalidly
+ * nested nodes as far up the parental line as necessary, splitting parent nodes
+ * along the way. The resulting node has no invalidly nested nodes, and all
+ * validly nested nodes retain their ancestry. Nodes that are emptied as a
+ * result of unnesting nodes are removed from the tree.
+ */
+export default function remarkUnwrapInvalidNest() {
+  return transform;
+
+  /**
+   * Splits the tree around the first invalid nest found, repeating until no
+   * invalid nests remain. The tree is mutated in place and returned. A loop
+   * is used rather than recursion so that the call stack does not grow with
+   * the number of invalid nests in the document.
+   */
+  function transform(tree) {
+    let invalidNest = findInvalidNest(tree);
+
+    while (invalidNest) {
+      splitTreeAtNest(invalidNest);
+      invalidNest = findInvalidNest(tree);
+    }
+
+    return tree;
+  }
+
+  /**
+   * Uses unist-util-visit-parents to check every node in the tree while
+   * having access to every ancestor of the node. Returns the ancestors of
+   * the first block node found inside a parent that cannot contain blocks,
+   * followed by the node itself, or undefined if there is no such node.
+   */
+  function findInvalidNest(tree) {
+    let invalidNest;
+
+    visitParents(tree, (node, parents) => {
+      /**
+       * A node is unnested into its grandparent, so only nodes with at least
+       * two ancestors can be handled. Skipping the rest keeps a tree whose
+       * top node is not a block container from being split endlessly.
+       */
+      const canUnnest = parents.length >= 2;
+      const isInvalidNest = canUnnest
+        && blocks.includes(node.type)
+        && !canContainBlocks.includes(last(parents).type);
+
+      if (isInvalidNest) {
+        invalidNest = concat(parents, node);
+        return false;
+      }
+    });
+
+    return invalidNest;
+  }
+
+  /**
+   * Moves the last node of the nest out of its parent and into its
+   * grandparent. The parent is replaced by a copy holding the children that
+   * came before the node, the node itself, and a copy holding the children
+   * that came after it. Copies that would have no children are left out.
+   */
+  function splitTreeAtNest(nest) {
+    const grandparent = nth(nest, -3);
+    const parent = nth(nest, -2);
+    const node = last(nest);
+
+    const splitIndex = grandparent.children.indexOf(parent);
+    const splitChildren = grandparent.children;
+    const splitChildIndex = parent.children.indexOf(node);
+
+    const childrenBefore = parent.children.slice(0, splitChildIndex);
+    const childrenAfter = parent.children.slice(splitChildIndex + 1);
+    const nodeBefore = !isEmpty(childrenBefore) && { ...parent, children: childrenBefore };
+    const nodeAfter = !isEmpty(childrenAfter) && { ...parent, children: childrenAfter };
+
+    const childrenToInsert = [nodeBefore, node, nodeAfter].filter(val => !isEmpty(val));
+    const beforeChildren = splitChildren.slice(0, splitIndex);
+    const afterChildren = splitChildren.slice(splitIndex + 1);
+    const newChildren = concat(beforeChildren, childrenToInsert, afterChildren);
+    grandparent.children = newChildren;
+  }
+}
diff --git a/yarn.lock b/yarn.lock
index 97a6a911..2bdc0ff8 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -9036,6 +9036,10 @@ unist-util-stringify-position@^1.0.0:
   version "1.1.1"
   resolved "https://registry.yarnpkg.com/unist-util-stringify-position/-/unist-util-stringify-position-1.1.1.tgz#3ccbdc53679eed6ecf3777dd7f5e3229c1b6aa3c"
 
+unist-util-visit-parents@^1.1.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/unist-util-visit-parents/-/unist-util-visit-parents-1.1.1.tgz#7d3f56b5b039a3c6e2d16e51cc093f10e4755342"
+
 unist-util-visit@^1.0.0, unist-util-visit@^1.1.0, unist-util-visit@^1.1.1:
   version "1.1.3"
   resolved "https://registry.yarnpkg.com/unist-util-visit/-/unist-util-visit-1.1.3.tgz#ec268e731b9d277a79a5b5aa0643990e405d600b"