fix rte pasted links with leading/trailing spaces
This commit is contained in:
@ -0,0 +1,45 @@
|
||||
import unified from 'unified';
|
||||
import markdownToRemark from 'remark-parse';
|
||||
import remarkToMarkdown from 'remark-stringify';
|
||||
import remarkPaddedLinks from '../remarkPaddedLinks';
|
||||
|
||||
/**
 * Parse `markdown` to an MDAST, run the `remarkPaddedLinks` plugin over it,
 * stringify the tree back to markdown, and return the processed file's
 * `contents`.
 */
const input = markdown => {
  const processor = unified()
    .use(markdownToRemark)
    .use(remarkPaddedLinks)
    .use(remarkToMarkdown);

  const processedFile = processor.processSync(markdown);

  return processedFile.contents;
};
|
||||
|
||||
/**
 * Parse `markdown` to an MDAST and stringify it straight back to markdown
 * without the plugin under test, returning the processed file's `contents`.
 * Expected values are passed through this so both sides of an assertion go
 * through the same parse/stringify pipeline.
 */
const output = markdown => {
  const processor = unified()
    .use(markdownToRemark)
    .use(remarkToMarkdown);

  const processedFile = processor.processSync(markdown);

  return processedFile.contents;
};
|
||||
|
||||
/**
 * Specs for the `remarkPaddedLinks` plugin. Each case feeds markdown through
 * the plugin (`input`) and compares it with the plain round trip of the
 * expected markdown (`output`).
 */
describe('remarkPaddedLinks', () => {
  it('should move leading and trailing spaces outside of a link', () => {
    expect(input('[ a ](b)')).toEqual(output(' [a](b) '));
  });

  it('should convert multiple leading or trailing spaces to a single space', () => {
    // The link text must contain runs of several spaces, otherwise this case
    // is identical to the single-space case above and the collapsing behavior
    // is never exercised.
    expect(input('[  a   ](b)')).toEqual(output(' [a](b) '));
  });

  it('should work with only a leading space or only a trailing space', () => {
    expect(input('[ a](b)[c ](d)')).toEqual(output(' [a](b)[c](d) '));
  });

  it('should work for nested links', () => {
    expect(input('* # a[ b ](c)d')).toEqual(output('* # a [b](c) d'));
  });

  it('should work for parents with multiple links that are not siblings', () => {
    expect(input('# a[ b ](c)d **[ e ](f)**')).toEqual(output('# a [b](c) d ** [e](f) **'));
  });

  it('should work for links with arbitrarily nested children', () => {
    expect(input('[ a __*b*__ _c_ ](d)')).toEqual(output(' [a __*b*__ _c_](d) '));
  });
});
|
@ -10,6 +10,7 @@ import rehypeToRemark from 'rehype-remark';
|
||||
import remarkToRehypeShortcodes from './remarkRehypeShortcodes';
|
||||
import rehypePaperEmoji from './rehypePaperEmoji';
|
||||
import remarkAssertParents from './remarkAssertParents';
|
||||
import remarkPaddedLinks from './remarkPaddedLinks';
|
||||
import remarkWrapHtml from './remarkWrapHtml';
|
||||
import remarkToSlatePlugin from './remarkSlate';
|
||||
import remarkSquashReferences from './remarkSquashReferences';
|
||||
@ -205,6 +206,7 @@ export const htmlToSlate = html => {
|
||||
|
||||
const slateRaw = unified()
|
||||
.use(remarkAssertParents)
|
||||
.use(remarkPaddedLinks)
|
||||
.use(remarkImagesToText)
|
||||
.use(remarkShortcodes, { plugins: registry.getEditorComponents() })
|
||||
.use(remarkWrapHtml)
|
||||
|
120
src/components/Widgets/Markdown/serializers/remarkPaddedLinks.js
Normal file
120
src/components/Widgets/Markdown/serializers/remarkPaddedLinks.js
Normal file
@ -0,0 +1,120 @@
|
||||
import {
|
||||
get,
|
||||
set,
|
||||
find,
|
||||
findLast,
|
||||
startsWith,
|
||||
endsWith,
|
||||
trimStart,
|
||||
trimEnd,
|
||||
concat,
|
||||
flatMap
|
||||
} from 'lodash';
|
||||
import u from 'unist-builder';
|
||||
import toString from 'mdast-util-to-string';
|
||||
|
||||
/**
 * Convert leading and trailing spaces in a link to single spaces outside of the
 * link. MDASTs derived from pasted Google Docs HTML require this treatment.
 *
 * Note that, because we're potentially replacing characters in a link node's
 * children with characters in a link node's siblings, we have to operate on a
 * parent node and its (link) children at once, rather than just processing
 * children one at a time.
 *
 * @returns {Function} A transformer that takes an MDAST node and returns it
 * with padded links fixed. Nodes without children are returned as is; parent
 * nodes are returned as shallow copies with a new `children` array.
 */
export default function remarkPaddedLinks() {
  /**
   * Recursively process `node` and all of its descendants.
   */
  function transform(node) {
    /**
     * Because we're operating on link nodes and their parents at once, we can
     * exit if the current node has no children.
     */
    if (!node.children) return node;

    /**
     * Process a node's children if any of them are links. A link with leading
     * or trailing spaces yields an array of nodes instead of a single node, so
     * the results are concatenated to keep those nodes as siblings of the
     * other children (`concat` flattens exactly one level).
     */
    const hasLinkChild = node.children.some(child => child.type === 'link');
    const processedChildren = hasLinkChild
      ? node.children.reduce((siblings, child) => siblings.concat(transformChildren(child)), [])
      : node.children;

    /**
     * Run all children through the transform recursively.
     */
    const children = processedChildren.map(child => transform(child));

    return { ...node, children };
  }

  /**
   * Return `node` untouched unless it is a link whose text starts or ends with
   * a space. In that case the edge text nodes are trimmed and an array of
   * `[space?, link, space?]` is returned.
   */
  function transformChildren(node) {
    if (node.type !== 'link') return node;

    /**
     * Inspect the text nodes that actually supply the first and last character
     * of the link text. The link node itself is deliberately not stringified:
     * depending on the version, `mdast-util-to-string` may return a link's
     * `title` in preference to its children, which would hide padded text or
     * report padding that only exists in the title.
     */
    const firstTextNode = getEdgeTextChild(node);
    const lastTextNode = getEdgeTextChild(node, true);

    const leadingWhitespaceNode =
      firstTextNode && firstTextNode.value.startsWith(' ') ? firstTextNode : undefined;
    const trailingWhitespaceNode =
      lastTextNode && lastTextNode.value.endsWith(' ') ? lastTextNode : undefined;

    if (!leadingWhitespaceNode && !trailingWhitespaceNode) return node;

    /**
     * Trim the edge nodes in place. Unified handles everything in a mutable
     * fashion, so it's often simpler to do the same when working with Unified
     * ASTs. Both edges may be the same node when the link holds a single text
     * child.
     */
    if (leadingWhitespaceNode) {
      leadingWhitespaceNode.value = leadingWhitespaceNode.value.replace(/^\s+/, '');
    }

    if (trailingWhitespaceNode) {
      trailingWhitespaceNode.value = trailingWhitespaceNode.value.replace(/\s+$/, '');
    }

    /**
     * The first and last entries are either `undefined` or a fresh single-space
     * text node; the empty entries are filtered out before returning.
     */
    return [
      leadingWhitespaceNode && { type: 'text', value: ' ' },
      node,
      trailingWhitespaceNode && { type: 'text', value: ' ' },
    ].filter(Boolean);
  }

  /**
   * Get the text node at the leading edge of `node`, or at the trailing edge
   * if `end` is truthy, regardless of nesting. Returns `undefined` when the
   * edge content is not a text node (for example inline code, or an image
   * with `alt` text), so that an unrelated text node further inside the link
   * is never trimmed by mistake.
   */
  function getEdgeTextChild(node, end) {
    const leaf = findEdgeContentLeaf(node, end);
    const isTextNode = Boolean(leaf) && leaf.type === 'text' && typeof leaf.value === 'string';
    return isTextNode ? leaf : undefined;
  }

  /**
   * Depth-first search for the first (or, if `end` is truthy, last) leaf that
   * carries content, i.e. a truthy `value` or `alt`. Leaves without either,
   * such as empty text nodes, are skipped. The search stops at the first hit
   * instead of walking the whole subtree.
   */
  function findEdgeContentLeaf(node, end) {
    const hasChildren = Array.isArray(node.children) && node.children.length > 0;

    if (!hasChildren) {
      return node.value || node.alt ? node : undefined;
    }

    // Copy before reversing so the tree's own child order is never mutated.
    const orderedChildren = end ? [...node.children].reverse() : node.children;

    for (const child of orderedChildren) {
      const leaf = findEdgeContentLeaf(child, end);
      if (leaf) return leaf;
    }

    return undefined;
  }

  return transform;
}
|
Reference in New Issue
Block a user