fix rte pasted links with leading/trailing spaces

This commit is contained in:
Shawn Erquhart
2017-08-04 15:57:23 -04:00
parent 317a876891
commit 0ea62e0f9d
4 changed files with 190 additions and 357 deletions

View File

@ -0,0 +1,45 @@
import unified from 'unified';
import markdownToRemark from 'remark-parse';
import remarkToMarkdown from 'remark-stringify';
import remarkPaddedLinks from '../remarkPaddedLinks';
const input = markdown =>
unified()
.use(markdownToRemark)
.use(remarkPaddedLinks)
.use(remarkToMarkdown)
.processSync(markdown)
.contents;
const output = markdown =>
unified()
.use(markdownToRemark)
.use(remarkToMarkdown)
.processSync(markdown)
.contents;
describe('remarkPaddedLinks', () => {
it('should move leading and trailing spaces outside of a link', () => {
expect(input('[ a ](b)')).toEqual(output(' [a](b) '));
});
it('should convert multiple leading or trailing spaces to a single space', () => {
expect(input('[ a ](b)')).toEqual(output(' [a](b) '));
});
it('should work with only a leading space or only a trailing space', () => {
expect(input('[ a](b)[c ](d)')).toEqual(output(' [a](b)[c](d) '));
});
it('should work for nested links', () => {
expect(input('* # a[ b ](c)d')).toEqual(output('* # a [b](c) d'));
});
it('should work for parents with multiple links that are not siblings', () => {
expect(input('# a[ b ](c)d **[ e ](f)**')).toEqual(output('# a [b](c) d ** [e](f) **'));
});
it('should work for links with arbitrarily nested children', () => {
expect(input('[ a __*b*__ _c_ ](d)')).toEqual(output(' [a __*b*__ _c_](d) '));
});
});

View File

@ -10,6 +10,7 @@ import rehypeToRemark from 'rehype-remark';
import remarkToRehypeShortcodes from './remarkRehypeShortcodes';
import rehypePaperEmoji from './rehypePaperEmoji';
import remarkAssertParents from './remarkAssertParents';
import remarkPaddedLinks from './remarkPaddedLinks';
import remarkWrapHtml from './remarkWrapHtml';
import remarkToSlatePlugin from './remarkSlate';
import remarkSquashReferences from './remarkSquashReferences';
@ -205,6 +206,7 @@ export const htmlToSlate = html => {
const slateRaw = unified()
.use(remarkAssertParents)
.use(remarkPaddedLinks)
.use(remarkImagesToText)
.use(remarkShortcodes, { plugins: registry.getEditorComponents() })
.use(remarkWrapHtml)

View File

@ -0,0 +1,120 @@
import {
get,
set,
find,
findLast,
startsWith,
endsWith,
trimStart,
trimEnd,
concat,
flatMap
} from 'lodash';
import u from 'unist-builder';
import toString from 'mdast-util-to-string';
/**
* Convert leading and trailing spaces in a link to single spaces outside of the
* link. MDASTs derived from pasted Google Docs HTML require this treatment.
*
* Note that, because we're potentially replacing characters in a link node's
* children with character's in a link node's siblings, we have to operate on a
* parent (link) node and its children at once, rather than just processing
* children one at a time.
*/
export default function remarkPaddedLinks() {
function transform(node) {
/**
* Because we're operating on link nodes and their children at once, we can
* exit if the current node has no children.
*/
if (!node.children) return node;
/**
* Process a node's children if any of them are links. If a node is a link
* with leading or trailing spaces, we'll get back an array of nodes instead
* of a single node, so we use `flatMap` to keep those nodes as siblings
* with the other children.
*
* If performance improvements are found desirable, we could change this to
* only pass in the link nodes instead of the entire array of children, but
* this seems unlikely to produce a noticeable perf gain.
*/
const hasLinkChild = node.children.some(child => child.type === 'link');
const processedChildren = hasLinkChild ? flatMap(node.children, transformChildren) : node.children;
/**
* Run all children through the transform recursively.
*/
const children = processedChildren.map(transform);
return { ...node, children };
};
function transformChildren(node) {
if (node.type !== 'link') return node;
/**
* Get the node's complete string value, check for leading and trailing
* whitespace, and get nodes from each edge where whitespace is found.
*/
const text = toString(node);
const leadingWhitespaceNode = startsWith(text, ' ') && getEdgeTextChild(node);
const trailingWhitespaceNode = endsWith(text, ' ') && getEdgeTextChild(node, true);
if (!leadingWhitespaceNode && !trailingWhitespaceNode) return node;
/**
* Trim the edge nodes in place. Unified handles everything in a mutable
* fashion, so it's often simpler to do the same when working with Unified
* ASTs.
*/
if (leadingWhitespaceNode) {
leadingWhitespaceNode.value = trimStart(leadingWhitespaceNode.value);
}
if (trailingWhitespaceNode) {
trailingWhitespaceNode.value = trimEnd(trailingWhitespaceNode.value);
}
/**
* Create an array of nodes. The first and last child will either be `false`
* or a text node. We filter out the false values before returning.
*/
const nodes = [
leadingWhitespaceNode && u('text', ' '),
node,
trailingWhitespaceNode && u('text', ' ')
];
return nodes.filter(val => val);
}
/**
* Get the first or last non-blank text child of a node, regardless of
* nesting. If `end` is truthy, get the last node, otherwise first.
*/
function getEdgeTextChild(node, end) {
const findFn = end ? findLast : find;
let edgeChildWithValue;
setEdgeChildWithValue(node);
return edgeChildWithValue;
/**
* searchChildren checks a node and all of it's children deeply to find a
* non-blank text value. When the text node is found, we set it in an outside
* variable, as it may be deep in the tree and therefore wouldn't be returned
* by `find`/`findLast`.
*/
function setEdgeChildWithValue(child) {
if (!edgeChildWithValue && child.value) {
edgeChildWithValue = child;
}
findFn(child.children, setEdgeChildWithValue);
}
}
return transform;
}