escape markdown entities entered as literal text

This commit is contained in:
Shawn Erquhart 2017-08-29 17:29:43 -04:00
parent ac8df98407
commit 8e805cabd8
3 changed files with 112 additions and 2 deletions

View File

@ -0,0 +1,33 @@
import unified from 'unified';
import u from 'unist-builder';
import remarkEscapeMarkdownEntities from '../remarkEscapeMarkdownEntities';
/**
 * Test helper: wraps `text` in a minimal `root > text` tree, runs the
 * remarkEscapeMarkdownEntities plugin over it synchronously, and returns the
 * `value` of the first child of the resulting tree.
 */
const process = text => {
  const textNode = u('text', text);
  const root = u('root', [textNode]);
  const processor = unified().use(remarkEscapeMarkdownEntities);
  const { children } = processor.runSync(root);
  return children[0].value;
};
describe('remarkEscapeMarkdownEntities', () => {
  /**
   * Runs every `[input, expected]` pair through `process` and asserts that the
   * output equals the expected string.
   */
  const expectEscaped = cases => {
    cases.forEach(([input, expected]) => {
      expect(process(input)).toEqual(expected);
    });
  };

  it('should escape common markdown entities', () => {
    expectEscaped([
      ['*~`[_', '\\*\\~\\`\\[\\_'],
    ]);
  });

  it('should escape leading markdown entities', () => {
    expectEscaped([
      ['#', '\\#'],
      ['-', '\\-'],
    ]);
  });

  it('should escape leading markdown entities preceded by whitespace', () => {
    expectEscaped([
      ['\n #', '\\#'],
      [' \n-', '\\-'],
    ]);
  });

  it('should not escape leading markdown entities preceded by non-whitespace characters', () => {
    expectEscaped([
      ['a# # b #', 'a# # b #'],
      ['a- - b -', 'a- - b -'],
    ]);
  });
});

View File

@ -16,6 +16,7 @@ import remarkToSlatePlugin from './remarkSlate';
import remarkSquashReferences from './remarkSquashReferences';
import remarkImagesToText from './remarkImagesToText';
import remarkShortcodes from './remarkShortcodes';
import remarkEscapeMarkdownEntities from './remarkEscapeMarkdownEntities'
import slateToRemarkParser from './slateRemark';
import registry from '../../../../lib/registry';
@ -164,10 +165,30 @@ export const remarkToMarkdown = obj => {
*/
const mdast = obj || u('root', [u('paragraph', [u('text', '')])]);
const remarkToMarkdownPluginOpts = {
commonmark: true,
fences: true,
pedantic: true,
listItemIndent: '1',
// Settings to emulate the defaults from the Prosemirror editor, not
// necessarily optimal. Should eventually be configurable.
bullet: '*',
strong: '*',
rule: '-',
};
/**
* Escape markdown entities found in text and html nodes within the MDAST.
*/
const escapedMdast = unified()
.use(remarkEscapeMarkdownEntities)
.runSync(mdast);
const markdown = unified()
.use(remarkToMarkdownPlugin, { listItemIndent: '1', fences: true, pedantic: true, commonmark: true })
.use(remarkToMarkdownPlugin, remarkToMarkdownPluginOpts)
.use(remarkAllowAllText)
.stringify(mdast);
.stringify(escapedMdast);
return markdown;
};

View File

@ -0,0 +1,56 @@
/**
* A Remark plugin for escaping markdown entities.
*
* When markdown entities are entered in raw markdown, they don't appear as
* characters in the resulting AST; for example, asterisks surrounding a piece of
* text cause the text to be inserted in a special node type, but the asterisks
* themselves aren't present as text. Therefore, we generally don't expect to
* encounter markdown characters in text nodes.
*
* However, the CMS visual editor does not interpret markdown characters, and
* users will expect these characters to be represented literally. In that case,
* we need to escape them, otherwise they'll be interpreted during
* stringification.
*/
export default function remarkEscapeMarkdownEntities() {
  /**
   * Inline markdown characters that are escaped wherever they occur in a text
   * or html node: '[', '*', '_', '`', and '~'.
   */
  const commonCharsPattern = /[\[*_`~]/g;

  /**
   * A '#' or '-' at the very start of the string, optionally preceded by
   * whitespace. Only the first such occurrence is matched (no `g`/`m` flags).
   */
  const leadingCharPattern = /^\s*([#-])/;

  /**
   * Escape all occurrences of '[', '*', '_', '`', and '~' by prefixing each
   * with a backslash.
   *
   * @param {string} text - raw node value
   * @returns {string} the value with common markdown characters escaped
   */
  function escapeCommonChars(text) {
    return text.replace(commonCharsPattern, '\\$&');
  }

  /**
   * Runs escapeCommonChars, and additionally escapes a '#' or '-' that starts
   * the string (ignoring leading whitespace).
   *
   * Note: the whitespace in front of the marker is part of the match and is
   * not re-emitted, so '\n #' becomes '\\#'.
   *
   * @param {string} text - raw node value
   * @returns {string} the value with common and leading characters escaped
   */
  function escapeAllChars(text) {
    return escapeCommonChars(text).replace(leadingCharPattern, '\\$1');
  }

  /**
   * Recursively copies `node`, escaping the `value` of every text and html
   * node. The input tree is not mutated.
   *
   * @param {Object} node - MDAST node to transform
   * @param {*} index - position of `node` within its parent's `children`;
   *   for the top-level call this is whatever the caller passes as the second
   *   argument, and anything other than `0` means "not a first child"
   * @returns {Object} a new node with escaped values and transformed children
   */
  const transform = (node, index) => {
    const result = { ...node };

    /**
     * Only attach `children` when the source node actually has them, so leaf
     * nodes are not given a stray `children: undefined` property.
     */
    if (node.children) {
      result.children = node.children.map(transform);
    }

    /**
     * Escape characters in text and html nodes only. We store a lot of normal
     * text in html nodes to keep Remark from escaping html entities.
     */
    if (node.type === 'text' || node.type === 'html') {
      /**
       * Leading '#' and '-' are only escaped in a first child node; every
       * other node only gets the common characters escaped.
       */
      result.value = index === 0
        ? escapeAllChars(node.value)
        : escapeCommonChars(node.value);
    }

    return result;
  };

  return transform;
}