escape markdown entities entered as literal text
This commit is contained in:
parent
ac8df98407
commit
8e805cabd8
@ -0,0 +1,33 @@
|
||||
import unified from 'unified';
|
||||
import u from 'unist-builder';
|
||||
import remarkEscapeMarkdownEntities from '../remarkEscapeMarkdownEntities';
|
||||
|
||||
/**
 * Test helper: wraps `text` in a minimal MDAST (a root holding a single text
 * node), runs the escape plugin over it, and returns the value of the
 * resulting text node.
 */
const process = text => {
  const textNode = u('text', text);
  const root = u('root', [textNode]);
  const processor = unified().use(remarkEscapeMarkdownEntities);
  const [escapedNode] = processor.runSync(root).children;

  return escapedNode.value;
};
|
||||
|
||||
describe('remarkEscapeMarkdownEntities', () => {
  // Characters that are escaped wherever they occur in the text.
  it('should escape common markdown entities', () => {
    const escaped = process('*~`[_');

    expect(escaped).toEqual('\\*\\~\\`\\[\\_');
  });

  // '#' and '-' are only escaped when they start the text.
  it('should escape leading markdown entities', () => {
    const escapedHash = process('#');
    expect(escapedHash).toEqual('\\#');

    const escapedDash = process('-');
    expect(escapedDash).toEqual('\\-');
  });

  // Whitespace before a leading entity is dropped from the escaped output.
  it('should escape leading markdown entities preceded by whitespace', () => {
    const escapedHash = process('\n #');
    expect(escapedHash).toEqual('\\#');

    const escapedDash = process(' \n-');
    expect(escapedDash).toEqual('\\-');
  });

  // Once any non-whitespace character comes first, '#' and '-' are left alone.
  it('should not escape leading markdown entities preceded by non-whitespace characters', () => {
    const hashes = 'a# # b #';
    expect(process(hashes)).toEqual(hashes);

    const dashes = 'a- - b -';
    expect(process(dashes)).toEqual(dashes);
  });
});
|
@ -16,6 +16,7 @@ import remarkToSlatePlugin from './remarkSlate';
|
||||
import remarkSquashReferences from './remarkSquashReferences';
|
||||
import remarkImagesToText from './remarkImagesToText';
|
||||
import remarkShortcodes from './remarkShortcodes';
|
||||
import remarkEscapeMarkdownEntities from './remarkEscapeMarkdownEntities'
|
||||
import slateToRemarkParser from './slateRemark';
|
||||
import registry from '../../../../lib/registry';
|
||||
|
||||
@ -164,10 +165,30 @@ export const remarkToMarkdown = obj => {
|
||||
*/
|
||||
const mdast = obj || u('root', [u('paragraph', [u('text', '')])]);
|
||||
|
||||
const remarkToMarkdownPluginOpts = {
|
||||
commonmark: true,
|
||||
fences: true,
|
||||
pedantic: true,
|
||||
listItemIndent: '1',
|
||||
|
||||
// Settings to emulate the defaults from the Prosemirror editor, not
|
||||
// necessarily optimal. Should eventually be configurable.
|
||||
bullet: '*',
|
||||
strong: '*',
|
||||
rule: '-',
|
||||
};
|
||||
|
||||
/**
|
||||
* Escape markdown entities found in text and html nodes within the MDAST.
|
||||
*/
|
||||
const escapedMdast = unified()
|
||||
.use(remarkEscapeMarkdownEntities)
|
||||
.runSync(mdast);
|
||||
|
||||
const markdown = unified()
|
||||
.use(remarkToMarkdownPlugin, { listItemIndent: '1', fences: true, pedantic: true, commonmark: true })
|
||||
.use(remarkToMarkdownPlugin, remarkToMarkdownPluginOpts)
|
||||
.use(remarkAllowAllText)
|
||||
.stringify(mdast);
|
||||
.stringify(escapedMdast);
|
||||
|
||||
return markdown;
|
||||
};
|
||||
|
@ -0,0 +1,56 @@
|
||||
/**
 * A Remark plugin for escaping markdown entities.
 *
 * When markdown entities are entered in raw markdown, they don't appear as
 * characters in the resulting AST; for example, asterisks surrounding a piece
 * of text cause the text to be inserted in a special node type, but the
 * asterisks themselves aren't present as text. Therefore, we generally don't
 * expect to encounter markdown characters in text nodes.
 *
 * However, the CMS visual editor does not interpret markdown characters, and
 * users will expect these characters to be represented literally. In that case,
 * we need to escape them, otherwise they'll be interpreted during
 * stringification.
 *
 * @returns {Function} A transformer `(node, index) => node` that returns an
 * escaped copy of the given tree. The input tree is not mutated.
 */
export default function remarkEscapeMarkdownEntities() {
  /**
   * Node types whose `value` is escaped.
   */
  const escapedNodeTypes = ['text', 'html'];

  /**
   * Escape all occurrences of '[', '*', '_', '`', and '~'.
   */
  function escapeCommonChars(text) {
    return text.replace(/[\[*_`~]/g, '\\$&');
  }

  /**
   * Runs escapeCommonChars, and also escapes a '#' or '-' that is the first
   * non-whitespace character of the text. Any whitespace preceding that
   * character is part of the match and is therefore dropped from the result.
   */
  function escapeAllChars(text) {
    return escapeCommonChars(text).replace(/^\s*([#-])/, '\\$1');
  }

  /**
   * Recursively copies `node`, escaping the value of text and html nodes.
   * `index` is the node's position within its parent's children, as supplied
   * by Array.prototype.map during recursion.
   */
  const transform = (node, index) => {
    const escapedNode = { ...node };

    /**
     * Only set `children` on nodes that actually have them, so that leaf nodes
     * don't gain a spurious `children: undefined` property.
     */
    if (node.children) {
      escapedNode.children = node.children.map(transform);
    }

    /**
     * Escape characters in text and html nodes only. We store a lot of normal
     * text in html nodes to keep Remark from escaping html entities.
     *
     * Escape all characters if this is the first child node, otherwise only
     * common characters.
     */
    if (escapedNodeTypes.includes(node.type)) {
      escapedNode.value =
        index === 0 ? escapeAllChars(node.value) : escapeCommonChars(node.value);
    }

    return escapedNode;
  };

  return transform;
}
|
Loading…
x
Reference in New Issue
Block a user