Merge pull request #567 from netlify/escape-markdown

escape markdown entities entered as literal text
2017-08-30 10:44:43 -04:00 · 2017-08-30 10:44:43 -04:00 · e43edf67b9
commit e43edf67b9
parent 5dfc0f0f24 8e805cabd8
3 changed files with 112 additions and 2 deletions
--- a/src/components/Widgets/Markdown/serializers/tests/remarkEscapeMarkdownEntities.spec.js
+++ b/src/components/Widgets/Markdown/serializers/tests/remarkEscapeMarkdownEntities.spec.js
@ -0,0 +1,33 @@
 import unified from 'unified';
 import u from 'unist-builder';
 import remarkEscapeMarkdownEntities from '../remarkEscapeMarkdownEntities';
 /**
  * Wraps `text` in a single text node under a root node, runs the escaping
  * plugin over that tree, and returns the resulting value of the text node.
  */
 const process = text => {
  const textNode = u('text', text);
  const processor = unified().use(remarkEscapeMarkdownEntities);
  const [escapedNode] = processor.runSync(u('root', [textNode])).children;
  return escapedNode.value;
 };
 describe('remarkEscapeMarkdownEntities', () => {
  // Checks each [input, expected] pair, in order, against the plugin output.
  const expectEscaped = cases => {
    cases.forEach(([input, expected]) => {
      expect(process(input)).toEqual(expected);
    });
  };

  it('should escape common markdown entities', () => {
    expectEscaped([
      ['*~`[_', '\\*\\~\\`\\[\\_'],
    ]);
  });

  it('should escape leading markdown entities', () => {
    expectEscaped([
      ['#', '\\#'],
      ['-', '\\-'],
    ]);
  });

  // Note that the expected output does not contain the leading whitespace.
  it('should escape leading markdown entities preceded by whitespace', () => {
    expectEscaped([
      ['\n #', '\\#'],
      [' \n-', '\\-'],
    ]);
  });

  it('should not escape leading markdown entities preceded by non-whitespace characters', () => {
    expectEscaped([
      ['a# # b #', 'a# # b #'],
      ['a- - b -', 'a- - b -'],
    ]);
  });
 });
--- a/src/components/Widgets/Markdown/serializers/index.js
+++ b/src/components/Widgets/Markdown/serializers/index.js
@ -16,6 +16,7 @@ import remarkToSlatePlugin from './remarkSlate';
 import remarkSquashReferences from './remarkSquashReferences';
 import remarkImagesToText from './remarkImagesToText';
 import remarkShortcodes from './remarkShortcodes';
 import remarkEscapeMarkdownEntities from './remarkEscapeMarkdownEntities'
 import slateToRemarkParser from './slateRemark';
 import registry from '../../../../lib/registry';
@ -164,10 +165,30 @@ export const remarkToMarkdown = obj => {
   */
  const mdast = obj || u('root', [u('paragraph', [u('text', '')])]);
  const remarkToMarkdownPluginOpts = {
    commonmark: true,
    fences: true,
    pedantic: true,
    listItemIndent: '1',
    // Settings to emulate the defaults from the Prosemirror editor, not
    // necessarily optimal. Should eventually be configurable.
    bullet: '*',
    strong: '*',
    rule: '-',
  };
  /**
   * Escape markdown entities found in text and html nodes within the MDAST.
   */
  const escapedMdast = unified()
    .use(remarkEscapeMarkdownEntities)
    .runSync(mdast);
  const markdown = unified()
-    .use(remarkToMarkdownPlugin, { listItemIndent: '1', fences: true, pedantic: true, commonmark: true })
+    .use(remarkToMarkdownPlugin, remarkToMarkdownPluginOpts)
    .use(remarkAllowAllText)
-    .stringify(mdast);
+    .stringify(escapedMdast);
  return markdown;
 };
--- a/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js
+++ b/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js
@ -0,0 +1,56 @@
 /**
 * A Remark plugin for escaping markdown entities.
 *
 * When markdown entities are entered in raw markdown, they don't appear as
 * characters in the resulting AST; for example, asterisks surrounding a piece
 * of text cause the text to be inserted in a special node type, but the
 * asterisks themselves aren't present as text. Therefore, we generally don't
 * expect to encounter markdown characters in text nodes.
 *
 * However, the CMS visual editor does not interpret markdown characters, and
 * users will expect these characters to be represented literally. In that case,
 * we need to escape them, otherwise they'll be interpreted during
 * stringification.
 *
 * @returns {Function} A transformer `(node, index) => node` that returns an
 *   escaped copy of the given tree. The input tree is not mutated.
 */
 export default function remarkEscapeMarkdownEntities() {
  /**
   * Escape all occurrences of '[', '*', '_', '`', and '~'.
   */
  function escapeCommonChars(text) {
    return text.replace(/[\[*_`~]/g, '\\$&');
  }

  /**
   * Runs escapeCommonChars, and also escapes a '#' or '-' that starts the
   * text, optionally preceded by whitespace.
   *
   * The pattern is anchored at the start of the string, so nothing precedes
   * the match: only the escaped character is emitted and any leading
   * whitespace before it is discarded.
   */
  function escapeAllChars(text) {
    return escapeCommonChars(text).replace(/^\s*([#-])/, '\\$1');
  }

  /**
   * Returns a shallow copy of `node` with recursively transformed children
   * and, for text and html nodes, an escaped value.
   *
   * `index` is the node's position among its siblings; it is supplied by
   * Array.prototype.map for child nodes and is not 0 for the top-level call.
   */
  const transform = (node, index) => {
    const result = { ...node };

    /**
     * Only set `children` on nodes that actually have them, so that leaf
     * nodes don't gain a `children: undefined` property.
     */
    if (node.children) {
      result.children = node.children.map(transform);
    }

    /**
     * Escape characters in text and html nodes only. We store a lot of normal
     * text in html nodes to keep Remark from escaping html entities.
     */
    if (['text', 'html'].includes(node.type)) {
      /**
       * Escape all characters if this is the first child node, otherwise only
       * common characters.
       */
      result.value = index === 0 ? escapeAllChars(node.value) : escapeCommonChars(node.value);
    }

    return result;
  };

  return transform;
 }