diff --git a/src/components/Widgets/Markdown/MarkdownControl/RawEditor/index.js b/src/components/Widgets/Markdown/MarkdownControl/RawEditor/index.js index d3ccda2d..80b4295a 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/RawEditor/index.js +++ b/src/components/Widgets/Markdown/MarkdownControl/RawEditor/index.js @@ -22,16 +22,14 @@ export default class RawEditor extends React.Component { this.setState({ editorState }); } - onChange = debounce(this.props.onChange, 250); - /** * When the document value changes, serialize from Slate's AST back to plain * text (which is Markdown) and pass that up as the new value. */ - handleDocumentChange = (doc, editorState) => { + handleDocumentChange = debounce((doc, editorState) => { const value = Plain.serialize(editorState); - this.onChange(value); - }; + this.props.onChange(value); + }, 150); /** * If a paste contains plain text, deserialize it to Slate's AST and insert diff --git a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap index 341b42b3..81948733 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap +++ b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/__tests__/__snapshots__/parser.spec.js.snap @@ -443,7 +443,7 @@ become anything else.", Object { "data": undefined, "kind": "text", - "text": " is supported. This *can be ", + "text": " is supported. This ", }, Object { "data": undefined, @@ -451,18 +451,37 @@ become anything else.", "ranges": Array [ Object { "marks": Array [ + Object { + "type": "italic", + }, + ], + "text": "can be ", + }, + Object { + "marks": Array [ + Object { + "type": "italic", + }, Object { "type": "bold", }, ], "text": "nested", }, + Object { + "marks": Array [ + Object { + "type": "italic", + }, + ], + "text": " like", + }, ], }, Object { "data": undefined, "kind": "text", - "text": " like* so.", + "text": " so.", }, ], "type": "paragraph", @@ -1289,10 +1308,16 @@ Object { "text": "blue moon", }, Object { - "data": undefined, - "kind": "text", - "text": " + "kind": "inline", + "nodes": Array [ + Object { + "data": undefined, + "kind": "text", + "text": " ", + }, + ], + "type": "break", }, Object { "data": undefined, diff --git a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js index 2f19dcc7..21c6421e 100644 --- a/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js +++ b/src/components/Widgets/Markdown/MarkdownControl/VisualEditor/components.js @@ -1,4 +1,5 @@ import React from 'react'; +import { List } from 'immutable'; import cn from 'classnames'; import styles from './index.css'; @@ -35,9 +36,28 @@ export const NODE_COMPONENTS = {
*a*')).toEqual('
*a*'); + expect(process('')).toEqual(''); + expect(process('')).toEqual(''); + expect(process('
\n*a*\n')).toEqual('
\n*a*\n'); + expect(process('a b
*c*d e')).toEqual('a b
*c*d e'); + }); + + it('should not parse footnotes', () => { + expect(process('[^a]')).toEqual('\\[^a]'); + }); }); diff --git a/src/components/Widgets/Markdown/serializers/__tests__/remarkStripTrailingBreaks.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/remarkStripTrailingBreaks.spec.js new file mode 100644 index 00000000..a7b0500f --- /dev/null +++ b/src/components/Widgets/Markdown/serializers/__tests__/remarkStripTrailingBreaks.spec.js @@ -0,0 +1,24 @@ +import unified from 'unified'; +import u from 'unist-builder'; +import remarkStripTrailingBreaks from '../remarkStripTrailingBreaks'; + +const process = children => { + const tree = u('root', children); + const strippedMdast = unified() + .use(remarkStripTrailingBreaks) + .runSync(tree); + + return strippedMdast.children; +}; + +describe('remarkStripTrailingBreaks', () => { + it('should remove trailing breaks at the end of a block', () => { + expect(process([u('break')])).toEqual([]); + expect(process([u('break'), u('text', '\n \n')])).toEqual([u('text', '\n \n')]); + expect(process([u('text', 'a'), u('break')])).toEqual([u('text', 'a')]); + }); + + it('should not remove trailing breaks that are not at the end of a block', () => { + expect(process([u('break'), u('text', 'a')])).toEqual([u('break'), u('text', 'a')]); + }); +}); diff --git a/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js new file mode 100644 index 00000000..fd3bda41 --- /dev/null +++ b/src/components/Widgets/Markdown/serializers/__tests__/slate.spec.js @@ -0,0 +1,40 @@ +import { flow } from 'lodash'; +import { markdownToSlate, slateToMarkdown } from '../index'; + +const process = flow([markdownToSlate, slateToMarkdown]); + +describe('slate', () => { + it('should distinguish between newlines and hard breaks', () => { + expect(process('a\n')).toEqual('a\n'); + }); + + it('should not decode encoded html entities in inline code', () => { + expect(process('
<div>
')).toEqual('<div>
\n');
+ });
+
+ it('should parse non-text children of mark nodes', () => {
+ expect(process('**a[b](c)d**')).toEqual('**a[b](c)d**\n');
+ expect(process('**[a](b)**')).toEqual('**[a](b)**\n');
+ expect(process('**data:image/s3,"s3://crabby-images/b6564/b656418ced6718c0a03139661b27479937ca95f8" alt="a"**')).toEqual('**data:image/s3,"s3://crabby-images/b6564/b656418ced6718c0a03139661b27479937ca95f8" alt="a"**\n');
+ expect(process('_`a`_')).toEqual('_`a`_\n');
+ expect(process('_`a`b_')).toEqual('_`a`b_\n');
+ });
+
+ it('should condense adjacent, identically styled text and inline nodes', () => {
+ expect(process('**a ~~b~~~~c~~**')).toEqual('**a ~~bc~~**\n');
+ expect(process('**a ~~b~~~~[c](d)~~**')).toEqual('**a ~~b[c](d)~~**\n');
+ });
+
+ it('should handle nested markdown entities', () => {
+ expect(process('**a**b**c**')).toEqual('**a**b**c**\n');
+ expect(process('**a _b_ c**')).toEqual('**a _b_ c**\n');
+ });
+
+ it('should parse inline images as images', () => {
+ expect(process('a data:image/s3,"s3://crabby-images/cf3dd/cf3ddd1a70028c583e18e50b432a34fb2a881ee7" alt="b"')).toEqual('a data:image/s3,"s3://crabby-images/cf3dd/cf3ddd1a70028c583e18e50b432a34fb2a881ee7" alt="b"\n');
+ });
+
+ it('should not escape markdown entities in html', () => {
+ expect(process('*')).toEqual('*\n');
+ });
+});
diff --git a/src/components/Widgets/Markdown/serializers/index.js b/src/components/Widgets/Markdown/serializers/index.js
index f3c11a64..5b00150d 100644
--- a/src/components/Widgets/Markdown/serializers/index.js
+++ b/src/components/Widgets/Markdown/serializers/index.js
@@ -16,7 +16,9 @@ import remarkToSlate from './remarkSlate';
import remarkSquashReferences from './remarkSquashReferences';
import remarkImagesToText from './remarkImagesToText';
import remarkShortcodes from './remarkShortcodes';
-import remarkEscapeMarkdownEntities from './remarkEscapeMarkdownEntities'
+import remarkEscapeMarkdownEntities from './remarkEscapeMarkdownEntities';
+import remarkStripTrailingBreaks from './remarkStripTrailingBreaks';
+import remarkAllowHtmlEntities from './remarkAllowHtmlEntities';
import slateToRemark from './slateRemark';
import registry from '../../../../lib/registry';
@@ -63,7 +65,9 @@ export const markdownToRemark = markdown => {
* Parse the Markdown string input to an MDAST.
*/
const parsed = unified()
- .use(markdownToRemarkPlugin, { fences: true, pedantic: true, commonmark: true })
+ .use(markdownToRemarkPlugin, { fences: true, commonmark: true })
+ .use(markdownToRemarkRemoveTokenizers, { inlineTokenizers: ['url'] })
+ .use(remarkAllowHtmlEntities)
.parse(markdown);
/**
@@ -79,6 +83,16 @@ export const markdownToRemark = markdown => {
};
+/**
+ * Remove named tokenizers from the parser, effectively deactivating them.
+ */
+function markdownToRemarkRemoveTokenizers({ inlineTokenizers }) {
+ inlineTokenizers && inlineTokenizers.forEach(tokenizer => {
+ delete this.Parser.prototype.inlineTokenizers[tokenizer];
+ });
+}
+
+
/**
* Serialize an MDAST to a Markdown string.
*/
@@ -102,7 +116,6 @@ export const remarkToMarkdown = obj => {
const remarkToMarkdownPluginOpts = {
commonmark: true,
fences: true,
- pedantic: true,
listItemIndent: '1',
// Settings to emulate the defaults from the Prosemirror editor, not
@@ -117,6 +130,7 @@ export const remarkToMarkdown = obj => {
*/
const escapedMdast = unified()
.use(remarkEscapeMarkdownEntities)
+ .use(remarkStripTrailingBreaks)
.runSync(mdast);
const markdown = unified()
diff --git a/src/components/Widgets/Markdown/serializers/remarkAllowHtmlEntities.js b/src/components/Widgets/Markdown/serializers/remarkAllowHtmlEntities.js
new file mode 100644
index 00000000..62e4d3be
--- /dev/null
+++ b/src/components/Widgets/Markdown/serializers/remarkAllowHtmlEntities.js
@@ -0,0 +1,59 @@
+export default function remarkAllowHtmlEntities() {
+ this.Parser.prototype.inlineTokenizers.text = text;
+
+ /**
+ * This is a port of the `remark-parse` text tokenizer, adapted to exclude
+ * HTML entity decoding.
+ */
+ function text(eat, value, silent) {
+ var self = this;
+ var methods;
+ var tokenizers;
+ var index;
+ var length;
+ var subvalue;
+ var position;
+ var tokenizer;
+ var name;
+ var min;
+ var now;
+
+ /* istanbul ignore if - never used (yet) */
+ if (silent) {
+ return true;
+ }
+
+ methods = self.inlineMethods;
+ length = methods.length;
+ tokenizers = self.inlineTokenizers;
+ index = -1;
+ min = value.length;
+
+ while (++index < length) {
+ name = methods[index];
+
+ if (name === 'text' || !tokenizers[name]) {
+ continue;
+ }
+
+ tokenizer = tokenizers[name].locator;
+
+ if (!tokenizer) {
+ eat.file.fail('Missing locator: `' + name + '`');
+ }
+
+ position = tokenizer.call(self, value, 1);
+
+ if (position !== -1 && position < min) {
+ min = position;
+ }
+ }
+
+ subvalue = value.slice(0, min);
+
+ eat(subvalue)({
+ type: 'text',
+ value: subvalue,
+ });
+ }
+};
diff --git a/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js b/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js
index d38bc8bd..6e7903c5 100644
--- a/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js
+++ b/src/components/Widgets/Markdown/serializers/remarkEscapeMarkdownEntities.js
@@ -1,3 +1,237 @@
+import { flow, partial, flatMap, flatten, map } from 'lodash';
+import { joinPatternSegments, combinePatterns, replaceWhen } from '../../../../lib/regexHelper';
+
+/**
+ * Reusable regular expressions segments.
+ */
+const patternSegments = {
+ /**
+ * Matches zero or more HTML attributes followed by the tag close bracket,
+ * which may be prepended by zero or more spaces. The attributes can use
+ * single or double quotes and may be prepended by zero or more spaces.
+ */
+ htmlOpeningTagEnd: /(?: *\w+=(?:(?:"[^"]*")|(?:'[^']*')))* *>/,
+};
+
+
+/**
+ * Patterns matching substrings that should not be escaped. Array values must be
+ * joined before use.
+ */
+const nonEscapePatterns = {
+ /**
+ * HTML Tags
+ *
+ * Matches HTML opening tags and any attributes. Does not check for contents
+ * between tags or closing tags.
+ */
+ htmlTags: [
+ /**
+ * Matches the beginning of an HTML tag, excluding preformatted tag types.
+ */
+ /<(?!pre|style|script)[\w]+/,
+
+ /**
+ * Matches attributes.
+ */
+ patternSegments.htmlOpeningTagEnd,
+ ],
+
+
+ /**
+ * Preformatted HTML Blocks
+ *
+ * Matches HTML blocks with preformatted content. The content of these blocks,
+ * including the tags and attributes, should not be escaped at all.
+ */
+ preformattedHtmlBlocks: [
+ /**
+ * Matches the names of tags known to have preformatted content. The capture
+ * group is reused when matching the closing tag.
+ *
+ * NOTE: this pattern reuses a capture group, and could break if combined with
+ * other expressions using capture groups.
+ */
+ /<(pre|style|script)/,
+
+ /**
+ * Matches attributes.
+ */
+ patternSegments.htmlOpeningTagEnd,
+
+ /**
+ * Allow zero or more of any character (including line breaks) between the
+ * tags. Match lazily in case of subsequent blocks.
+ */
+ /(.|[\n\r])*?/,
+
+ /**
+ * Match closing tag via first capture group.
+ */
+ /<\/\1>/,
+ ],
+};
+
+
+/**
+ * Escape patterns
+ *
+ * Each escape pattern matches a markdown entity and captures up to two
+ * groups. These patterns must use one of the following formulas:
+ *
+ * - Single capture group followed by match content - /(...).../
+ * The captured characters should be escaped and the remaining match should
+ * remain unchanged.
+ *
+ * - Two capture groups surrounding matched content - /(...)...(...)/
+ * The captured characters in both groups should be escaped and the matched
+ * characters in between should remain unchanged.
+ */
+const escapePatterns = [
+ /**
+ * Emphasis/Bold - Asterisk
+ *
+ * Match strings surrounded by one or more asterisks on both sides.
+ */
+ /(\*+)[^\*]*(\1)/g,
+
+ /**
+ * Emphasis - Underscore
+ *
+ * Match strings surrounded by a single underscore on both sides followed by
+ * a word boundary. Remark disregards whether a word boundary exists at the
+ * beginning of an emphasis node.
+ */
+ /(_)[^_]+(_)\b/g,
+
+ /**
+ * Bold - Underscore
+ *
+ * Match strings surrounded by multiple underscores on both sides. Remark
+ * disregards the absence of word boundaries on either side of a bold node.
+ */
+ /(_{2,})[^_]*(\1)/g,
+
+ /**
+ * Strikethrough
+ *
+ * Match strings surrounded by multiple tildes on both sides.
+ */
+ /(~+)[^~]*(\1)/g,
+
+ /**
+ * Inline Code
+ *
+ * Match strings surrounded by backticks.
+ */
+ /(`+)[^`]*(\1)/g,
+
+ /**
+ * Links, Images, References, and Footnotes
+ *
+ * Match strings surrounded by brackets. This could be improved to
+ * specifically match only the exact syntax of each covered entity, but
+ * doing so through current approach would incur a considerable performance
+ * penalty.
+ */
+ /(\[)[^\]]*]/g,
+];
+
+
+/**
+ * Generate new non-escape expression. The non-escape expression matches
+ * substrings whose contents should not be processed for escaping.
+ */
+const joinedNonEscapePatterns = map(nonEscapePatterns, pattern => {
+ return new RegExp(joinPatternSegments(pattern));
+});
+const nonEscapePattern = combinePatterns(joinedNonEscapePatterns);
+
+
+/**
+ * Create chain of successive escape functions for various markdown entities.
+ */
+const escapeFunctions = escapePatterns.map(pattern => partial(escapeDelimiters, pattern));
+const escapeAll = flow(escapeFunctions);
+
+
+/**
+ * Executes both the `escapeCommonChars` and `escapeLeadingChars` functions.
+ */
+function escapeAllChars(text) {
+ const partiallyEscapedMarkdown = escapeCommonChars(text);
+ return escapeLeadingChars(partiallyEscapedMarkdown);
+}
+
+
+/**
+ * escapeLeadingChars
+ *
+ * Handles escaping for characters that must be positioned at the beginning of
+ * the string, such as headers and list items.
+ *
+ * Escapes '#', '*', '-', '>', '=', '|', and sequences of 3+ backticks or 4+
+ * spaces when found at the beginning of a string, preceded by zero or more
+ * whitespace characters.
+ */
+function escapeLeadingChars(text) {
+ return text.replace(/^\s*([-#*>=|]| {4,}|`{3,})/, '$`\\$1');
+}
+
+
+/**
+ * escapeCommonChars
+ *
+ * Escapes active markdown entities. See escape pattern groups for details on
+ * which entities are replaced.
+ */
+function escapeCommonChars(text) {
+ /**
+ * Generate new non-escape expression (must happen at execution time because
+ * we use `RegExp.exec`, which tracks it's own state internally).
+ */
+ const nonEscapeExpression = new RegExp(nonEscapePattern, 'gm');
+
+ /**
+ * Use `replaceWhen` to escape markdown entities only within substrings that
+ * are eligible for escaping.
+ */
+ return replaceWhen(nonEscapeExpression, escapeAll, text, true);
+}
+
+
+/**
+ * escapeDelimiters
+ *
+ * Executes `String.replace` for a given pattern, but only on the first two
+ * capture groups. Specifically intended for escaping opening (and optionally
+ * closing) markdown entities without escaping the content in between.
+ */
+function escapeDelimiters(pattern, text) {
+ return text.replace(pattern, (match, start, end) => {
+ const hasEnd = typeof end === 'string';
+ const matchSliceEnd = hasEnd ? match.length - end.length : match.length;
+ const content = match.slice(start.length, matchSliceEnd);
+ return `${escape(start)}${content}${hasEnd ? escape(end) : ''}`;
+ });
+}
+
+
+/**
+ * escape
+ *
+ * Simple replacement function for escaping markdown entities. Prepends every
+ * character in the received string with a backslash.
+ */
+function escape(delim) {
+ let result = '';
+ for (const char of delim) {
+ result += `\\${char}`;
+ }
+ return result;
+}
+
+
/**
* A Remark plugin for escaping markdown entities.
*
@@ -13,22 +247,6 @@
* stringification.
*/
export default function remarkEscapeMarkdownEntities() {
- /**
- * Escape all occurrences of '[', '*', '_', '`', and '~'.
- */
- function escapeCommonChars(text) {
- return text.replace(/[\[*_`~]/g, '\\$&');
- }
-
- /**
- * Runs escapeCommonChars, and also escapes '#' and '-' when found at the
- * beginning of any node's first child node.
- */
- function escapeAllChars(text) {
- const partiallyEscapedMarkdown = escapeCommonChars(text);
- return partiallyEscapedMarkdown.replace(/^\s*([#-])/, '$`\\$1');
- }
-
const transform = (node, index) => {
const children = node.children && node.children.map(transform);
diff --git a/src/components/Widgets/Markdown/serializers/remarkImagesToText.js b/src/components/Widgets/Markdown/serializers/remarkImagesToText.js
index f63d3820..568e8a06 100644
--- a/src/components/Widgets/Markdown/serializers/remarkImagesToText.js
+++ b/src/components/Widgets/Markdown/serializers/remarkImagesToText.js
@@ -1,18 +1,26 @@
/**
* Images must be parsed as shortcodes for asset proxying. This plugin converts
- * MDAST image nodes back to text to allow shortcode pattern matching.
+ * MDAST image nodes back to text to allow shortcode pattern matching. Note that
+ * this transformation only occurs for images that are the sole child of a top
+ * level paragraph - any other image is left alone and treated as an inline
+ * image.
*/
export default function remarkImagesToText() {
return transform;
function transform(node) {
- const children = node.children ? node.children.map(transform) : node.children;
- if (node.type === 'image') {
- const alt = node.alt || '';
- const url = node.url || '';
- const title = node.title ? ` "${node.title}"` : '';
- return { type: 'text', value: `data:image/s3,"s3://crabby-images/ed75a/ed75abd4096a3e4ba63198a021d539be29419ef9" alt="${alt}"` };
- }
+ const children = node.children.map(child => {
+ if (
+ child.type === 'paragraph'
+ && child.children.length === 1
+ && child.children[0].type === 'image'
+ ) {
+ const { alt = '', url = '', title = '' } = child.children[0];
+ const value = `data:image/s3,"s3://crabby-images/2a7aa/2a7aadee1e74ac49a1d26ca48e7a969cef06c6f4" alt="${alt}"`;
+ child.children = [{ type: 'text', value }];
+ }
+ return child;
+ });
return { ...node, children };
}
}
diff --git a/src/components/Widgets/Markdown/serializers/remarkSlate.js b/src/components/Widgets/Markdown/serializers/remarkSlate.js
index f8cc42f0..7f403089 100644
--- a/src/components/Widgets/Markdown/serializers/remarkSlate.js
+++ b/src/components/Widgets/Markdown/serializers/remarkSlate.js
@@ -1,6 +1,33 @@
-import { get, isEmpty, isArray } from 'lodash';
+import { get, isEmpty, isArray, last, flatMap } from 'lodash';
import u from 'unist-builder';
+/**
+ * A Remark plugin for converting an MDAST to Slate Raw AST. Remark plugins
+ * return a `transform` function that receives the MDAST as it's first argument.
+ */
+export default function remarkToSlate() {
+ return transform;
+}
+
+function transform(node) {
+
+ /**
+ * Call `transform` recursively on child nodes.
+ *
+ * If a node returns a falsey value, filter it out. Some nodes do not
+ * translate from MDAST to Slate, such as definitions for link/image
+ * references or footnotes.
+ */
+ const children = !['strong', 'emphasis', 'delete'].includes(node.type)
+ && !isEmpty(node.children)
+ && flatMap(node.children, transform).filter(val => val);
+
+ /**
+ * Run individual nodes through the conversion factory.
+ */
+ return convertNode(node, children);
+}
+
/**
* Map of MDAST node types to Slate node types.
*/
@@ -47,7 +74,7 @@ function createBlock(type, nodes, props = {}) {
/**
* Create a Slate Block node.
*/
-function createInline(type, nodes, props = {}) {
+function createInline(type, props = {}, nodes) {
return { kind: 'inline', type, nodes, ...props };
}
@@ -63,8 +90,7 @@ function createText(value, data) {
return {...node, text: value };
}
-function convertMarkNode(node, parentMarks = []) {
-
+function processMarkNode(node, parentMarks = []) {
/**
* Add the current node's mark type to the marks collected from parent
* mark nodes, if any.
@@ -72,34 +98,69 @@ function convertMarkNode(node, parentMarks = []) {
const markType = markMap[node.type];
const marks = markType ? [...parentMarks, { type: markMap[node.type] }] : parentMarks;
- /**
- * Set an array to collect sections of text.
- */
- const ranges = [];
+ const children = flatMap(node.children, childNode => {
+ switch (childNode.type) {
+ /**
+ * If a text node is a direct child of the current node, it should be
+ * set aside as a range, and all marks that have been collected in the
+ * `marks` array should apply to that specific range.
+ */
+ case 'html':
+ case 'text':
+ return { text: childNode.value, marks };
- node.children && node.children.forEach(childNode => {
+ /**
+ * MDAST inline code nodes don't have children, just a text value, similar
+ * to a text node, so it receives the same treatment as a text node, but we
+ * first add the inline code mark to the marks array.
+ */
+ case 'inlineCode': {
+ const childMarks = [ ...marks, { type: markMap['inlineCode'] } ];
+ return { text: childNode.value, marks: childMarks };
+ }
- /**
- * If a text node is a direct child of the current node, it should be
- * set aside as a range, and all marks that have been collected in the
- * `marks` array should apply to that specific range.
- */
- if (['html', 'text'].includes(childNode.type)) {
- ranges.push({ text: childNode.value, marks });
- return;
+ /**
+ * Process nested style nodes. The recursive results should be pushed into
+ * the ranges array. This way, every MDAST nested text structure becomes a
+ * flat array of ranges that can serve as the value of a single Slate Raw
+ * text node.
+ */
+ case 'strong':
+ case 'emphasis':
+ case 'delete':
+ return processMarkNode(childNode, marks);
+
+ /**
+ * Remaining nodes simply need mark data added to them, and to then be
+ * added into the cumulative children array.
+ */
+ default:
+ return { ...childNode, data: { marks } };
}
-
- /**
- * Any non-text child node should be processed as a parent node. The
- * recursive results should be pushed into the ranges array. This way,
- * every MDAST nested text structure becomes a flat array of ranges
- * that can serve as the value of a single Slate Raw text node.
- */
- const nestedRanges = convertMarkNode(childNode, marks);
- ranges.push(...nestedRanges);
});
- return ranges;
+ return children;
+}
+
+function convertMarkNode(node) {
+ const slateNodes = processMarkNode(node);
+
+ const convertedSlateNodes = slateNodes.reduce((acc, node, idx, nodes) => {
+ const lastConvertedNode = last(acc);
+ if (node.text && lastConvertedNode && lastConvertedNode.ranges) {
+ lastConvertedNode.ranges.push(node);
+ }
+ else if (node.text) {
+ acc.push(createText([node]));
+ }
+ else {
+ acc.push(transform(node));
+ }
+
+ return acc;
+ }, []);
+
+ return convertedSlateNodes;
}
/**
@@ -186,7 +247,7 @@ function convertNode(node, nodes) {
case 'strong':
case 'emphasis':
case 'delete': {
- return createText(convertMarkNode(node));
+ return convertMarkNode(node);
}
/**
@@ -239,7 +300,8 @@ function convertNode(node, nodes) {
* line breaks within a text node.
*/
case 'break': {
- return createText('\n');
+ const textNode = createText('\n');
+ return createInline('break', {}, [ textNode ]);
}
/**
@@ -258,11 +320,25 @@ function convertNode(node, nodes) {
* schema references them in the data object.
*/
case 'link': {
- const { title, url } = node;
- const data = { title, url };
- return createInline(typeMap[type], nodes, { data });
+ const { title, url, data } = node;
+ const newData = { ...data, title, url };
+ return createInline(typeMap[type], { data: newData }, nodes);
}
+ /**
+ * Images
+ *
+ * Identical to link nodes except for the lack of child nodes and addition
+ * of alt attribute data MDAST stores the link attributes directly on the
+ * node, while our Slate schema references them in the data object.
+ */
+ case 'image': {
+ const { title, url, alt, data } = node;
+ const newData = { ...data, title, alt, url };
+ return createInline(typeMap[type], { isVoid: true, data: newData });
+ }
+
+
/**
* Tables
*
@@ -275,29 +351,3 @@ function convertNode(node, nodes) {
}
}
}
-
-
-/**
- * A Remark plugin for converting an MDAST to Slate Raw AST. Remark plugins
- * return a `transform` function that receives the MDAST as it's first argument.
- */
-export default function remarkToSlate() {
- function transform(node) {
-
- /**
- * Call `transform` recursively on child nodes.
- *
- * If a node returns a falsey value, filter it out. Some nodes do not
- * translate from MDAST to Slate, such as definitions for link/image
- * references or footnotes.
- */
- const children = !isEmpty(node.children) && node.children.map(transform).filter(val => val);
-
- /**
- * Run individual nodes through the conversion factory.
- */
- return convertNode(node, children);
- }
-
- return transform;
-}
diff --git a/src/components/Widgets/Markdown/serializers/remarkStripTrailingBreaks.js b/src/components/Widgets/Markdown/serializers/remarkStripTrailingBreaks.js
new file mode 100644
index 00000000..67a37c3b
--- /dev/null
+++ b/src/components/Widgets/Markdown/serializers/remarkStripTrailingBreaks.js
@@ -0,0 +1,56 @@
+import mdastToString from 'mdast-util-to-string';
+
+/**
+ * Removes break nodes that are at the end of a block.
+ *
+ * When a trailing double space or backslash is encountered at the end of a
+ * markdown block, Remark will interpret the character(s) literally, as only
+ * break entities followed by text qualify as breaks. A manually created MDAST,
+ * however, may have such entities, and users of visual editors shouldn't see
+ * these artifacts in resulting markdown.
+ */
+export default function remarkStripTrailingBreaks() {
+ const transform = node => {
+ if (node.children) {
+ node.children = node.children
+ .map((child, idx, children) => {
+
+ /**
+ * Only touch break nodes. Convert all subsequent nodes to their text
+ * value and exclude the break node if no non-whitespace characters
+ * are found.
+ */
+ if (child.type === 'break') {
+ const subsequentNodes = children.slice(idx + 1);
+
+ /**
+ * Create a small MDAST so that mdastToString can process all
+ * siblings as children of one node rather than making multiple
+ * calls.
+ */
+ const fragment = { type: 'root', children: subsequentNodes };
+ const subsequentText = mdastToString(fragment);
+ return subsequentText.trim() ? child : null;
+ }
+
+ /**
+ * Always return the child if not a break.
+ */
+ return child;
+ })
+
+ /**
+ * Because some break nodes may be excluded, we filter out the resulting
+ * null values.
+ */
+ .filter(child => child)
+
+ /**
+ * Recurse through the MDAST by transforming each individual child node.
+ */
+ .map(transform);
+ }
+ return node;
+ };
+ return transform;
+};
diff --git a/src/components/Widgets/Markdown/serializers/slateRemark.js b/src/components/Widgets/Markdown/serializers/slateRemark.js
index b586cc81..7812dfac 100644
--- a/src/components/Widgets/Markdown/serializers/slateRemark.js
+++ b/src/components/Widgets/Markdown/serializers/slateRemark.js
@@ -1,4 +1,4 @@
-import { get, isEmpty, concat, without, flatten, flatMap, initial } from 'lodash';
+import { get, isEmpty, concat, without, flatten, flatMap, initial, last, difference, reverse, sortBy } from 'lodash';
import u from 'unist-builder';
/**
@@ -21,6 +21,7 @@ const typeMap = {
'table': 'table',
'table-row': 'tableRow',
'table-cell': 'tableCell',
+ 'break': 'break',
'thematic-break': 'thematicBreak',
'link': 'link',
'image': 'image',
@@ -37,6 +38,108 @@ const markMap = {
code: 'inlineCode',
};
+let shortcodePlugins;
+
+export default function slateToRemark(raw, opts) {
+ /**
+ * Set shortcode plugins in outer scope.
+ */
+ ({ shortcodePlugins } = opts);
+
+ /**
+ * The Slate Raw AST generally won't have a top level type, so we set it to
+ * "root" for clarity.
+ */
+ raw.type = 'root';
+
+ return transform(raw);
+}
+
+
+/**
+ * The transform function mimics the approach of a Remark plugin for
+ * conformity with the other serialization functions. This function converts
+ * Slate nodes to MDAST nodes, and recursively calls itself to process child
+ * nodes to arbitrary depth.
+ */
+function transform(node) {
+ /**
+ * Combine adjacent text and inline nodes before processing so they can
+ * share marks.
+ */
+ const combinedChildren = node.nodes && combineTextAndInline(node.nodes);
+
+ /**
+ * Call `transform` recursively on child nodes, and flatten the resulting
+ * array.
+ */
+ const children = !isEmpty(combinedChildren) && flatMap(combinedChildren, transform);
+
+ /**
+ * Run individual nodes through conversion factories.
+ */
+ return ['text'].includes(node.kind)
+ ? convertTextNode(node)
+ : convertNode(node, children, shortcodePlugins);
+}
+
+
+/**
+ * Includes inline nodes as ranges in adjacent text nodes where appropriate, so
+ * that mark node combining logic can apply to both text and inline nodes. This
+ * is necessary because Slate doesn't allow inline nodes to have marks while
+ * inline nodes in MDAST may be nested within mark nodes. Treating them as if
+ * they were text is a bit of a necessary hack.
+ */
+function combineTextAndInline(nodes) {
+ return nodes.reduce((acc, node, idx, nodes) => {
+ const prevNode = last(acc);
+ const prevNodeRanges = get(prevNode, 'ranges');
+ const data = node.data || {};
+
+ /**
+ * If the previous node has ranges and the current node has marks in data
+ * (only happens when we place them on inline nodes here in the parser), or
+ * the current node also has ranges (because the previous node was
+ * originally an inline node that we've already squashed into a range)
+ * combine the current node into the previous.
+ */
+ if (!isEmpty(prevNodeRanges) && !isEmpty(data.marks)) {
+ prevNodeRanges.push({ node, marks: data.marks });
+ return acc;
+ }
+
+ if (!isEmpty(prevNodeRanges) && !isEmpty(node.ranges)) {
+ prevNode.ranges = prevNodeRanges.concat(node.ranges);
+ return acc;
+ }
+
+ /**
+ * Break nodes contain a single child text node with a newline character
+ * for visual purposes in the editor, but Remark break nodes have no
+ * children, so we remove the child node here.
+ */
+ if (node.type === 'break') {
+ acc.push({ kind: 'inline', type: 'break' });
+ return acc;
+ }
+
+ /**
+ * Convert remaining inline nodes to standalone text nodes with ranges.
+ */
+ if (node.kind === 'inline') {
+ acc.push({ kind: 'text', ranges: [{ node, marks: data.marks }] });
+ return acc;
+ }
+
+ /**
+ * Only remaining case is an actual text node, can be pushed as is.
+ */
+ acc.push(node);
+ return acc;
+ }, []);
+}
+
/**
* Slate treats inline code decoration as a standard mark, but MDAST does
@@ -56,24 +159,6 @@ function processCodeMark(markTypes) {
}
-/**
- * Returns an array of one or more MDAST text nodes of the given type, derived
- * from the text received. Certain transformations, such as line breaks, cause
- * multiple nodes to be returned.
- */
-function createTextNodes(text, type = 'html') {
- /**
- * Split the text string at line breaks, then map each substring to an array
- * pair consisting of an MDAST text node followed by a break node. This will
- * result in nested arrays, so we use `flatMap` to produce a flattened array,
- * and `initial` to leave off the superfluous trailing break.
- */
- const brokenText = text.split('\n');
- const toPair = str => [u(type, str), u('break')];
- return initial(flatMap(brokenText, toPair));
-}
-
-
/**
* Wraps a text node in one or more mark nodes by placing the text node in an
* array and using that as the `children` value of a mark node. The resulting
@@ -142,50 +227,134 @@ function wrapTextWithMarks(textNode, markTypes) {
* replaced with multiple MDAST nodes, so the resulting array must be flattened.
*/
function convertTextNode(node) {
-
/**
- * If the Slate text node has no "ranges" property, just return an equivalent
- * MDAST node.
+ * If the Slate text node has a "ranges" property, translate the Slate AST to
+ * a nested MDAST structure. Otherwise, just return an equivalent MDAST text
+ * node.
*/
- if (!node.ranges) {
- return createTextNodes(node.text);
+ if (node.ranges) {
+ const processedRanges = node.ranges.map(processRanges);
+ const condensedNodes = processedRanges.reduce(condenseNodesReducer, { nodes: [] });
+ return condensedNodes.nodes;
}
- /**
- * If there is no "text" property, convert the text range(s) to an array of
- * one or more nested MDAST nodes.
- */
- const textNodes = node.ranges.map(range => {
- /**
- * Get an array of the mark types, converted to their MDAST equivalent
- * types.
- */
- const { marks = [], text } = range;
- const markTypes = marks.map(mark => markMap[mark.type]);
+ if (node.kind === 'inline') {
+ return transform(node);
+ }
+ return u('html', node.text);
+}
+
+
+/**
+ * Process Slate node ranges in preparation for MDAST transformation.
+ */
+function processRanges(range) {
+ /**
+ * Get an array of the mark types, converted to their MDAST equivalent
+ * types.
+ */
+ const { marks = [], text } = range;
+ const markTypes = marks.map(mark => markMap[mark.type]);
+
+ if (typeof range.text === 'string') {
/**
* Code marks must be removed from the marks array, and the presence of a
* code mark changes the text node type that should be used.
*/
const { filteredMarkTypes, textNodeType } = processCodeMark(markTypes);
+ return { text, marks: filteredMarkTypes, textNodeType };
+ }
- /**
- * Create the base text node.
- */
- const textNodes = createTextNodes(text, textNodeType);
+ return { node: range.node, marks: markTypes };
+}
- /**
- * Recursively wrap the base text node in the individual mark nodes, if
- * any exist.
- */
- return textNodes.map(textNode => wrapTextWithMarks(textNode, filteredMarkTypes));
- });
+
+/**
+ * Slate's AST doesn't group adjacent text nodes with the same marks - a
+ * change in marks from letter to letter, even if some are in common, results
+ * in a separate range. For example, given "**a_b_**", transformation to and
+ * from Slate's AST will result in "**a****_b_**".
+ *
+ * MDAST treats styling entities as distinct nodes that contain children, so a
+ * "strong" node can contain a plain text node with a sibling "emphasis" node,
+ * which contains more text. This reducer serves to create an optimized nested
+ * MDAST without the typical redundancies that Slate's AST would produce if
+ * transformed as-is. The reducer can be called recursively to produce nested
+ * structures.
+ */
+function condenseNodesReducer(acc, node, idx, nodes) {
+ /**
+ * Skip any nodes that are being processed as children of an MDAST node
+ * through recursive calls.
+ */
+ if (typeof acc.nextIndex === 'number' && acc.nextIndex > idx) {
+ return acc;
+ }
/**
- * Since each range will be mapped into an array, we flatten the result to
- * return a single array of all nodes.
+ * Processing for nodes with marks.
*/
- return flatten(textNodes);
+ if (node.marks && node.marks.length > 0) {
+ /**
+ * For each mark on the current node, get the number of consecutive nodes
+ * (starting with this one) that have the mark. Whichever mark covers the
+ * most nodes is used as the parent node, and the nodes with that mark are
+ * processed as children. If the greatest number of consecutive nodes is
+ * tied between multiple marks, there is no priority as to which goes
+ * first.
+ */
+ const markLengths = node.marks.map(mark => getMarkLength(mark, nodes.slice(idx)));
+ const parentMarkLength = last(sortBy(markLengths, 'length'));
+ const { markType: parentType, length: parentLength } = parentMarkLength;
+
+ /**
+ * Since this and any consecutive nodes with the parent mark are going to
+ * be processed as children of the parent mark, this reducer should simply
+ * return the accumulator until after the last node to be covered by the
+ * new parent node. Here we set the next index that should be processed,
+ * if any.
+ */
+ const newNextIndex = idx + parentLength;
+
+ /**
+ * Get the set of nodes that should be processed as children of the new
+ * parent mark node, run each through the reducer as children of the
+ * parent node, and create the parent MDAST node with the resulting
+ * children.
+ */
+ const children = nodes.slice(idx, newNextIndex);
+ const denestedChildren = children.map(child => ({ ...child, marks: without(child.marks, parentType) }));
+ const mdastChildren = denestedChildren.reduce(condenseNodesReducer, { nodes: [], parentType }).nodes;
+ const mdastNode = u(parentType, mdastChildren);
+
+ return { ...acc, nodes: [ ...acc.nodes, mdastNode ], nextIndex: newNextIndex };
+ }
+
+ /**
+ * Create the base text node, and pass in the array of mark types as data
+ * (helpful when optimizing/condensing the final structure).
+ */
+ const baseNode = typeof node.text === 'string'
+ ? u(node.textNodeType, { marks: node.marks }, node.text)
+ : transform(node.node);
+
+ /**
+ * Recursively wrap the base text node in the individual mark nodes, if
+ * any exist.
+ */
+ return { ...acc, nodes: [ ...acc.nodes, baseNode ] };
+}
+
+
+/**
+ * Get the number of consecutive Slate nodes containing a given mark beginning
+ * from the first received node.
+ */
+function getMarkLength(markType, nodes) {
+ let length = 0;
+ while(nodes[length] && nodes[length].marks.includes(markType)) { ++length; }
+ return { markType, length };
}
@@ -267,8 +436,8 @@ function convertNode(node, children, shortcodePlugins) {
*/
case 'code': {
const value = get(node.nodes, [0, 'text']);
- const lang = get(node.data, 'lang');
- return u(typeMap[node.type], { lang }, value);
+ const { lang, ...data } = get(node, 'data', {});
+ return u(typeMap[node.type], { lang, data }, value);
}
/**
@@ -288,11 +457,11 @@ function convertNode(node, children, shortcodePlugins) {
}
/**
- * Thematic Breaks
+ * Breaks
*
- * Thematic breaks don't have children. We parse them separately for
- * clarity.
+ * Breaks don't have children. We parse them separately for clarity.
*/
+ case 'break':
case 'thematic-break': {
return u(typeMap[node.type]);
}
@@ -304,45 +473,28 @@ function convertNode(node, children, shortcodePlugins) {
* the node for both Slate and Remark schemas.
*/
case 'link': {
- const { url, title } = get(node, 'data', {});
- return u(typeMap[node.type], { url, title }, children);
+ const { url, title, ...data } = get(node, 'data', {});
+ return u(typeMap[node.type], { url, title, data }, children);
}
+ /**
+ * Images
+ *
+ * This transformation is almost identical to that of links, except for the
+ * lack of child nodes and addition of `alt` attribute data. Currently the
+ * CMS handles block images by shortcode, so this case will only apply to
+ * inline images, which currently can only occur through raw markdown
+ * insertion.
+ */
+ case 'image': {
+ const { url, title, alt, ...data } = get(node, 'data', {});
+ return u(typeMap[node.type], { url, title, alt, data });
+ }
+
+
/**
* No default case is supplied because an unhandled case should never
* occur. In the event that it does, let the error throw (for now).
*/
}
}
-
-
-export default function slateToRemark(raw, { shortcodePlugins }) {
- /**
- * The transform function mimics the approach of a Remark plugin for
- * conformity with the other serialization functions. This function converts
- * Slate nodes to MDAST nodes, and recursively calls itself to process child
- * nodes to arbitrary depth.
- */
- function transform(node) {
-
- /**
- * Call `transform` recursively on child nodes, and flatten the resulting
- * array.
- */
- const children = !isEmpty(node.nodes) && flatten(node.nodes.map(transform));
-
- /**
- * Run individual nodes through conversion factories.
- */
- return node.kind === 'text' ? convertTextNode(node) : convertNode(node, children, shortcodePlugins);
- }
-
- /**
- * The Slate Raw AST generally won't have a top level type, so we set it to
- * "root" for clarity.
- */
- raw.type = 'root';
-
- const mdast = transform(raw);
- return mdast;
-}
diff --git a/src/components/Widgets/ObjectControl.js b/src/components/Widgets/ObjectControl.js
index ae3dd43d..fea66f01 100644
--- a/src/components/Widgets/ObjectControl.js
+++ b/src/components/Widgets/ObjectControl.js
@@ -21,6 +21,13 @@ export default class ObjectControl extends Component {
className: PropTypes.string,
};
+ /**
+ * In case the `onChange` function is frozen by a child widget implementation,
+ * e.g. when debounced, always get the latest object value instead of usin
+ * `this.props.value` directly.
+ */
+ getObjectValue = () => this.props.value;
+
controlFor(field) {
const { onAddAsset, onRemoveAsset, getAsset, value, onChange } = this.props;
if (field.get('widget') === 'hidden') {
@@ -38,7 +45,7 @@ export default class ObjectControl extends Component {
field,
value: fieldValue,
onChange: (val, metadata) => {
- onChange((value || Map()).set(field.get('name'), val), metadata);
+ onChange((this.getObjectValue() || Map()).set(field.get('name'), val), metadata);
},
onAddAsset,
onRemoveAsset,
diff --git a/src/lib/regexHelper.js b/src/lib/regexHelper.js
new file mode 100644
index 00000000..c4b14e41
--- /dev/null
+++ b/src/lib/regexHelper.js
@@ -0,0 +1,145 @@
+import { last } from 'lodash';
+
+/**
+ * Joins an array of regular expressions into a single expression, without
+ * altering the received expressions.
+ */
+export function joinPatternSegments(patterns) {
+ return patterns.map(p => p.source).join('');
+}
+
+
+/**
+ * Combines an array of regular expressions into a single expression, wrapping
+ * each in a non-capturing group and interposing alternation characters (|) so
+ * that each expression is executed separately.
+ */
+export function combinePatterns(patterns, flags = '') {
+ return patterns.map(p => `(?:${p.source})`).join('|');
+}
+
+
+/**
+ * Modify substrings within a string if they match a (global) pattern. Can be
+ * inverted to only modify non-matches.
+ *
+ * params:
+ * matchPattern - regexp - a regular expression to check for matches
+ * replaceFn - function - a replacement function that receives a matched
+ * substring and returns a replacement substring
+ * text - string - the string to process
+ * invertMatchPattern - boolean - if true, non-matching substrings are modified
+ * instead of matching substrings
+ */
+export function replaceWhen(matchPattern, replaceFn, text, invertMatchPattern) {
+ /**
+ * Splits the string into an array of objects with the following shape:
+ *
+ * {
+ * index: number - the index of the substring within the string
+ * text: string - the substring
+ * match: boolean - true if the substring matched `matchPattern`
+ * }
+ *
+ * Loops through matches via recursion (`RegExp.exec` tracks the loop
+ * internally).
+ */
+ function split(exp, text, acc) {
+ /**
+ * Get the next match starting from the end of the last match or start of
+ * string.
+ */
+ const match = exp.exec(text);
+ const lastEntry = last(acc);
+
+ /**
+ * `match` will be null if there are no matches.
+ */
+ if (!match) return acc;
+
+ /**
+ * If the match is at the beginning of the input string, normalize to a data
+ * object with the `match` flag set to `true`, and add to the accumulator.
+ */
+ if (match.index === 0) {
+ addSubstring(acc, 0, match[0], true);
+ }
+
+ /**
+ * If there are no entries in the accumulator, convert the substring before
+ * the match to a data object (without the `match` flag set to true) and
+ * push to the accumulator, followed by a data object for the matching
+ * substring.
+ */
+ else if (!lastEntry) {
+ addSubstring(acc, 0, match.input.slice(0, match.index));
+ addSubstring(acc, match.index, match[0], true);
+ }
+
+ /**
+ * If the last entry in the accumulator immediately preceded the current
+ * matched substring in the original string, just add the data object for
+ * the matching substring to the accumulator.
+ */
+ else if (match.index === lastEntry.index + lastEntry.text.length) {
+ addSubstring(acc, match.index, match[0], true);
+ }
+
+ /**
+ * Convert the substring before the match to a data object (without the
+ * `match` flag set to true), followed by a data object for the matching
+ * substring.
+ */
+ else {
+ const nextIndex = lastEntry.index + lastEntry.text.length;
+ const nextText = match.input.slice(nextIndex, match.index);
+ addSubstring(acc, nextIndex, nextText);
+ addSubstring(acc, match.index, match[0], true);
+ }
+
+ /**
+ * Continue executing the expression.
+ */
+ return split(exp, text, acc);
+ }
+
+ /**
+ * Factory for converting substrings to data objects and adding to an output
+ * array.
+ */
+ function addSubstring(arr, index, text, match = false) {
+ arr.push({ index, text, match });
+ }
+
+ /**
+ * Split the input string to an array of data objects, each representing a
+ * matching or non-matching string.
+ */
+ const acc = split(matchPattern, text, []);
+
+ /**
+ * Process the trailing substring after the final match, if one exists.
+ */
+ const lastEntry = last(acc);
+ if (!lastEntry) return replaceFn(text);
+
+ const nextIndex = lastEntry.index + lastEntry.text.length;
+ if (text.length > nextIndex) {
+ acc.push({ index: nextIndex, text: text.slice(nextIndex) });
+ }
+
+ /**
+ * Map the data objects in the accumulator to their string values, modifying
+ * matched strings with the replacement function. Modifies non-matches if
+ * `invertMatchPattern` is truthy.
+ */
+ const replacedText = acc.map(entry => {
+ const isMatch = invertMatchPattern ? !entry.match : entry.match;
+ return isMatch ? replaceFn(entry.text) : entry.text;
+ });
+
+ /**
+ * Return the joined string.
+ */
+ return replacedText.join('');
+}