From dc1f1c4535f5f1824bbeddf1882fe975e7086966 Mon Sep 17 00:00:00 2001 From: Le Tan Date: Thu, 16 Aug 2018 20:06:18 +0800 Subject: [PATCH] MdEditor: support pasting HTML as converted Markdown text via Turndown --- README.md | 1 + README_zh.md | 1 + src/resources/markdown_template.js | 112 +++ src/utils/turndown/README.md | 7 + src/utils/turndown/turndown-plugin-gfm.js | 165 ++++ src/utils/turndown/turndown.js | 932 ++++++++++++++++++++++ src/utils/vutils.cpp | 3 + src/vdocument.cpp | 13 + src/vdocument.h | 15 + src/vmdeditor.cpp | 41 + src/vmdeditor.h | 5 + src/vmdtab.cpp | 28 + src/vmdtab.h | 2 + src/vnote.cpp | 3 + src/vnote.h | 4 + src/vnote.qrc | 2 + 16 files changed, 1334 insertions(+) create mode 100644 src/utils/turndown/README.md create mode 100644 src/utils/turndown/turndown-plugin-gfm.js create mode 100644 src/utils/turndown/turndown.js diff --git a/README.md b/README.md index a232b926..4acb3d01 100644 --- a/README.md +++ b/README.md @@ -206,6 +206,7 @@ In VNote, almost everything is configurable, such as background color, font, and - [flowchart.js](https://github.com/adrai/flowchart.js) (MIT License) - [PlantUML](http://plantuml.com/) (MIT License) - [dom-to-image](https://github.com/tsayen/dom-to-image) (MIT License) +- [turndown](https://github.com/domchristie/turndown) (MIT License) # License VNote is licensed under the [MIT license](http://opensource.org/licenses/MIT). 
diff --git a/README_zh.md b/README_zh.md index 7d424b02..1d1eb199 100644 --- a/README_zh.md +++ b/README_zh.md @@ -207,6 +207,7 @@ VNote中,几乎一切都是可以定制的,例如背景颜色、字体以及 - [flowchart.js](https://github.com/adrai/flowchart.js) (MIT License) - [PlantUML](http://plantuml.com/) (MIT License) - [dom-to-image](https://github.com/tsayen/dom-to-image) (MIT License) +- [turndown](https://github.com/domchristie/turndown) (MIT License) # 代码许可 VNote使用[MIT许可](http://opensource.org/licenses/MIT)。 diff --git a/src/resources/markdown_template.js b/src/resources/markdown_template.js index 1fa91d1e..128df2c1 100644 --- a/src/resources/markdown_template.js +++ b/src/resources/markdown_template.js @@ -171,6 +171,10 @@ new QWebChannel(qt.webChannelTransport, content.noticeReadyToHighlightText(); } + if (typeof htmlToText == "function") { + content.requestHtmlToText.connect(htmlToText); + } + if (typeof textToHtml == "function") { content.requestTextToHtml.connect(textToHtml); content.noticeReadyToTextToHtml(); @@ -1465,3 +1469,111 @@ var setPreviewContent = function(lang, html) { previewDiv.className = ''; } }; + +var htmlToText = function(identifier, id, timeStamp, html) { + var splitString = function(str) { + var result = { leadingSpaces: '', + content: '', + trailingSpaces: '' + }; + if (!str) { + return result; + } + + var lRe = /^\s+/; + var ret = lRe.exec(str); + if (ret) { + result.leadingSpaces = ret[0]; + if (result.leadingSpaces.length == str.length) { + return result; + } + } + + var tRe = /\s+$/; + ret = tRe.exec(str); + if (ret) { + result.trailingSpaces = ret[0]; + } + + result.content = str.slice(result.leadingSpaces.length, + str.length - result.trailingSpaces.length); + return result; + }; + + var gfm = turndownPluginGfm.gfm + var ts = new TurndownService({ headingStyle: 'atx', + bulletListMarker: '-', + emDelimiter: '*', + hr: '***', + codeBlockStyle: 'fenced', + blankReplacement: function(content, node) { + if (node.nodeName == 'SPAN') { + return content; + } + + return 
node.isBlock ? '\n\n' : '' + } + }); + ts.use(gfm); + ts.addRule('emspan', { + filter: 'span', + replacement: function(content, node, options) { + if (node.style.fontWeight == 'bold') { + var con = splitString(content); + if (!con.content) { + return content; + } + + return con.leadingSpaces + options.strongDelimiter + + con.content + + options.strongDelimiter + con.trailingSpaces; + } else if (node.style.fontStyle == 'italic') { + var con = splitString(content); + if (!con.content) { + return content; + } + + return con.leadingSpaces + options.emDelimiter + + con.content + + options.emDelimiter + con.trailingSpaces; + } else { + return content; + } + } + }); + ts.addRule('mark', { + filter: 'mark', + replacement: function(content, node, options) { + return '' + content + ''; + } + }); + ts.addRule('emphasis_fix', { + filter: ['em', 'i'], + replacement: function (content, node, options) { + var con = splitString(content); + if (!con.content) { + return content; + } + + return con.leadingSpaces + options.emDelimiter + + con.content + + options.emDelimiter + con.trailingSpaces; + } + }); + ts.addRule('strong_fix', { + filter: ['strong', 'b'], + replacement: function (content, node, options) { + var con = splitString(content); + if (!con.content) { + return content; + } + + return con.leadingSpaces + options.strongDelimiter + + con.content + + options.strongDelimiter + con.trailingSpaces; + } + }); + + var markdown = ts.turndown(html); + content.htmlToTextCB(identifier, id, timeStamp, markdown); +}; diff --git a/src/utils/turndown/README.md b/src/utils/turndown/README.md new file mode 100644 index 00000000..d3f742e1 --- /dev/null +++ b/src/utils/turndown/README.md @@ -0,0 +1,7 @@ +# [turndown](https://github.com/domchristie/turndown) +v4.0.2 +Dom Christie + +# [turndown-plugin-gfm](https://github.com/domchristie/turndown-plugin-gfm) +v1.0.2 +Dom Christie diff --git a/src/utils/turndown/turndown-plugin-gfm.js b/src/utils/turndown/turndown-plugin-gfm.js new file mode 100644 index
00000000..859f4ad7 --- /dev/null +++ b/src/utils/turndown/turndown-plugin-gfm.js @@ -0,0 +1,165 @@ +var turndownPluginGfm = (function (exports) { +'use strict'; + +var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/; + +function highlightedCodeBlock (turndownService) { + turndownService.addRule('highlightedCodeBlock', { + filter: function (node) { + var firstChild = node.firstChild; + return ( + node.nodeName === 'DIV' && + highlightRegExp.test(node.className) && + firstChild && + firstChild.nodeName === 'PRE' + ) + }, + replacement: function (content, node, options) { + var className = node.className || ''; + var language = (className.match(highlightRegExp) || [null, ''])[1]; + + return ( + '\n\n' + options.fence + language + '\n' + + node.firstChild.textContent + + '\n' + options.fence + '\n\n' + ) + } + }); +} + +function strikethrough (turndownService) { + turndownService.addRule('strikethrough', { + filter: ['del', 's', 'strike'], + replacement: function (content) { + return '~' + content + '~' + } + }); +} + +var indexOf = Array.prototype.indexOf; +var every = Array.prototype.every; +var rules = {}; + +rules.tableCell = { + filter: ['th', 'td'], + replacement: function (content, node) { + return cell(content, node) + } +}; + +rules.tableRow = { + filter: 'tr', + replacement: function (content, node) { + var borderCells = ''; + var alignMap = { left: ':--', right: '--:', center: ':-:' }; + + if (isHeadingRow(node)) { + for (var i = 0; i < node.childNodes.length; i++) { + var border = '---'; + var align = ( + node.childNodes[i].getAttribute('align') || '' + ).toLowerCase(); + + if (align) border = alignMap[align] || border; + + borderCells += cell(border, node.childNodes[i]); + } + } + return '\n' + content + (borderCells ? '\n' + borderCells : '') + } +}; + +rules.table = { + // Only convert tables with a heading row. + // Tables with no heading row are kept using `keep` (see below). 
+ filter: function (node) { + return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0]) + }, + + replacement: function (content) { + // Ensure there are no blank lines + content = content.replace('\n\n', '\n'); + return '\n\n' + content + '\n\n' + } +}; + +rules.tableSection = { + filter: ['thead', 'tbody', 'tfoot'], + replacement: function (content) { + return content + } +}; + +// A tr is a heading row if: +// - the parent is a THEAD +// - or if its the first child of the TABLE or the first TBODY (possibly +// following a blank THEAD) +// - and every cell is a TH +function isHeadingRow (tr) { + var parentNode = tr.parentNode; + return ( + parentNode.nodeName === 'THEAD' || + ( + parentNode.firstChild === tr && + (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) && + every.call(tr.childNodes, function (n) { return n.nodeName === 'TH' }) + ) + ) +} + +function isFirstTbody (element) { + var previousSibling = element.previousSibling; + return ( + element.nodeName === 'TBODY' && ( + !previousSibling || + ( + previousSibling.nodeName === 'THEAD' && + /^\s*$/i.test(previousSibling.textContent) + ) + ) + ) +} + +function cell (content, node) { + var index = indexOf.call(node.parentNode.childNodes, node); + var prefix = ' '; + if (index === 0) prefix = '| '; + return prefix + content + ' |' +} + +function tables (turndownService) { + turndownService.keep(function (node) { + return node.nodeName === 'TABLE' && !isHeadingRow(node.rows[0]) + }); + for (var key in rules) turndownService.addRule(key, rules[key]); +} + +function taskListItems (turndownService) { + turndownService.addRule('taskListItems', { + filter: function (node) { + return node.type === 'checkbox' && node.parentNode.nodeName === 'LI' + }, + replacement: function (content, node) { + return (node.checked ? 
'[x]' : '[ ]') + ' ' + } + }); +} + +function gfm (turndownService) { + turndownService.use([ + highlightedCodeBlock, + strikethrough, + tables, + taskListItems + ]); +} + +exports.gfm = gfm; +exports.highlightedCodeBlock = highlightedCodeBlock; +exports.strikethrough = strikethrough; +exports.tables = tables; +exports.taskListItems = taskListItems; + +return exports; + +}({})); diff --git a/src/utils/turndown/turndown.js b/src/utils/turndown/turndown.js new file mode 100644 index 00000000..716ff225 --- /dev/null +++ b/src/utils/turndown/turndown.js @@ -0,0 +1,932 @@ +var TurndownService = (function () { +'use strict'; + +function extend (destination) { + for (var i = 1; i < arguments.length; i++) { + var source = arguments[i]; + for (var key in source) { + if (source.hasOwnProperty(key)) destination[key] = source[key]; + } + } + return destination +} + +function repeat (character, count) { + return Array(count + 1).join(character) +} + +var blockElements = [ + 'address', 'article', 'aside', 'audio', 'blockquote', 'body', 'canvas', + 'center', 'dd', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption', + 'figure', 'footer', 'form', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', + 'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'main', 'menu', 'nav', + 'noframes', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table', + 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul' +]; + +function isBlock (node) { + return blockElements.indexOf(node.nodeName.toLowerCase()) !== -1 +} + +var voidElements = [ + 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', + 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' +]; + +function isVoid (node) { + return voidElements.indexOf(node.nodeName.toLowerCase()) !== -1 +} + +var voidSelector = voidElements.join(); +function hasVoid (node) { + return node.querySelector && node.querySelector(voidSelector) +} + +var rules = {}; + +rules.paragraph = { + filter: 'p', + + replacement: function (content) { + 
return '\n\n' + content + '\n\n' + } +}; + +rules.lineBreak = { + filter: 'br', + + replacement: function (content, node, options) { + return options.br + '\n' + } +}; + +rules.heading = { + filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'], + + replacement: function (content, node, options) { + var hLevel = Number(node.nodeName.charAt(1)); + + if (options.headingStyle === 'setext' && hLevel < 3) { + var underline = repeat((hLevel === 1 ? '=' : '-'), content.length); + return ( + '\n\n' + content + '\n' + underline + '\n\n' + ) + } else { + return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n' + } + } +}; + +rules.blockquote = { + filter: 'blockquote', + + replacement: function (content) { + content = content.replace(/^\n+|\n+$/g, ''); + content = content.replace(/^/gm, '> '); + return '\n\n' + content + '\n\n' + } +}; + +rules.list = { + filter: ['ul', 'ol'], + + replacement: function (content, node) { + var parent = node.parentNode; + if (parent.nodeName === 'LI' && parent.lastElementChild === node) { + return '\n' + content + } else { + return '\n\n' + content + '\n\n' + } + } +}; + +rules.listItem = { + filter: 'li', + + replacement: function (content, node, options) { + content = content + .replace(/^\n+/, '') // remove leading newlines + .replace(/\n+$/, '\n') // replace trailing newlines with just a single one + .replace(/\n/gm, '\n '); // indent + var prefix = options.bulletListMarker + ' '; + var parent = node.parentNode; + if (parent.nodeName === 'OL') { + var start = parent.getAttribute('start'); + var index = Array.prototype.indexOf.call(parent.children, node); + prefix = (start ? Number(start) + index : index + 1) + '. '; + } + return ( + prefix + content + (node.nextSibling && !/\n$/.test(content) ? 
'\n' : '') + ) + } +}; + +rules.indentedCodeBlock = { + filter: function (node, options) { + return ( + options.codeBlockStyle === 'indented' && + node.nodeName === 'PRE' && + node.firstChild && + node.firstChild.nodeName === 'CODE' + ) + }, + + replacement: function (content, node, options) { + return ( + '\n\n ' + + node.firstChild.textContent.replace(/\n/g, '\n ') + + '\n\n' + ) + } +}; + +rules.fencedCodeBlock = { + filter: function (node, options) { + return ( + options.codeBlockStyle === 'fenced' && + node.nodeName === 'PRE' && + node.firstChild && + node.firstChild.nodeName === 'CODE' + ) + }, + + replacement: function (content, node, options) { + var className = node.firstChild.className || ''; + var language = (className.match(/language-(\S+)/) || [null, ''])[1]; + + return ( + '\n\n' + options.fence + language + '\n' + + node.firstChild.textContent + + '\n' + options.fence + '\n\n' + ) + } +}; + +rules.horizontalRule = { + filter: 'hr', + + replacement: function (content, node, options) { + return '\n\n' + options.hr + '\n\n' + } +}; + +rules.inlineLink = { + filter: function (node, options) { + return ( + options.linkStyle === 'inlined' && + node.nodeName === 'A' && + node.getAttribute('href') + ) + }, + + replacement: function (content, node) { + var href = node.getAttribute('href'); + var title = node.title ? ' "' + node.title + '"' : ''; + return '[' + content + '](' + href + title + ')' + } +}; + +rules.referenceLink = { + filter: function (node, options) { + return ( + options.linkStyle === 'referenced' && + node.nodeName === 'A' && + node.getAttribute('href') + ) + }, + + replacement: function (content, node, options) { + var href = node.getAttribute('href'); + var title = node.title ? 
' "' + node.title + '"' : ''; + var replacement; + var reference; + + switch (options.linkReferenceStyle) { + case 'collapsed': + replacement = '[' + content + '][]'; + reference = '[' + content + ']: ' + href + title; + break + case 'shortcut': + replacement = '[' + content + ']'; + reference = '[' + content + ']: ' + href + title; + break + default: + var id = this.references.length + 1; + replacement = '[' + content + '][' + id + ']'; + reference = '[' + id + ']: ' + href + title; + } + + this.references.push(reference); + return replacement + }, + + references: [], + + append: function (options) { + var references = ''; + if (this.references.length) { + references = '\n\n' + this.references.join('\n') + '\n\n'; + this.references = []; // Reset references + } + return references + } +}; + +rules.emphasis = { + filter: ['em', 'i'], + + replacement: function (content, node, options) { + if (!content.trim()) return '' + return options.emDelimiter + content + options.emDelimiter + } +}; + +rules.strong = { + filter: ['strong', 'b'], + + replacement: function (content, node, options) { + if (!content.trim()) return '' + return options.strongDelimiter + content + options.strongDelimiter + } +}; + +rules.code = { + filter: function (node) { + var hasSiblings = node.previousSibling || node.nextSibling; + var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings; + + return node.nodeName === 'CODE' && !isCodeBlock + }, + + replacement: function (content) { + if (!content.trim()) return '' + + var delimiter = '`'; + var leadingSpace = ''; + var trailingSpace = ''; + var matches = content.match(/`+/gm); + if (matches) { + if (/^`/.test(content)) leadingSpace = ' '; + if (/`$/.test(content)) trailingSpace = ' '; + while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`'; + } + + return delimiter + leadingSpace + content + trailingSpace + delimiter + } +}; + +rules.image = { + filter: 'img', + + replacement: function (content, node) { + var alt = 
node.alt || ''; + var src = node.getAttribute('src') || ''; + var title = node.title || ''; + var titlePart = title ? ' "' + title + '"' : ''; + return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : '' + } +}; + +/** + * Manages a collection of rules used to convert HTML to Markdown + */ + +function Rules (options) { + this.options = options; + this._keep = []; + this._remove = []; + + this.blankRule = { + replacement: options.blankReplacement + }; + + this.keepReplacement = options.keepReplacement; + + this.defaultRule = { + replacement: options.defaultReplacement + }; + + this.array = []; + for (var key in options.rules) this.array.push(options.rules[key]); +} + +Rules.prototype = { + add: function (key, rule) { + this.array.unshift(rule); + }, + + keep: function (filter) { + this._keep.unshift({ + filter: filter, + replacement: this.keepReplacement + }); + }, + + remove: function (filter) { + this._remove.unshift({ + filter: filter, + replacement: function () { + return '' + } + }); + }, + + forNode: function (node) { + if (node.isBlank) return this.blankRule + var rule; + + if ((rule = findRule(this.array, node, this.options))) return rule + if ((rule = findRule(this._keep, node, this.options))) return rule + if ((rule = findRule(this._remove, node, this.options))) return rule + + return this.defaultRule + }, + + forEach: function (fn) { + for (var i = 0; i < this.array.length; i++) fn(this.array[i], i); + } +}; + +function findRule (rules, node, options) { + for (var i = 0; i < rules.length; i++) { + var rule = rules[i]; + if (filterValue(rule, node, options)) return rule + } + return void 0 +} + +function filterValue (rule, node, options) { + var filter = rule.filter; + if (typeof filter === 'string') { + if (filter === node.nodeName.toLowerCase()) return true + } else if (Array.isArray(filter)) { + if (filter.indexOf(node.nodeName.toLowerCase()) > -1) return true + } else if (typeof filter === 'function') { + if (filter.call(rule, node, options)) 
return true + } else { + throw new TypeError('`filter` needs to be a string, array, or function') + } +} + +/** + * The collapseWhitespace function is adapted from collapse-whitespace + * by Luc Thevenard. + * + * The MIT License (MIT) + * + * Copyright (c) 2014 Luc Thevenard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/** + * collapseWhitespace(options) removes extraneous whitespace from the given element.
+ * + * @param {Object} options + */ +function collapseWhitespace (options) { + var element = options.element; + var isBlock = options.isBlock; + var isVoid = options.isVoid; + var isPre = options.isPre || function (node) { + return node.nodeName === 'PRE' + }; + + if (!element.firstChild || isPre(element)) return + + var prevText = null; + var prevVoid = false; + + var prev = null; + var node = next(prev, element, isPre); + + while (node !== element) { + if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE + var text = node.data.replace(/[ \r\n\t]+/g, ' '); + + if ((!prevText || / $/.test(prevText.data)) && + !prevVoid && text[0] === ' ') { + text = text.substr(1); + } + + // `text` might be empty at this point. + if (!text) { + node = remove(node); + continue + } + + node.data = text; + + prevText = node; + } else if (node.nodeType === 1) { // Node.ELEMENT_NODE + if (isBlock(node) || node.nodeName === 'BR') { + if (prevText) { + prevText.data = prevText.data.replace(/ $/, ''); + } + + prevText = null; + prevVoid = false; + } else if (isVoid(node)) { + // Avoid trimming space around non-block, non-BR void elements. + prevText = null; + prevVoid = true; + } + } else { + node = remove(node); + continue + } + + var nextNode = next(prev, node, isPre); + prev = node; + node = nextNode; + } + + if (prevText) { + prevText.data = prevText.data.replace(/ $/, ''); + if (!prevText.data) { + remove(prevText); + } + } +} + +/** + * remove(node) removes the given node from the DOM and returns the + * next node in the sequence. + * + * @param {Node} node + * @return {Node} node + */ +function remove (node) { + var next = node.nextSibling || node.parentNode; + + node.parentNode.removeChild(node); + + return next +} + +/** + * next(prev, current, isPre) returns the next node in the sequence, given the + * current and previous nodes. 
+ * + * @param {Node} prev + * @param {Node} current + * @param {Function} isPre + * @return {Node} + */ +function next (prev, current, isPre) { + if ((prev && prev.parentNode === current) || isPre(current)) { + return current.nextSibling || current.parentNode + } + + return current.firstChild || current.nextSibling || current.parentNode +} + +/* + * Set up window for Node.js + */ + +var root = (typeof window !== 'undefined' ? window : {}); + +/* + * Parsing HTML strings + */ + +function canParseHTMLNatively () { + var Parser = root.DOMParser; + var canParse = false; + + // Adapted from https://gist.github.com/1129031 + // Firefox/Opera/IE throw errors on unsupported types + try { + // WebKit returns null on unsupported types + if (new Parser().parseFromString('', 'text/html')) { + canParse = true; + } + } catch (e) {} + + return canParse +} + +function createHTMLParser () { + var Parser = function () {}; + + { + if (shouldUseActiveX()) { + Parser.prototype.parseFromString = function (string) { + var doc = new window.ActiveXObject('htmlfile'); + doc.designMode = 'on'; // disable on-page scripts + doc.open(); + doc.write(string); + doc.close(); + return doc + }; + } else { + Parser.prototype.parseFromString = function (string) { + var doc = document.implementation.createHTMLDocument(''); + doc.open(); + doc.write(string); + doc.close(); + return doc + }; + } + } + return Parser +} + +function shouldUseActiveX () { + var useActiveX = false; + try { + document.implementation.createHTMLDocument('').open(); + } catch (e) { + if (window.ActiveXObject) useActiveX = true; + } + return useActiveX +} + +var HTMLParser = canParseHTMLNatively() ? root.DOMParser : createHTMLParser(); + +function RootNode (input) { + var root; + if (typeof input === 'string') { + var doc = htmlParser().parseFromString( + // DOM parsers arrange elements in the <head> and <body>. + // Wrapping in a custom element ensures elements are reliably arranged in + // a single element.
+ '' + input + '', + 'text/html' + ); + root = doc.getElementById('turndown-root'); + } else { + root = input.cloneNode(true); + } + collapseWhitespace({ + element: root, + isBlock: isBlock, + isVoid: isVoid + }); + + return root +} + +var _htmlParser; +function htmlParser () { + _htmlParser = _htmlParser || new HTMLParser(); + return _htmlParser +} + +function Node (node) { + node.isBlock = isBlock(node); + node.isCode = node.nodeName.toLowerCase() === 'code' || node.parentNode.isCode; + node.isBlank = isBlank(node); + node.flankingWhitespace = flankingWhitespace(node); + return node +} + +function isBlank (node) { + return ( + ['A', 'TH', 'TD'].indexOf(node.nodeName) === -1 && + /^\s*$/i.test(node.textContent) && + !isVoid(node) && + !hasVoid(node) + ) +} + +function flankingWhitespace (node) { + var leading = ''; + var trailing = ''; + + if (!node.isBlock) { + var hasLeading = /^[ \r\n\t]/.test(node.textContent); + var hasTrailing = /[ \r\n\t]$/.test(node.textContent); + + if (hasLeading && !isFlankedByWhitespace('left', node)) { + leading = ' '; + } + if (hasTrailing && !isFlankedByWhitespace('right', node)) { + trailing = ' '; + } + } + + return { leading: leading, trailing: trailing } +} + +function isFlankedByWhitespace (side, node) { + var sibling; + var regExp; + var isFlanked; + + if (side === 'left') { + sibling = node.previousSibling; + regExp = / $/; + } else { + sibling = node.nextSibling; + regExp = /^ /; + } + + if (sibling) { + if (sibling.nodeType === 3) { + isFlanked = regExp.test(sibling.nodeValue); + } else if (sibling.nodeType === 1 && !isBlock(sibling)) { + isFlanked = regExp.test(sibling.textContent); + } + } + return isFlanked +} + +var reduce = Array.prototype.reduce; +var leadingNewLinesRegExp = /^\n*/; +var trailingNewLinesRegExp = /\n*$/; + +function TurndownService (options) { + if (!(this instanceof TurndownService)) return new TurndownService(options) + + var defaults = { + rules: rules, + headingStyle: 'setext', + hr: '* * *', + 
bulletListMarker: '*', + codeBlockStyle: 'indented', + fence: '```', + emDelimiter: '_', + strongDelimiter: '**', + linkStyle: 'inlined', + linkReferenceStyle: 'full', + br: ' ', + blankReplacement: function (content, node) { + return node.isBlock ? '\n\n' : '' + }, + keepReplacement: function (content, node) { + return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML + }, + defaultReplacement: function (content, node) { + return node.isBlock ? '\n\n' + content + '\n\n' : content + } + }; + this.options = extend({}, defaults, options); + this.rules = new Rules(this.options); +} + +TurndownService.prototype = { + /** + * The entry point for converting a string or DOM node to Markdown + * @public + * @param {String|HTMLElement} input The string or DOM node to convert + * @returns A Markdown representation of the input + * @type String + */ + + turndown: function (input) { + if (!canConvert(input)) { + throw new TypeError( + input + ' is not a string, or an element/document/fragment node.' 
+ ) + } + + if (input === '') return '' + + var output = process.call(this, new RootNode(input)); + return postProcess.call(this, output) + }, + + /** + * Add one or more plugins + * @public + * @param {Function|Array} plugin The plugin or array of plugins to add + * @returns The Turndown instance for chaining + * @type Object + */ + + use: function (plugin) { + if (Array.isArray(plugin)) { + for (var i = 0; i < plugin.length; i++) this.use(plugin[i]); + } else if (typeof plugin === 'function') { + plugin(this); + } else { + throw new TypeError('plugin must be a Function or an Array of Functions') + } + return this + }, + + /** + * Adds a rule + * @public + * @param {String} key The unique key of the rule + * @param {Object} rule The rule + * @returns The Turndown instance for chaining + * @type Object + */ + + addRule: function (key, rule) { + this.rules.add(key, rule); + return this + }, + + /** + * Keep a node (as HTML) that matches the filter + * @public + * @param {String|Array|Function} filter The unique key of the rule + * @returns The Turndown instance for chaining + * @type Object + */ + + keep: function (filter) { + this.rules.keep(filter); + return this + }, + + /** + * Remove a node that matches the filter + * @public + * @param {String|Array|Function} filter The unique key of the rule + * @returns The Turndown instance for chaining + * @type Object + */ + + remove: function (filter) { + this.rules.remove(filter); + return this + }, + + /** + * Escapes Markdown syntax + * @public + * @param {String} string The string to escape + * @returns A string with Markdown syntax escaped + * @type String + */ + + escape: function (string) { + return ( + string + // Escape backslash escapes! 
+ .replace(/\\(\S)/g, '\\\\$1') + + // Escape headings + .replace(/^(#{1,6} )/gm, '\\$1') + + // Escape hr + .replace(/^([-*_] *){3,}$/gm, function (match, character) { + return match.split(character).join('\\' + character) + }) + + // Escape ol bullet points + .replace(/^(\W* {0,3})(\d+)\. /gm, '$1$2\\. ') + + // Escape ul bullet points + .replace(/^([^\\\w]*)[*+-] /gm, function (match) { + return match.replace(/([*+-])/g, '\\$1') + }) + + // Escape blockquote indents + .replace(/^(\W* {0,3})> /gm, '$1\\> ') + + // Escape em/strong * + .replace(/\*+(?![*\s\W]).+?\*+/g, function (match) { + return match.replace(/\*/g, '\\*') + }) + + // Escape em/strong _ + .replace(/_+(?![_\s\W]).+?_+/g, function (match) { + return match.replace(/_/g, '\\_') + }) + + // Escape code _ + .replace(/`+(?![`\s\W]).+?`+/g, function (match) { + return match.replace(/`/g, '\\`') + }) + + // Escape link brackets + .replace(/[\[\]]/g, '\\$&') // eslint-disable-line no-useless-escape + ) + } +}; + +/** + * Reduces a DOM node down to its Markdown string equivalent + * @private + * @param {HTMLElement} parentNode The node to convert + * @returns A Markdown representation of the node + * @type String + */ + +function process (parentNode) { + var self = this; + return reduce.call(parentNode.childNodes, function (output, node) { + node = new Node(node); + + var replacement = ''; + if (node.nodeType === 3) { + replacement = node.isCode ? 
node.nodeValue : self.escape(node.nodeValue); + } else if (node.nodeType === 1) { + replacement = replacementForNode.call(self, node); + } + + return join(output, replacement) + }, '') +} + +/** + * Appends strings as each rule requires and trims the output + * @private + * @param {String} output The conversion output + * @returns A trimmed version of the output + * @type String + */ + +function postProcess (output) { + var self = this; + this.rules.forEach(function (rule) { + if (typeof rule.append === 'function') { + output = join(output, rule.append(self.options)); + } + }); + + return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '') +} + +/** + * Converts an element node to its Markdown equivalent + * @private + * @param {HTMLElement} node The node to convert + * @returns A Markdown representation of the node + * @type String + */ + +function replacementForNode (node) { + var rule = this.rules.forNode(node); + var content = process.call(this, node); + var whitespace = node.flankingWhitespace; + if (whitespace.leading || whitespace.trailing) content = content.trim(); + return ( + whitespace.leading + + rule.replacement(content, node, this.options) + + whitespace.trailing + ) +} + +/** + * Determines the new lines between the current output and the replacement + * @private + * @param {String} output The current conversion output + * @param {String} replacement The string to append to the output + * @returns The whitespace to separate the current output and the replacement + * @type String + */ + +function separatingNewlines (output, replacement) { + var newlines = [ + output.match(trailingNewLinesRegExp)[0], + replacement.match(leadingNewLinesRegExp)[0] + ].sort(); + var maxNewlines = newlines[newlines.length - 1]; + return maxNewlines.length < 2 ?
maxNewlines : '\n\n' +} + +function join (string1, string2) { + var separator = separatingNewlines(string1, string2); + + // Remove trailing/leading newlines and replace with separator + string1 = string1.replace(trailingNewLinesRegExp, ''); + string2 = string2.replace(leadingNewLinesRegExp, ''); + + return string1 + separator + string2 +} + +/** + * Determines whether an input can be converted + * @private + * @param {String|HTMLElement} input Describe this parameter + * @returns Describe what it returns + * @type String|Object|Array|Boolean|Number + */ + +function canConvert (input) { + return ( + input != null && ( + typeof input === 'string' || + (input.nodeType && ( + input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11 + )) + ) + ) +} + +return TurndownService; + +}()); diff --git a/src/utils/vutils.cpp b/src/utils/vutils.cpp index f2ebeaf4..c8efe38b 100644 --- a/src/utils/vutils.cpp +++ b/src/utils/vutils.cpp @@ -735,6 +735,9 @@ QString VUtils::generateHtmlTemplate(const QString &p_template, Q_ASSERT(false); } + extraFile += "\n"; + extraFile += "\n"; + if (g_config->getEnableMermaid()) { extraFile += "getMermaidCssStyleUrl() + "\"/>\n" + "\n" + diff --git a/src/vdocument.cpp b/src/vdocument.cpp index dfcdb37d..57a43f22 100644 --- a/src/vdocument.cpp +++ b/src/vdocument.cpp @@ -94,6 +94,14 @@ void VDocument::textToHtmlAsync(int p_identitifer, emit requestTextToHtml(p_identitifer, p_id, p_timeStamp, p_text, p_inlineStyle); } +void VDocument::htmlToTextAsync(int p_identitifer, + int p_id, + int p_timeStamp, + const QString &p_html) +{ + emit requestHtmlToText(p_identitifer, p_id, p_timeStamp, p_html); +} + void VDocument::getHtmlContentAsync() { emit requestHtmlContent(); @@ -104,6 +112,11 @@ void VDocument::textToHtmlCB(int p_identitifer, int p_id, int p_timeStamp, const emit textToHtmlFinished(p_identitifer, p_id, p_timeStamp, p_html); } +void VDocument::htmlToTextCB(int p_identitifer, int p_id, int p_timeStamp, const QString &p_text) +{ + 
emit htmlToTextFinished(p_identitifer, p_id, p_timeStamp, p_text); +} + void VDocument::noticeReadyToHighlightText() { m_readyToHighlight = true; diff --git a/src/vdocument.h b/src/vdocument.h index 139efe93..f5267f92 100644 --- a/src/vdocument.h +++ b/src/vdocument.h @@ -40,6 +40,12 @@ public: const QString &p_text, bool p_inlineStyle); + // Request to convert @p_html to Markdown text. + void htmlToTextAsync(int p_identitifer, + int p_id, + int p_timeStamp, + const QString &p_html); + void setFile(const VFile *p_file); bool isReadyToHighlight() const; @@ -92,6 +98,8 @@ public slots: void textToHtmlCB(int p_identitifer, int p_id, int p_timeStamp, const QString &p_html); + void htmlToTextCB(int p_identitifer, int p_id, int p_timeStamp, const QString &p_text); + void noticeReadyToTextToHtml(); // Web-side handle logics (MathJax etc.) is finished. @@ -144,8 +152,15 @@ signals: const QString &p_text, bool p_inlineStyle); + void requestHtmlToText(int p_identitifer, + int p_id, + int p_timeStamp, + const QString &p_html); + void textToHtmlFinished(int p_identitifer, int p_id, int p_timeStamp, const QString &p_html); + void htmlToTextFinished(int p_identitifer, int p_id, int p_timeStamp, const QString &p_text); + void requestHtmlContent(); void htmlContentFinished(const QString &p_headContent, diff --git a/src/vmdeditor.cpp b/src/vmdeditor.cpp index 675e965f..964f1ab9 100644 --- a/src/vmdeditor.cpp +++ b/src/vmdeditor.cpp @@ -815,6 +815,36 @@ void VMdEditor::insertFromMimeData(const QMimeData *p_source) m_editOps->insertImageFromURL(QUrl(reg.cap(2))); return; } + + // Handle HTML. 
+ VSelectDialog dialog(tr("Insert From Clipboard"), this); + dialog.addSelection(tr("Insert Converted Markdown Text"), 0); + dialog.addSelection(tr("Insert As Text"), 1); + if (p_source->hasImage()) { + dialog.addSelection(tr("Insert As Image"), 2); + } + + if (dialog.exec() == QDialog::Accepted) { + switch (dialog.getSelection()) { + case 0: + ++m_copyTimeStamp; + emit requestHtmlToText(html, 0, m_copyTimeStamp); + break; + + case 1: + VTextEdit::insertFromMimeData(p_source); + break; + + case 2: + m_editOps->insertImageFromMimeData(p_source); + break; + + default: + break; + } + } + + return; } VSelectDialog dialog(tr("Insert From Clipboard"), this); @@ -1196,6 +1226,17 @@ void VMdEditor::textToHtmlFinished(int p_id, } } +void VMdEditor::htmlToTextFinished(int p_id, int p_timeStamp, const QString &p_text) +{ + Q_UNUSED(p_id); + if (m_copyTimeStamp == p_timeStamp && !p_text.isEmpty()) { // Skip superseded requests and empty results. + QTextCursor cursor = textCursor(); + cursor.insertText(p_text); + setTextCursor(cursor); + emit m_object->statusMessage(tr("Converted Markdown text inserted")); + } +} + void VMdEditor::wheelEvent(QWheelEvent *p_event) { if (handleWheelEvent(p_event)) { diff --git a/src/vmdeditor.h b/src/vmdeditor.h index 4a0eda5b..f2c933b7 100644 --- a/src/vmdeditor.h +++ b/src/vmdeditor.h @@ -84,6 +84,8 @@ public slots: void textToHtmlFinished(int p_id, int p_timeStamp, const QUrl &p_baseUrl, const QString &p_html); + void htmlToTextFinished(int p_id, int p_timeStamp, const QString &p_text); + // Wrapper functions for QPlainTextEdit/QTextEdit. public: void setExtraSelectionsW(const QList &p_selections) Q_DECL_OVERRIDE @@ -214,6 +216,9 @@ signals: // Request to convert @p_text to Html. void requestTextToHtml(const QString &p_text, int p_id, int p_timeStamp); + // Request to convert @p_html to Markdown text. 
+ void requestHtmlToText(const QString &p_html, int p_id, int p_timeStamp); + protected: void updateFontAndPalette() Q_DECL_OVERRIDE; diff --git a/src/vmdtab.cpp b/src/vmdtab.cpp index a90d3642..c51edbb4 100644 --- a/src/vmdtab.cpp +++ b/src/vmdtab.cpp @@ -451,6 +451,15 @@ void VMdTab::setupMarkdownViewer() m_editor->textToHtmlFinished(p_id, p_timeStamp, m_webViewer->url(), p_html); }); + connect(m_document, &VDocument::htmlToTextFinished, + this, [this](int p_identitifer, int p_id, int p_timeStamp, const QString &p_text) { + Q_ASSERT(m_editor); + if (m_documentID != p_identitifer) { + return; + } + + m_editor->htmlToTextFinished(p_id, p_timeStamp, p_text); + }); connect(m_document, &VDocument::wordCountInfoUpdated, this, [this]() { VEditTabInfo info = fetchTabInfo(VEditTabInfo::InfoType::All); @@ -525,6 +534,8 @@ void VMdTab::setupMarkdownEditor() }); connect(m_editor, &VMdEditor::requestTextToHtml, this, &VMdTab::textToHtmlViaWebView); + connect(m_editor, &VMdEditor::requestHtmlToText, + this, &VMdTab::htmlToTextViaWebView); if (m_editor->getVim()) { connect(m_editor->getVim(), &VVim::commandLineTriggered, @@ -1217,6 +1228,23 @@ void VMdTab::textToHtmlViaWebView(const QString &p_text, int p_id, int p_timeSta m_document->textToHtmlAsync(m_documentID, p_id, p_timeStamp, p_text, true); } +void VMdTab::htmlToTextViaWebView(const QString &p_html, int p_id, int p_timeStamp) +{ + int maxRetry = 50; + while (!m_document->isReadyToTextToHtml() && maxRetry > 0) { + qDebug() << "wait for web side ready to convert HTML to text"; + VUtils::sleepWait(100); + --maxRetry; + } + + if (!m_document->isReadyToTextToHtml()) { // Re-check readiness: it may have been reached during the final wait. + qWarning() << "web side is not ready to convert HTML to text"; + return; + } + + m_document->htmlToTextAsync(m_documentID, p_id, p_timeStamp, p_html); +} + void VMdTab::handleVimCmdCommandCancelled() { if (m_isEditMode) { diff --git a/src/vmdtab.h b/src/vmdtab.h index 11c32657..dd3cb7d3 100644 --- a/src/vmdtab.h +++ b/src/vmdtab.h @@ -221,6 +221,8 @@ private: void 
textToHtmlViaWebView(const QString &p_text, int p_id, int p_timeStamp); + void htmlToTextViaWebView(const QString &p_html, int p_id, int p_timeStamp); + bool executeVimCommandInWebView(const QString &p_cmd); // Update web view by current content. diff --git a/src/vnote.cpp b/src/vnote.cpp index 8a240c9c..a8ae81cc 100644 --- a/src/vnote.cpp +++ b/src/vnote.cpp @@ -48,6 +48,9 @@ const QString VNote::c_showdownJsFile = ":/resources/showdown.js"; const QString VNote::c_showdownExtraFile = ":/utils/showdown/showdown.min.js"; const QString VNote::c_showdownAnchorExtraFile = ":/utils/showdown/showdown-headinganchor.js"; +const QString VNote::c_turndownJsFile = ":/utils/turndown/turndown.js"; +const QString VNote::c_turndownGfmExtraFile = ":/utils/turndown/turndown-plugin-gfm.js"; + const QString VNote::c_mermaidApiJsFile = ":/utils/mermaid/mermaidAPI.min.js"; const QString VNote::c_mermaidForestCssFile = ":/utils/mermaid/mermaid.forest.css"; diff --git a/src/vnote.h b/src/vnote.h index 3f1f1f53..3d75a4f8 100644 --- a/src/vnote.h +++ b/src/vnote.h @@ -60,6 +60,10 @@ public: static const QString c_showdownExtraFile; static const QString c_showdownAnchorExtraFile; + // Turndown + static const QString c_turndownJsFile; + static const QString c_turndownGfmExtraFile; + // Mermaid static const QString c_mermaidApiJsFile; static const QString c_mermaidForestCssFile; diff --git a/src/vnote.qrc b/src/vnote.qrc index 8801bb2a..4ba90040 100644 --- a/src/vnote.qrc +++ b/src/vnote.qrc @@ -267,5 +267,7 @@ resources/icons/increase_outline_level.svg utils/markdown-it/markdown-it-texmath.js resources/icons/up.svg + utils/turndown/turndown.js + utils/turndown/turndown-plugin-gfm.js