highlighter: support Fenced Code Block

This commit is contained in:
Le Tan 2018-07-12 20:19:07 +08:00
parent a20d306aab
commit fbfc6c1dd6
9 changed files with 171 additions and 138 deletions

View File

@ -193,9 +193,9 @@ void HGMarkdownHighlighter::highlightBlock(const QString &text)
goto exit; goto exit;
} }
// PEG Markdown Highlight does not handle the ``` code block correctly.
setCurrentBlockState(HighlightBlockState::Normal); setCurrentBlockState(HighlightBlockState::Normal);
highlightCodeBlock(curBlock, text);
highlightCodeBlock(blockNum, text);
if (currentBlockState() == HighlightBlockState::Normal) { if (currentBlockState() == HighlightBlockState::Normal) {
if (isVerbatimBlock(curBlock)) { if (isVerbatimBlock(curBlock)) {
@ -211,11 +211,6 @@ void HGMarkdownHighlighter::highlightBlock(const QString &text)
} }
} }
// PEG Markdown Highlight does not handle links with spaces in the URL.
// Links in the URL should be encoded to %20. We just let it be here and won't
// fix this.
// highlightLinkWithSpacesInURL(text);
if (currentBlockState() != HighlightBlockState::CodeBlock) { if (currentBlockState() != HighlightBlockState::CodeBlock) {
goto exit; goto exit;
} }
@ -377,6 +372,84 @@ void HGMarkdownHighlighter::initVerbatimBlocksFromResult()
} }
} }
void HGMarkdownHighlighter::initFencedCodeBlocksFromResult()
{
m_codeBlocks.clear();
m_codeBlocksState.clear();
if (!result) {
return;
}
// Ordered by start position in ascending order.
QMap<int, VElementRegion> regs;
pmh_element *elem = result[pmh_FENCEDCODEBLOCK];
while (elem != NULL) {
if (elem->end <= elem->pos) {
elem = elem->next;
continue;
}
if (!regs.contains(elem->pos)) {
regs.insert(elem->pos, VElementRegion(elem->pos, elem->end));
}
elem = elem->next;
}
VCodeBlock item;
bool inBlock = false;
for (auto it = regs.begin(); it != regs.end(); ++it) {
// [firstBlock, lastBlock].
int firstBlock = document->findBlock(it.value().m_startPos).blockNumber();
int lastBlock = document->findBlock(it.value().m_endPos - 1).blockNumber();
QTextBlock block = document->findBlockByNumber(firstBlock);
while (block.isValid()) {
int blockNumber = block.blockNumber();
if (blockNumber > lastBlock) {
break;
}
HighlightBlockState state = HighlightBlockState::Normal;
QString text = block.text();
if (inBlock) {
item.m_text = item.m_text + "\n" + text;
int idx = codeBlockEndExp.indexIn(text);
if (idx >= 0) {
// End block.
inBlock = false;
state = HighlightBlockState::CodeBlockEnd;
item.m_endBlock = blockNumber;
m_codeBlocks.append(item);
} else {
// Within code block.
state = HighlightBlockState::CodeBlock;
}
} else {
int idx = codeBlockStartExp.indexIn(text);
if (idx >= 0) {
// Start block.
inBlock = true;
state = HighlightBlockState::CodeBlockStart;
item.m_startBlock = blockNumber;
item.m_startPos = block.position();
item.m_text = text;
if (codeBlockStartExp.captureCount() == 2) {
item.m_lang = codeBlockStartExp.capturedTexts()[2];
}
}
}
if (state != HighlightBlockState::Normal) {
m_codeBlocksState.insert(blockNumber, state);
}
block = block.next();
}
}
}
void HGMarkdownHighlighter::initHeaderRegionsFromResult() void HGMarkdownHighlighter::initHeaderRegionsFromResult()
{ {
// From Qt5.7, the capacity is preserved. // From Qt5.7, the capacity is preserved.
@ -447,61 +520,46 @@ void HGMarkdownHighlighter::initBlockHighlihgtOne(unsigned long pos,
} }
} }
void HGMarkdownHighlighter::highlightCodeBlock(const QTextBlock &p_block, const QString &p_text) void HGMarkdownHighlighter::highlightCodeBlock(int p_blockNumber, const QString &p_text)
{ {
auto it = m_codeBlocksState.find(p_blockNumber);
if (it != m_codeBlocksState.end()) {
VTextBlockData *blockData = currentBlockData(); VTextBlockData *blockData = currentBlockData();
Q_ASSERT(blockData); Q_ASSERT(blockData);
int length = 0; HighlightBlockState state = it.value();
int index = -1; // Set code block indentation.
int preState = previousBlockState(); switch (state) {
int state = HighlightBlockState::Normal; case HighlightBlockState::CodeBlockStart:
{
if (preState != HighlightBlockState::CodeBlock int index = codeBlockStartExp.indexIn(p_text);
&& preState != HighlightBlockState::CodeBlockStart) { Q_ASSERT(index >= 0);
// Need to find a new code block start. blockData->setCodeBlockIndentation(codeBlockStartExp.capturedTexts()[1].size());
index = codeBlockStartExp.indexIn(p_text); break;
if (index >= 0 && !isVerbatimBlock(p_block)) {
// Start a new code block.
length = p_text.length();
state = HighlightBlockState::CodeBlockStart;
// The leading spaces of code block start and end must be identical.
int startLeadingSpaces = codeBlockStartExp.capturedTexts()[1].size();
blockData->setCodeBlockIndentation(startLeadingSpaces);
} else {
// A normal block.
blockData->setCodeBlockIndentation(-1);
return;
} }
} else {
// Need to find a code block end. case HighlightBlockState::CodeBlock:
V_FALLTHROUGH;
case HighlightBlockState::CodeBlockEnd:
{
int startLeadingSpaces = 0; int startLeadingSpaces = 0;
VTextBlockData *preBlockData = previousBlockData(); VTextBlockData *preBlockData = previousBlockData();
if (preBlockData) { if (preBlockData) {
startLeadingSpaces = preBlockData->getCodeBlockIndentation(); startLeadingSpaces = preBlockData->getCodeBlockIndentation();
} }
index = codeBlockEndExp.indexIn(p_text);
// The closing ``` should have the same indentation as the open ```.
if (index >= 0
&& startLeadingSpaces == codeBlockEndExp.capturedTexts()[1].size()) {
// End of code block.
length = p_text.length();
state = HighlightBlockState::CodeBlockEnd;
} else {
// Within code block.
index = 0;
length = p_text.length();
state = HighlightBlockState::CodeBlock;
}
blockData->setCodeBlockIndentation(startLeadingSpaces); blockData->setCodeBlockIndentation(startLeadingSpaces);
break;
} }
default:
Q_ASSERT(false);
break;
}
// Set code block state.
setCurrentBlockState(state); setCurrentBlockState(state);
setFormat(index, length, m_codeBlockFormat); }
} }
static bool intersect(const QList<QPair<int, int>> &p_indices, int &p_start, int &p_end) static bool intersect(const QList<QPair<int, int>> &p_indices, int &p_start, int &p_end)
@ -727,30 +785,6 @@ void HGMarkdownHighlighter::highlightCodeBlockColorColumn(const QString &p_text)
setFormat(cc - 1, 1, m_colorColumnFormat); setFormat(cc - 1, 1, m_colorColumnFormat);
} }
void HGMarkdownHighlighter::highlightLinkWithSpacesInURL(const QString &p_text)
{
if (currentBlockState() == HighlightBlockState::CodeBlock) {
return;
}
// TODO: should select links with spaces in URL.
QRegExp regExp("[\\!]?\\[[^\\]]*\\]\\(([^\\n\\)]+)\\)");
int index = regExp.indexIn(p_text);
while (index >= 0) {
Q_ASSERT(regExp.captureCount() == 1);
int length = regExp.matchedLength();
QString capturedText = regExp.capturedTexts()[1];
if (capturedText.contains(' ')) {
if (p_text[index] == '!' && m_imageFormat.isValid()) {
setFormat(index, length, m_imageFormat);
} else if (m_linkFormat.isValid()) {
setFormat(index, length, m_linkFormat);
}
}
index = regExp.indexIn(p_text, index + length);
}
}
void HGMarkdownHighlighter::parse() void HGMarkdownHighlighter::parse()
{ {
if (!parsing.testAndSetRelaxed(0, 1)) { if (!parsing.testAndSetRelaxed(0, 1)) {
@ -779,6 +813,8 @@ void HGMarkdownHighlighter::parse()
initVerbatimBlocksFromResult(); initVerbatimBlocksFromResult();
initFencedCodeBlocksFromResult();
initInlineCodeRegionsFromResult(); initInlineCodeRegionsFromResult();
initBoldItalicRegionsFromResult(); initBoldItalicRegionsFromResult();
@ -872,56 +908,8 @@ bool HGMarkdownHighlighter::updateCodeBlocks()
m_codeBlockHighlights[i].clear(); m_codeBlockHighlights[i].clear();
} }
QVector<VCodeBlock> codeBlocks; m_numOfCodeBlockHighlightsToRecv = m_codeBlocks.size();
emit codeBlocksUpdated(m_codeBlocks);
VCodeBlock item;
bool inBlock = false;
int startLeadingSpaces = -1;
// Only handle complete codeblocks.
QTextBlock block = document->firstBlock();
while (block.isValid()) {
if (!inBlock && isVerbatimBlock(block)) {
block = block.next();
continue;
}
QString text = block.text();
if (inBlock) {
item.m_text = item.m_text + "\n" + text;
int idx = codeBlockEndExp.indexIn(text);
if (idx >= 0 && codeBlockEndExp.capturedTexts()[1].size() == startLeadingSpaces) {
// End block.
inBlock = false;
item.m_endBlock = block.blockNumber();
// See if it is a code block inside HTML comment.
if (!isBlockInsideCommentRegion(block)) {
qDebug() << "add one code block in lang" << item.m_lang;
codeBlocks.append(item);
}
}
} else {
int idx = codeBlockStartExp.indexIn(text);
if (idx >= 0) {
// Start block.
inBlock = true;
item.m_startBlock = block.blockNumber();
item.m_startPos = block.position();
item.m_text = text;
if (codeBlockStartExp.captureCount() == 2) {
item.m_lang = codeBlockStartExp.capturedTexts()[2];
}
startLeadingSpaces = codeBlockStartExp.capturedTexts()[1].size();
}
}
block = block.next();
}
m_numOfCodeBlockHighlightsToRecv = codeBlocks.size();
emit codeBlocksUpdated(codeBlocks);
return m_numOfCodeBlockHighlightsToRecv > 0; return m_numOfCodeBlockHighlightsToRecv > 0;
} }

View File

@ -9,6 +9,7 @@
#include <QString> #include <QString>
#include "vtextblockdata.h" #include "vtextblockdata.h"
#include "vconstants.h"
extern "C" { extern "C" {
#include <pmh_parser.h> #include <pmh_parser.h>
@ -298,9 +299,14 @@ private:
QVector<VElementRegion> m_headerRegions; QVector<VElementRegion> m_headerRegions;
// All verbatim blocks (by parser) number. // All verbatim blocks (by parser) number.
// It may be a code block inside fenced code block.
QSet<int> m_verbatimBlocks; QSet<int> m_verbatimBlocks;
// All fenced code blocks.
QVector<VCodeBlock> m_codeBlocks;
// Indexed by block number.
QHash<int, HighlightBlockState> m_codeBlocksState;
// Indexed by block number. // Indexed by block number.
QHash<int, HeaderBlockInfo> m_headerBlocks; QHash<int, HeaderBlockInfo> m_headerBlocks;
@ -343,15 +349,10 @@ private:
void resizeBuffer(int newCap); void resizeBuffer(int newCap);
void highlightCodeBlock(const QTextBlock &p_block, const QString &p_text); void highlightCodeBlock(int p_blockNumber, const QString &p_text);
void highlightMathJax(const QTextBlock &p_block, const QString &p_text); void highlightMathJax(const QTextBlock &p_block, const QString &p_text);
// Highlight links using regular expression.
// PEG Markdown Highlight treat URLs with spaces illegal. This function is
// intended to complement this.
void highlightLinkWithSpacesInURL(const QString &p_text);
void parse(); void parse();
void parseInternal(); void parseInternal();
@ -382,6 +383,9 @@ private:
// Fetch all the verbatim blocks from parsing result. // Fetch all the verbatim blocks from parsing result.
void initVerbatimBlocksFromResult(); void initVerbatimBlocksFromResult();
// Fetch all the fenced code blocks from parsing result.
void initFencedCodeBlocksFromResult();
// Fetch all the inline code regions from parsing result. // Fetch all the inline code regions from parsing result.
void initInlineCodeRegionsFromResult(); void initInlineCodeRegionsFromResult();

View File

@ -0,0 +1,4 @@
#ifndef MARKDOWNHIGHLIGHTERDATA_H
#define MARKDOWNHIGHLIGHTERDATA_H
#endif // MARKDOWNHIGHLIGHTERDATA_H

View File

@ -134,6 +134,10 @@ foreground: 9e9e9e
VERBATIM VERBATIM
foreground: 98c379 foreground: 98c379
font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
FENCEDCODEBLOCK
foreground: 98c379
font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
# [VNote] Codeblock style from HighlightJS (bold, italic, underlined, strikeout, color) # [VNote] Codeblock style from HighlightJS (bold, italic, underlined, strikeout, color)
# The last occurrence of the same attribute takes effect # The last occurrence of the same attribute takes effect
# Could specify multiple attribute in one line # Could specify multiple attribute in one line

View File

@ -132,6 +132,10 @@ foreground: 6e7686
VERBATIM VERBATIM
foreground: 98c379 foreground: 98c379
font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
FENCEDCODEBLOCK
foreground: 98c379
font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
# [VNote] Codeblock style from HighlightJS (bold, italic, underlined, strikeout, color) # [VNote] Codeblock style from HighlightJS (bold, italic, underlined, strikeout, color)
# The last occurrence of the same attribute takes effect # The last occurrence of the same attribute takes effect
# Could specify multiple attribute in one line # Could specify multiple attribute in one line

View File

@ -129,6 +129,10 @@ foreground: 93a1a1
VERBATIM VERBATIM
foreground: 673ab7 foreground: 673ab7
font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
FENCEDCODEBLOCK
foreground: 673ab7
font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
# [VNote] Codeblock style from HighlightJS (bold, italic, underlined, strikeout, color) # [VNote] Codeblock style from HighlightJS (bold, italic, underlined, strikeout, color)
# The last occurrence of the same attribute takes effect # The last occurrence of the same attribute takes effect
hljs-comment: 6c6c6c hljs-comment: 6c6c6c

View File

@ -130,6 +130,10 @@ foreground: 93a1a1
VERBATIM VERBATIM
foreground: 673ab7 foreground: 673ab7
font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
FENCEDCODEBLOCK
foreground: 673ab7
font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
# [VNote] Codeblock style from HighlightJS (bold, italic, underlined, strikeout, color) # [VNote] Codeblock style from HighlightJS (bold, italic, underlined, strikeout, color)
# The last occurrence of the same attribute takes effect # The last occurrence of the same attribute takes effect
# Could specify multiple attribute in one line # Could specify multiple attribute in one line

View File

@ -118,6 +118,8 @@ void VStyleParser::parseMarkdownStyle(const QString &styleStr)
if (markdownStyles) { if (markdownStyles) {
pmh_free_style_collection(markdownStyles); pmh_free_style_collection(markdownStyles);
} }
// markdownStyles is not indexed by element type.
markdownStyles = pmh_parse_styles(styleStr.toLocal8Bit().data(), markdownStyles = pmh_parse_styles(styleStr.toLocal8Bit().data(),
&markdownStyleErrorCB, this); &markdownStyleErrorCB, this);
} }
@ -131,6 +133,7 @@ QVector<HighlightingStyle> VStyleParser::fetchMarkdownStyles(const QFont &baseFo
if (!attr) { if (!attr) {
continue; continue;
} }
HighlightingStyle style; HighlightingStyle style;
style.type = attr->lang_element_type; style.type = attr->lang_element_type;
style.format = QTextCharFormatFromAttrs(attr, baseFont); style.format = QTextCharFormatFromAttrs(attr, baseFont);
@ -143,7 +146,18 @@ QHash<QString, QTextCharFormat> VStyleParser::fetchCodeBlockStyles(const QFont &
{ {
QHash<QString, QTextCharFormat> styles; QHash<QString, QTextCharFormat> styles;
pmh_style_attribute *attrs = markdownStyles->element_styles[pmh_VERBATIM]; pmh_style_attribute *attrs = NULL;
for (int i = 0; i < pmh_NUM_LANG_TYPES; ++i) {
pmh_style_attribute *tmp = markdownStyles->element_styles[i];
if (!tmp) {
continue;
}
if (tmp->lang_element_type == pmh_FENCEDCODEBLOCK) {
attrs = tmp;
break;
}
}
// First set up the base format. // First set up the base format.
QTextCharFormat baseFormat = QTextCharFormatFromAttrs(attrs, p_baseFont); QTextCharFormat baseFormat = QTextCharFormatFromAttrs(attrs, p_baseFont);

View File

@ -74,10 +74,15 @@ void VTextDocumentLayout::blockRangeFromRect(const QRectF &p_rect,
return; return;
} }
if (document()->blockCount() != m_blocks.size()) {
p_first = -1;
p_last = -1;
return;
}
p_first = -1; p_first = -1;
p_last = m_blocks.size() - 1; p_last = m_blocks.size() - 1;
int y = p_rect.y(); int y = p_rect.y();
Q_ASSERT(document()->blockCount() == m_blocks.size());
QTextBlock block = document()->firstBlock(); QTextBlock block = document()->firstBlock();
while (block.isValid()) { while (block.isValid()) {
const BlockInfo &info = m_blocks[block.blockNumber()]; const BlockInfo &info = m_blocks[block.blockNumber()];
@ -121,9 +126,11 @@ void VTextDocumentLayout::blockRangeFromRectBS(const QRectF &p_rect,
return; return;
} }
Q_ASSERT(document()->blockCount() == m_blocks.size()); if (document()->blockCount() != m_blocks.size()) {
p_first = -1;
} else {
p_first = findBlockByPosition(p_rect.topLeft()); p_first = findBlockByPosition(p_rect.topLeft());
}
if (p_first == -1) { if (p_first == -1) {
p_last = -1; p_last = -1;