From 7c9da2a37c92ebdaa4096182d063c387b0e73e25 Mon Sep 17 00:00:00 2001 From: Le Tan Date: Thu, 15 Mar 2018 22:00:14 +0800 Subject: [PATCH] search: support && and || logical operator - Space-separated keywords are combined as AND; - && and || for AND and OR; - Do not support using both && and ||; - When multiple keywords are specified, will not show all the matched lines. --- src/utils/vutils.cpp | 47 ++++++++ src/utils/vutils.h | 3 + src/vexporter.cpp | 45 +------ src/vsearch.cpp | 45 +++++-- src/vsearch.h | 58 +++------ src/vsearchconfig.h | 269 +++++++++++++++++++++++++++++++++++++++++- src/vsearchengine.cpp | 77 ++++++------ src/vsearchengine.h | 18 +-- 8 files changed, 404 insertions(+), 158 deletions(-) diff --git a/src/utils/vutils.cpp b/src/utils/vutils.cpp index 578dcb96..f78f65a2 100644 --- a/src/utils/vutils.cpp +++ b/src/utils/vutils.cpp @@ -1329,3 +1329,50 @@ bool VUtils::fixTextWithCaptainShortcut(QAction *p_act, const QString &p_shortcu return false; } + +QStringList VUtils::parseCombinedArgString(const QString &p_program) +{ + QStringList args; + QString tmp; + int quoteCount = 0; + bool inQuote = false; + + // handle quoting. tokens can be surrounded by double quotes + // "hello world". three consecutive double quotes represent + // the quote character itself. + for (int i = 0; i < p_program.size(); ++i) { + if (p_program.at(i) == QLatin1Char('"')) { + ++quoteCount; + if (quoteCount == 3) { + // third consecutive quote + quoteCount = 0; + tmp += p_program.at(i); + } + + continue; + } + + if (quoteCount) { + if (quoteCount == 1) { + inQuote = !inQuote; + } + + quoteCount = 0; + } + + if (!inQuote && p_program.at(i).isSpace()) { + if (!tmp.isEmpty()) { + args += tmp; + tmp.clear(); + } + } else { + tmp += p_program.at(i); + } + } + + if (!tmp.isEmpty()) { + args += tmp; + } + + return args; +} diff --git a/src/utils/vutils.h b/src/utils/vutils.h index 6f5a939e..bba25603 100644 --- a/src/utils/vutils.h +++ b/src/utils/vutils.h @@ -309,6 +309,9 @@ public: static bool fixTextWithCaptainShortcut(QAction *p_act, const QString &p_shortcut); + // From QProcess code. + static QStringList parseCombinedArgString(const QString &p_program); + // Regular expression for image link. // ![image title]( http://github.com/tamlok/vnote.jpg "alt \" text" ) // Captured texts (need to be trimmed): diff --git a/src/vexporter.cpp b/src/vexporter.cpp index fcb1af73..e0985cd5 100644 --- a/src/vexporter.cpp +++ b/src/vexporter.cpp @@ -62,47 +62,6 @@ void VExporter::prepareExport(const ExportOption &p_opt) prepareWKArguments(p_opt.m_pdfOpt); } -// From QProcess code. -static QStringList parseCombinedArgString(const QString &program) -{ - QStringList args; - QString tmp; - int quoteCount = 0; - bool inQuote = false; - - // handle quoting. tokens can be surrounded by double quotes - // "hello world". three consecutive double quotes represent - // the quote character itself. - for (int i = 0; i < program.size(); ++i) { - if (program.at(i) == QLatin1Char('"')) { - ++quoteCount; - if (quoteCount == 3) { - // third consecutive quote - quoteCount = 0; - tmp += program.at(i); - } - continue; - } - if (quoteCount) { - if (quoteCount == 1) - inQuote = !inQuote; - quoteCount = 0; - } - if (!inQuote && program.at(i).isSpace()) { - if (!tmp.isEmpty()) { - args += tmp; - tmp.clear(); - } - } else { - tmp += program.at(i); - } - } - if (!tmp.isEmpty()) - args += tmp; - - return args; -} - void VExporter::prepareWKArguments(const ExportPDFOption &p_opt) { m_wkArgs.clear(); @@ -154,7 +113,7 @@ void VExporter::prepareWKArguments(const ExportPDFOption &p_opt) // Append additional global option. if (!p_opt.m_wkExtraArgs.isEmpty()) { - m_wkArgs.append(parseCombinedArgString(p_opt.m_wkExtraArgs)); + m_wkArgs.append(VUtils::parseCombinedArgString(p_opt.m_wkExtraArgs)); } // TOC option. @@ -929,7 +888,7 @@ int VExporter::startProcess(const QString &p_program, const QStringList &p_args) int VExporter::startProcess(const QString &p_cmd) { - QStringList args = parseCombinedArgString(p_cmd); + QStringList args = VUtils::parseCombinedArgString(p_cmd); if (args.isEmpty()) { return -2; } diff --git a/src/vsearch.cpp b/src/vsearch.cpp index 3bf00c3e..4dc61717 100644 --- a/src/vsearch.cpp +++ b/src/vsearch.cpp @@ -136,15 +136,13 @@ void VSearch::searchFirstPhase(VFile *p_file, Q_ASSERT(testTarget(VSearchConfig::Note)); QString name = p_file->getName(); - if (!m_patternReg.isEmpty()) { - if (!matchOneLine(name, m_patternReg)) { - return; - } + if (!matchPattern(name)) { + return; } QString filePath = p_file->fetchPath(); if (testObject(VSearchConfig::Name)) { - if (matchOneLine(name, m_searchReg)) { + if (matchNonContent(name)) { VSearchResultItem *item = new VSearchResultItem(VSearchResultItem::Note, VSearchResultItem::LineNumber, name, @@ -195,7 +193,7 @@ void VSearch::searchFirstPhase(VDirectory *p_directory, if (testTarget(VSearchConfig::Folder) && testObject(VSearchConfig::Name)) { QString text = p_directory->getName(); - if (matchOneLine(text, m_searchReg)) { + if (matchNonContent(text)) { VSearchResultItem *item = new VSearchResultItem(VSearchResultItem::Folder, VSearchResultItem::LineNumber, text, @@ -248,7 +246,7 @@ void VSearch::searchFirstPhase(VNotebook *p_notebook, if (testTarget(VSearchConfig::Notebook) && testObject(VSearchConfig::Name)) { QString text = p_notebook->getName(); - if (matchOneLine(text, m_searchReg)) { + if (matchNonContent(text)) { VSearchResultItem *item = new VSearchResultItem(VSearchResultItem::Notebook, VSearchResultItem::LineNumber, text, @@ -295,7 +293,7 @@ VSearchResultItem *VSearch::searchForOutline(const VFile *p_file) const continue; } - if (!matchOneLine(it.m_name, m_searchReg)) { + if (!matchNonContent(it.m_name)) { continue; } @@ -326,8 +324,14 @@ VSearchResultItem *VSearch::searchForContent(const VFile *p_file) const int pos = 0; int size = content.size(); QRegExp newLineReg = QRegExp("\\n|\\r\\n|\\r"); - Qt::CaseSensitivity cs = testOption(VSearchConfig::CaseSensitive) - ? Qt::CaseSensitive : Qt::CaseInsensitive; + VSearchToken &contentToken = m_config->m_contentToken; + bool singleToken = contentToken.tokenSize() == 1; + if (!singleToken) { + contentToken.startBatchMode(); + } + + bool allMatched = false; + while (pos < size) { int idx = content.indexOf(newLineReg, pos); if (idx == -1) { @@ -337,10 +341,10 @@ VSearchResultItem *VSearch::searchForContent(const VFile *p_file) const if (idx > pos) { QString lineText = content.mid(pos, idx - pos); bool matched = false; - if (m_contentSearchReg.isEmpty()) { - matched = lineText.contains(m_config->m_keyword, cs); + if (singleToken) { + matched = contentToken.matched(lineText); } else { - matched = (m_contentSearchReg.indexIn(lineText) != -1); + matched = contentToken.matchBatchMode(lineText); } if (matched) { @@ -360,10 +364,25 @@ VSearchResultItem *VSearch::searchForContent(const VFile *p_file) const break; } + if (!singleToken && contentToken.readyToEndBatchMode(allMatched)) { + break; + } + pos = idx + newLineReg.matchedLength(); ++lineNum; } + if (!singleToken) { + contentToken.readyToEndBatchMode(allMatched); + contentToken.endBatchMode(); + + if (!allMatched && item) { + // This file does not meet all the tokens. + delete item; + item = NULL; + } + } + return item; } diff --git a/src/vsearch.h b/src/vsearch.h index 24e43b16..b8d0e220 100644 --- a/src/vsearch.h +++ b/src/vsearch.h @@ -64,7 +64,9 @@ private: bool testOption(VSearchConfig::Option p_option) const; - bool matchOneLine(const QString &p_text, const QRegExp &p_reg) const; + bool matchNonContent(const QString &p_text) const; + + bool matchPattern(const QString &p_name) const; VSearchResultItem *searchForOutline(const VFile *p_file) const; @@ -78,13 +80,6 @@ private: ISearchEngine *m_engine; - // Search reg used for name, outline, tag. - QRegExp m_searchReg; - - // Search reg used for content. - // We use raw string to speed up if it is empty. - QRegExp m_contentSearchReg; - // Wildcard reg to for file name pattern. QRegExp m_patternReg; }; @@ -99,40 +94,6 @@ inline void VSearch::setConfig(QSharedPointer p_config) { m_config = p_config; - // Compile reg. - const QString &keyword = m_config->m_keyword; - m_contentSearchReg = QRegExp(); - if (keyword.isEmpty()) { - m_searchReg = QRegExp(); - return; - } - - Qt::CaseSensitivity cs = testOption(VSearchConfig::CaseSensitive) - ? Qt::CaseSensitive : Qt::CaseInsensitive; - if (testOption(VSearchConfig::RegularExpression)) { - m_searchReg = QRegExp(keyword, cs); - m_contentSearchReg = QRegExp(keyword, cs); - } else { - if (testOption(VSearchConfig::Fuzzy)) { - QString wildcardText(keyword.size() * 2 + 1, '*'); - for (int i = 0, j = 1; i < keyword.size(); ++i, j += 2) { - wildcardText[j] = keyword[i]; - } - - m_searchReg = QRegExp(wildcardText, cs, QRegExp::Wildcard); - } else { - QString pattern = QRegExp::escape(keyword); - if (testOption(VSearchConfig::WholeWordOnly)) { - pattern = "\\b" + pattern + "\\b"; - - // We only use m_contentSearchReg when WholeWordOnly is checked. - m_contentSearchReg = QRegExp(pattern, cs); - } - - m_searchReg = QRegExp(pattern, cs); - } - } - if (m_config->m_pattern.isEmpty()) { m_patternReg = QRegExp(); } else { @@ -155,8 +116,17 @@ inline bool VSearch::testOption(VSearchConfig::Option p_option) const return p_option & m_config->m_option; } -inline bool VSearch::matchOneLine(const QString &p_text, const QRegExp &p_reg) const +inline bool VSearch::matchNonContent(const QString &p_text) const { - return p_reg.indexIn(p_text) != -1; + return m_config->m_token.matched(p_text); +} + +inline bool VSearch::matchPattern(const QString &p_name) const +{ + if (m_patternReg.isEmpty()) { + return true; + } + + return p_name.contains(m_patternReg); } #endif // VSEARCH_H diff --git a/src/vsearchconfig.h b/src/vsearchconfig.h index 8281e5f9..5bc16a71 100644 --- a/src/vsearchconfig.h +++ b/src/vsearchconfig.h @@ -4,6 +4,185 @@ #include #include #include +#include +#include + +#include "utils/vutils.h" + + +struct VSearchToken +{ + enum Type + { + RawString = 0, + RegularExpression + }; + + enum Operator + { + And = 0, + Or + }; + + VSearchToken() + : m_type(Type::RawString), + m_op(Operator::And), + m_caseSensitivity(Qt::CaseSensitive) + { + } + + void clear() + { + m_keywords.clear(); + m_regs.clear(); + } + + void append(const QString &p_rawStr) + { + m_keywords.append(p_rawStr); + } + + void append(const QRegExp &p_reg) + { + m_regs.append(p_reg); + } + + QString toString() const + { + return QString("token %1 %2 %3 %4 %5").arg(m_type) + .arg(m_op) + .arg(m_caseSensitivity) + .arg(m_keywords.size()) + .arg(m_regs.size()); + } + + // Whether @p_text match all the constraint. + bool matched(const QString &p_text) const + { + int size = m_keywords.size(); + if (m_type == Type::RegularExpression) { + size = m_regs.size(); + } + + if (size == 0) { + return false; + } + + bool ret = m_op == Operator::And ? true : false; + for (int i = 0; i < size; ++i) { + bool tmp = false; + if (m_type == Type::RawString) { + tmp = p_text.contains(m_keywords[i], m_caseSensitivity); + } else { + tmp = p_text.contains(m_regs[i]); + } + + if (tmp) { + if (m_op == Operator::Or) { + ret = true; + break; + } + } else { + if (m_op == Operator::And) { + ret = false; + break; + } + } + } + + return ret; + } + + void startBatchMode() + { + int size = m_type == Type::RawString ? m_keywords.size() : m_regs.size(); + m_matchesInBatch.resize(size); + m_matchesInBatch.fill(false); + m_numOfMatches = 0; + } + + // Match one string in batch mode. + // Returns true if @p_text matches one. + bool matchBatchMode(const QString &p_text) + { + bool ret = false; + int size = m_matchesInBatch.size(); + for (int i = 0; i < size; ++i) { + if (m_matchesInBatch[i]) { + continue; + } + + bool tmp = false; + if (m_type == Type::RawString) { + tmp = p_text.contains(m_keywords[i], m_caseSensitivity); + } else { + tmp = p_text.contains(m_regs[i]); + } + + if (tmp) { + m_matchesInBatch[i] = true; + ++m_numOfMatches; + ret = true; + } + } + + return ret; + } + + // Whether it is OK to finished batch mode. + // @p_matched: the overall match result. + bool readyToEndBatchMode(bool &p_matched) const + { + if (m_op == VSearchToken::And) { + // We need all the tokens matched. + if (m_numOfMatches == m_matchesInBatch.size()) { + p_matched = true; + return true; + } else { + p_matched = false; + return false; + } + } else { + // We only need one match. + if (m_numOfMatches > 0) { + p_matched = true; + return true; + } else { + p_matched = false; + return false; + } + } + } + + void endBatchMode() + { + m_matchesInBatch.clear(); + m_numOfMatches = 0; + } + + int tokenSize() const + { + return m_type == Type::RawString ? m_keywords.size() : m_regs.size(); + } + + VSearchToken::Type m_type; + + VSearchToken::Operator m_op; + + Qt::CaseSensitivity m_caseSensitivity; + + // Valid at RawString. + QVector m_keywords; + + // Valid at RegularExpression. + QVector m_regs; + + // Bitmap for batch mode. + // True if m_regs[i] or m_keywords[i] has been matched. + QVector m_matchesInBatch; + + int m_numOfMatches; +}; struct VSearchConfig @@ -49,6 +228,7 @@ struct VSearchConfig RegularExpression = 0x8UL }; + VSearchConfig() : VSearchConfig(Scope::NoneScope, Object::NoneObject, @@ -73,9 +253,88 @@ struct VSearchConfig m_target(p_target), m_engine(p_engine), m_option(p_option), - m_keyword(p_keyword), m_pattern(p_pattern) { + compileToken(p_keyword); + } + + void compileToken(const QString &p_keyword) + { + m_token.clear(); + m_contentToken.clear(); + if (p_keyword.isEmpty()) { + return; + } + + Qt::CaseSensitivity cs = m_option & VSearchConfig::CaseSensitive + ? Qt::CaseSensitive : Qt::CaseInsensitive; + bool useReg = m_option & VSearchConfig::RegularExpression; + bool wwo = m_option & VSearchConfig::WholeWordOnly; + bool fuzzy = m_option & VSearchConfig::Fuzzy; + + m_token.m_caseSensitivity = cs; + m_contentToken.m_caseSensitivity = cs; + + if (useReg) { + m_token.m_type = VSearchToken::RegularExpression; + m_contentToken.m_type = VSearchToken::RegularExpression; + } else { + if (fuzzy) { + m_token.m_type = VSearchToken::RegularExpression; + m_contentToken.m_type = VSearchToken::RawString; + } else if (wwo) { + m_token.m_type = VSearchToken::RegularExpression; + m_contentToken.m_type = VSearchToken::RegularExpression; + } else { + m_token.m_type = VSearchToken::RawString; + m_contentToken.m_type = VSearchToken::RawString; + } + } + + VSearchToken::Operator op = VSearchToken::And; + + // """ to input a "; + // && for AND, || for OR; + QStringList args = VUtils::parseCombinedArgString(p_keyword); + for (auto const & arg : args) { + if (arg == QStringLiteral("&&")) { + op = VSearchToken::And; + continue; + } else if (arg == QStringLiteral("||")) { + op = VSearchToken::Or; + continue; + } + + if (useReg) { + QRegExp reg(arg, cs); + m_token.append(reg); + m_contentToken.append(reg); + } else { + if (fuzzy) { + QString wildcardText(arg.size() * 2 + 1, '*'); + for (int i = 0, j = 1; i < arg.size(); ++i, j += 2) { + wildcardText[j] = arg[i]; + } + + QRegExp reg(wildcardText, cs, QRegExp::Wildcard); + m_token.append(reg); + m_contentToken.append(arg); + } else if (wwo) { + QString pattern = QRegExp::escape(arg); + pattern = "\\b" + pattern + "\\b"; + + QRegExp reg(pattern, cs); + m_token.append(reg); + m_contentToken.append(reg); + } else { + m_token.append(arg); + m_contentToken.append(arg); + } + } + } + + m_token.m_op = op; + m_contentToken.m_op = op; } QStringList toConfig() const @@ -114,10 +373,14 @@ struct VSearchConfig int m_engine; int m_option; - QString m_keyword; - // Wildcard pattern to filter file. QString m_pattern; + + // Token for name, outline, and tag. + VSearchToken m_token; + + // Token for content. + VSearchToken m_contentToken; }; diff --git a/src/vsearchengine.cpp b/src/vsearchengine.cpp index 669888ef..f5e25e01 100644 --- a/src/vsearchengine.cpp +++ b/src/vsearchengine.cpp @@ -14,14 +14,10 @@ VSearchEngineWorker::VSearchEngineWorker(QObject *p_parent) } void VSearchEngineWorker::setData(const QStringList &p_files, - const QRegExp &p_reg, - const QString &p_keyword, - Qt::CaseSensitivity p_cs) + const VSearchToken &p_token) { m_files = p_files; - m_reg = p_reg; - m_keyword = p_keyword; - m_caseSensitivity = p_cs; + m_token = p_token; } void VSearchEngineWorker::stop() @@ -71,6 +67,14 @@ VSearchResultItem *VSearchEngineWorker::searchFile(const QString &p_fileName) VSearchResultItem *item = NULL; QString line; QTextStream in(&file); + + bool singleToken = m_token.tokenSize() == 1; + if (!singleToken) { + m_token.startBatchMode(); + } + + bool allMatched = false; + while (!in.atEnd()) { if (m_stop.load() == 1) { m_state = VSearchState::Cancelled; @@ -78,14 +82,12 @@ VSearchResultItem *VSearchEngineWorker::searchFile(const QString &p_fileName) break; } - bool matched = false; line = in.readLine(); - if (m_reg.isEmpty()) { - if (line.contains(m_keyword, m_caseSensitivity)) { - matched = true; - } - } else if (m_reg.indexIn(line) != -1) { - matched = true; + bool matched = false; + if (singleToken) { + matched = m_token.matched(line); + } else { + matched = m_token.matchBatchMode(line); } if (matched) { @@ -100,9 +102,23 @@ VSearchResultItem *VSearchEngineWorker::searchFile(const QString &p_fileName) item->m_matches.append(sitem); } + if (!singleToken && m_token.readyToEndBatchMode(allMatched)) { + break; + } + ++lineNum; } + if (!singleToken) { + m_token.readyToEndBatchMode(allMatched); + m_token.endBatchMode(); + + if (!allMatched && item) { + delete item; + item = NULL; + } + } + return item; } @@ -129,23 +145,14 @@ void VSearchEngine::search(const QSharedPointer &p_config, m_result = p_result; - QRegExp reg = compileRegExpFromConfig(p_config); - Qt::CaseSensitivity cs = (p_config->m_option & VSearchConfig::CaseSensitive) - ? Qt::CaseSensitive : Qt::CaseInsensitive; - clearAllWorkers(); m_workers.reserve(numThread); m_finishedWorkers = 0; int totalSize = m_result->m_secondPhaseItems.size(); int step = totalSize / numThread; int remain = totalSize % numThread; - - for (int i = 0; i < numThread; ++i) { - int start = i * step; - if (start >= totalSize) { - break; - } - + int start = 0; + for (int i = 0; i < numThread && start < totalSize; ++i) { int len = step; if (remain) { ++len; @@ -158,9 +165,7 @@ void VSearchEngine::search(const QSharedPointer &p_config, VSearchEngineWorker *th = new VSearchEngineWorker(this); th->setData(m_result->m_secondPhaseItems.mid(start, len), - reg, - p_config->m_keyword, - cs); + p_config->m_contentToken); connect(th, &VSearchEngineWorker::finished, this, &VSearchEngine::handleWorkerFinished); connect(th, &VSearchEngineWorker::resultItemReady, @@ -170,27 +175,13 @@ void VSearchEngine::search(const QSharedPointer &p_config, m_workers.append(th); th->start(); + + start += len; } qDebug() << "schedule tasks to threads" << m_workers.size() << totalSize << step; } -QRegExp VSearchEngine::compileRegExpFromConfig(const QSharedPointer &p_config) const -{ - const QString &keyword = p_config->m_keyword; - Qt::CaseSensitivity cs = (p_config->m_option & VSearchConfig::CaseSensitive) - ? Qt::CaseSensitive : Qt::CaseInsensitive; - if (p_config->m_option & VSearchConfig::RegularExpression) { - return QRegExp(keyword, cs); - } else if (p_config->m_option & VSearchConfig::WholeWordOnly) { - QString pattern = QRegExp::escape(keyword); - pattern = "\\b" + pattern + "\\b"; - return QRegExp(pattern, cs); - } else { - return QRegExp(); - } -} - void VSearchEngine::stop() { qDebug() << "VSearchEngine asked to stop"; diff --git a/src/vsearchengine.h b/src/vsearchengine.h index 33948830..ac7c57b4 100644 --- a/src/vsearchengine.h +++ b/src/vsearchengine.h @@ -1,10 +1,13 @@ #ifndef VSEARCHENGINE_H #define VSEARCHENGINE_H +#include "isearchengine.h" + #include #include #include -#include "isearchengine.h" + +#include "vsearchconfig.h" class VSearchEngineWorker : public QThread { @@ -16,9 +19,7 @@ public: explicit VSearchEngineWorker(QObject *p_parent = nullptr); void setData(const QStringList &p_files, - const QRegExp &p_reg, - const QString &p_keyword, - Qt::CaseSensitivity p_cs); + const VSearchToken &p_token); public slots: void stop(); @@ -38,11 +39,7 @@ private: QStringList m_files; - QRegExp m_reg; - - QString m_keyword; - - Qt::CaseSensitivity m_caseSensitivity; + VSearchToken m_token; VSearchState m_state; @@ -76,9 +73,6 @@ private slots: void handleWorkerFinished(); private: - // Returns an empty object if raw string is preferred. - QRegExp compileRegExpFromConfig(const QSharedPointer &p_config) const; - void clearAllWorkers(); int m_finishedWorkers;