From abce24773c019ee8579a6c438656ff7f78571207 Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Mon, 17 May 2021 14:47:39 +0800 Subject: [PATCH] Fix: Index process crashed when parsing some WPS templates. --- libchinese-segmentation/chinese-segmentation.cpp | 8 ++++---- libchinese-segmentation/chinese-segmentation.h | 2 +- libsearch/index/construct-document.cpp | 2 +- libsearch/index/document.cpp | 2 +- libsearch/index/document.h | 2 +- libsearch/index/index-generator.cpp | 2 +- libsearch/index/search-manager.cpp | 5 ++--- libsearch/parser/binary-parser.cpp | 5 ++--- 8 files changed, 13 insertions(+), 15 deletions(-) diff --git a/libchinese-segmentation/chinese-segmentation.cpp b/libchinese-segmentation/chinese-segmentation.cpp index ea80a67..477f5b7 100644 --- a/libchinese-segmentation/chinese-segmentation.cpp +++ b/libchinese-segmentation/chinese-segmentation.cpp @@ -52,10 +52,10 @@ ChineseSegmentation *ChineseSegmentation::getInstance() { return global_instance_chinese_segmentation; } -QVector ChineseSegmentation::callSegement(QString str) { - std::string s; - s = str.toStdString(); - str.squeeze(); +QVector ChineseSegmentation::callSegement(std::string s) { +// std::string s; +// s = str.toStdString(); +// str.squeeze(); const size_t topk = -1; std::vector keywordres; diff --git a/libchinese-segmentation/chinese-segmentation.h b/libchinese-segmentation/chinese-segmentation.h index c027608..4a12c23 100644 --- a/libchinese-segmentation/chinese-segmentation.h +++ b/libchinese-segmentation/chinese-segmentation.h @@ -47,7 +47,7 @@ class CHINESESEGMENTATION_EXPORT ChineseSegmentation { public: static ChineseSegmentation *getInstance(); ~ChineseSegmentation(); - QVector callSegement(QString str); + QVector callSegement(std::string s); void convert(std::vector& keywordres, QVector& kw); private: static QMutex m_mutex; diff --git a/libsearch/index/construct-document.cpp b/libsearch/index/construct-document.cpp index a479ba2..5be7af1 100644 --- 
a/libsearch/index/construct-document.cpp +++ b/libsearch/index/construct-document.cpp @@ -111,7 +111,7 @@ void ConstructDocumentForContent::run() { QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path)); QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep))); - QVector term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000)); + QVector term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000).toStdString()); Document doc; doc.setData(content); diff --git a/libsearch/index/document.cpp b/libsearch/index/document.cpp index 6200c0c..68b21df 100644 --- a/libsearch/index/document.cpp +++ b/libsearch/index/document.cpp @@ -20,7 +20,7 @@ #include "document.h" #include using namespace Zeeker; -void Document::setData(QString data) { +void Document::setData(QString &data) { if(data.isEmpty()) return; m_document.set_data(data.toStdString()); diff --git a/libsearch/index/document.h b/libsearch/index/document.h index f6b8c20..d4549e8 100644 --- a/libsearch/index/document.h +++ b/libsearch/index/document.h @@ -39,7 +39,7 @@ public: m_index_text = other.m_index_text; m_unique_term = other.m_unique_term; } - void setData(QString data); + void setData(QString &data); void addPosting(std::string term, QVector offset, int weight = 1); void addPosting(std::string term, unsigned int offset, int weight = 1); void addTerm(QString term); diff --git a/libsearch/index/index-generator.cpp b/libsearch/index/index-generator.cpp index e650271..8c02122 100644 --- a/libsearch/index/index-generator.cpp +++ b/libsearch/index/index-generator.cpp @@ -354,7 +354,7 @@ Document IndexGenerator::GenerateContentDocument(const QString &path) { QString upTerm; FileReader::getTextContent(path, content); - term = ChineseSegmentation::getInstance()->callSegement(content); + term = ChineseSegmentation::getInstance()->callSegement(content.toStdString()); // QStringList term = 
content.split(""); doc.setData(content); diff --git a/libsearch/index/search-manager.cpp b/libsearch/index/search-manager.cpp index 68e8605..ce75385 100644 --- a/libsearch/index/search-manager.cpp +++ b/libsearch/index/search-manager.cpp @@ -273,7 +273,7 @@ int FileContentSearch::keywordSearchContent() { ret.erase(ret.begin(), ret.end()); ::friso::ResultMap().swap(ret); */ - QVector sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword); + QVector sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword.toStdString()); //Creat a query std::string words; for(int i = 0; i < sKeyWord.size(); i++) { @@ -419,6 +419,7 @@ void DirectSearch::run() { // QDir::Hidden dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot); dir.setSorting(QDir::DirsFirst); + QStringList blockList = GlobalSettings::getInstance()->getBlockDirs(); while(!bfs.empty()) { dir.setPath(bfs.dequeue()); list = dir.entryInfoList(); @@ -426,8 +427,6 @@ void DirectSearch::run() { if (i.isDir() && (!(i.isSymLink()))) { bool findIndex = false; - - QStringList blockList = GlobalSettings::getInstance()->getBlockDirs(); for (QString j : blockList) { if (i.absoluteFilePath().startsWith(j.prepend("/"))) { findIndex = true; diff --git a/libsearch/parser/binary-parser.cpp b/libsearch/parser/binary-parser.cpp index 698297a..0f927c5 100644 --- a/libsearch/parser/binary-parser.cpp +++ b/libsearch/parser/binary-parser.cpp @@ -4457,15 +4457,14 @@ bool bReadBuffer(FILE *pFile, ULONG ulStartBlock, ULONG ulBegin, ulIndex; size_t tLen; - for(ulIndex = ulStartBlock; - ulIndex != END_OF_CHAIN && tToRead != 0; - ulIndex = aulBlockDepot[ulIndex]) { + for(ulIndex = ulStartBlock;ulIndex != END_OF_CHAIN && tToRead != 0;ulIndex = aulBlockDepot[ulIndex]) { if(ulIndex >= (ULONG)tBlockDepotLen) { if(tBlockSize >= BIG_BLOCK_SIZE) { qWarning() << "The Big Block Depot is damaged"; } else { qWarning() << "The Small Block Depot is damaged"; } + return (tToRead == 0); } if(ulOffset >= (ULONG)tBlockSize) 
{ ulOffset -= tBlockSize;