From 601f9d636dec25acc8045c13857edcdeb2eb2dcc Mon Sep 17 00:00:00 2001 From: zhangpengfei Date: Sun, 3 Jan 2021 16:01:35 +0800 Subject: [PATCH] Optimized text content search. --- libsearch/index/document.cpp | 5 +++- libsearch/index/file-searcher.cpp | 43 +++++++++++++++++++++++-------- libsearch/index/file-searcher.h | 2 +- src/main.cpp | 4 +-- 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/libsearch/index/document.cpp b/libsearch/index/document.cpp index 980ca67..95056b8 100644 --- a/libsearch/index/document.cpp +++ b/libsearch/index/document.cpp @@ -25,8 +25,11 @@ void Document::setData(QString data) void Document::addterm(std::string term,QVector offset, int weight) { - if(term == ""||term.length() > 240) + if(term == "") return; + if(term.length() > 240) + term = QString::fromStdString(term).left(30).toStdString(); + for(size_t i : offset) { m_document->add_posting(term,i,weight); diff --git a/libsearch/index/file-searcher.cpp b/libsearch/index/file-searcher.cpp index 2641f15..9da3860 100644 --- a/libsearch/index/file-searcher.cpp +++ b/libsearch/index/file-searcher.cpp @@ -1,6 +1,7 @@ #include "file-searcher.h" #include #include +#include FileSearcher::FileSearcher(QObject *parent) : QObject(parent) { @@ -83,19 +84,33 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num) Xapian::Database db(CONTENT_INDEX_PATH); Xapian::Enquire enquire(db); Xapian::QueryParser qp; -// qp.set_default_op(Xapian::Query::OP_PHRASE); + qp.set_default_op(Xapian::Query::OP_AND); qp.set_database(db); + QVector sKeyWord = ChineseSegmentation::callSegement(&keyword); //Creat a query - Xapian::Query queryPhrase = qp.parse_query(keyword.toStdString()); + std::string words; + for(int i=0;i v; +// for(int i=0;icontentResult(searchResult); - qDebug()< FileSearcher::getContentResult(Xapian::MSet &result, QString &keyWord) +QMap FileSearcher::getContentResult(Xapian::MSet &result, std::string &keyWord) { //QStringList *pathTobeDelete = new QStringList; //Delete those path doc which is not already exist. - int size = keyWord.size(); + QString wordTobeFound = QString::fromStdString(keyWord).section(" ",0,0); + int size = wordTobeFound.size(); + int totalSize = QString::fromStdString(keyWord).size(); + if(totalSize < 5) + totalSize = 5; QMap searchResult; if(result.size() == 0) return searchResult; @@ -171,13 +190,15 @@ QMap FileSearcher::getContentResult(Xapian::MSet &result, Q // Construct snippets containing keyword. QStringList snippets; auto term = doc.termlist_begin(); - term.skip_to(keyWord.toStdString()); - for(auto pos = term.positionlist_begin();pos != term.positionlist_end();++pos) + term.skip_to(wordTobeFound.toStdString()); + int count =0; + for(auto pos = term.positionlist_begin();pos != term.positionlist_end()&&count < 6;++pos) { QByteArray snippetByte = QByteArray::fromStdString(data); - QString snippet = "..."+QString(snippetByte.left(*pos)).right(size +5) + QString(snippetByte.mid(*pos,-1)).left(size+5) + "..."; + QString snippet = "..."+QString(snippetByte.left(*pos)).right(size +totalSize) + QString(snippetByte.mid(*pos,-1)).left(size+totalSize) + "..."; // qDebug()<onKeywordSearchContent("重要器官移植⑤白血病"); /*-------------文本搜索 Test End-----------------*/ qRegisterMetaType>("QVector");