From 9ee1b37693f5ce6890285ae9289e0da473cc054d Mon Sep 17 00:00:00 2001 From: zhangpengfei Date: Sat, 2 Jan 2021 17:21:38 +0800 Subject: [PATCH] Store the content text for search and generate snippets when performing text content search. --- libsearch/index/document.cpp | 9 +++-- libsearch/index/document.h | 3 +- libsearch/index/file-searcher.cpp | 54 ++++++++++++++++++++++++++--- libsearch/index/file-searcher.h | 6 ++-- libsearch/index/filetypefilter.cpp | 4 +-- libsearch/index/filetypefilter.h | 15 ++++---- libsearch/index/index-generator.cpp | 7 ++-- src/main.cpp | 4 +-- 8 files changed, 77 insertions(+), 25 deletions(-) diff --git a/libsearch/index/document.cpp b/libsearch/index/document.cpp index 0c8bde4..980ca67 100644 --- a/libsearch/index/document.cpp +++ b/libsearch/index/document.cpp @@ -23,11 +23,14 @@ void Document::setData(QString data) m_document->set_data(data.toStdString()); } -void Document::addterm(std::string term, int weight) +void Document::addterm(std::string term,QVector offset, int weight) { - if(term == "") + if(term == ""||term.length() > 240) return; - m_document->add_term(term,weight); + for(size_t i : offset) + { + m_document->add_posting(term,i,weight); + } } void Document::addValue(QString value) diff --git a/libsearch/index/document.h b/libsearch/index/document.h index 807aecc..96d5ee1 100644 --- a/libsearch/index/document.h +++ b/libsearch/index/document.h @@ -4,6 +4,7 @@ #include #include #include +#include class Document { @@ -11,7 +12,7 @@ public: Document(); ~Document(); void setData(QString data); - void addterm(std::string term,int weight =1); + void addterm(std::string term, QVector offset, int weight =1); void addValue(QString value); void setUniqueTerm(QString term); std::string getUniqueTerm(); diff --git a/libsearch/index/file-searcher.cpp b/libsearch/index/file-searcher.cpp index 3e744e6..2641f15 100644 --- a/libsearch/index/file-searcher.cpp +++ b/libsearch/index/file-searcher.cpp @@ -75,7 +75,7 @@ void FileSearcher::onKeywordSearch(QString keyword, int begin, int num) void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num) { - QStringList searchResult; + QMap searchResult = QMap(); try { qDebug()<<"--content search start--"; @@ -83,11 +83,11 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num) Xapian::Database db(CONTENT_INDEX_PATH); Xapian::Enquire enquire(db); Xapian::QueryParser qp; - qp.set_default_op(Xapian::Query::OP_PHRASE); +// qp.set_default_op(Xapian::Query::OP_PHRASE); qp.set_database(db); //Creat a query - Xapian::Query queryPhrase = qp.parse_query(keyword.toStdString(),Xapian::QueryParser::FLAG_PHRASE); + Xapian::Query queryPhrase = qp.parse_query(keyword.toStdString()); qDebug()< resultV); - void contentResult(QStringList resultL); + void result(QVector resultP); + void contentResult(QMap resultC); private: QStringList getResult(Xapian::MSet &result); + QMap getContentResult(Xapian::MSet &result,QString &keyWord); }; #endif // FILESEARCHER_H diff --git a/libsearch/index/filetypefilter.cpp b/libsearch/index/filetypefilter.cpp index 9b908d2..f53f0b6 100644 --- a/libsearch/index/filetypefilter.cpp +++ b/libsearch/index/filetypefilter.cpp @@ -21,7 +21,7 @@ void FileTypeFilter::DoSomething(const QFileInfo& fileInfo){ // qDebug() << qmt.preferredSuffix(); for (auto i : this->targetFileTypeVec){ if (fileInfo.fileName().endsWith(i)){ -// qDebug() << fileInfo.fileName(); + qDebug() << fileInfo.fileName(); this->result->append(fileInfo.absoluteFilePath()); } } @@ -36,7 +36,7 @@ QList* FileTypeFilter::getTargetFileAbsolutePath(){ void FileTypeFilter::Test(){ IndexGenerator* ig = IndexGenerator::getInstance(); // this->result = new QList(); -// this->result->append(QString("/home/zpf/桌面/DOCX 文档.docx")); +// this->result->append(QString("/home/zpf/桌面/DOCX 文档(1).docx")); ig->creatAllIndex(this->result); } diff --git a/libsearch/index/filetypefilter.h b/libsearch/index/filetypefilter.h index 4f517ba..cdefc09 100644 --- a/libsearch/index/filetypefilter.h +++ b/libsearch/index/filetypefilter.h @@ -18,13 +18,14 @@ public: void Test(); Q_SIGNALS: private: - const QVector targetFileTypeVec ={ /*QString(".doc"),*/ - QString(".docx")/*,*/ - /*QString(".ppt"), - QString(".pptx"), - QString(".xls"), - QString(".xlsx"), - QString(".txt")*/}; + const QVector targetFileTypeVec ={ +// QString(".doc"), + QString(".docx"), +/* QString(".ppt"), + QString(".pptx"), + QString(".xls"), + QString(".xlsx"), + QString(".txt")*/}; QList* result; }; diff --git a/libsearch/index/index-generator.cpp b/libsearch/index/index-generator.cpp index 009feda..32642dc 100644 --- a/libsearch/index/index-generator.cpp +++ b/libsearch/index/index-generator.cpp @@ -201,7 +201,6 @@ Document IndexGenerator::GenerateDocument(const QVector &list) doc.setData(sourcePath); doc.setUniqueTerm(uniqueterm); doc.addValue(list.at(2)); - if(list.at(2) == QString("1")) QStringList temp; temp.append(index_text); // temp.append(pinyin_text_list); @@ -219,11 +218,12 @@ Document IndexGenerator::GenerateContentDocument(const QString &path) QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path)); QVector term = ChineseSegmentation::callSegement(content); Document doc; - doc.setData(path); + doc.setData(*content); doc.setUniqueTerm(uniqueterm); + doc.addValue(path); for(int i = 0;i(term.at(i).weight)); + doc.addterm(term.at(i).word,term.at(i).offsets,static_cast(term.at(i).weight)); } return doc; @@ -322,6 +322,7 @@ bool IndexGenerator::deleteAllIndex(QStringList *pathlist) { qDebug()<<"--delete start--"; m_datebase_path->delete_document(uniqueterm); + m_database_content->delete_document(uniqueterm); qDebug()<<"delete md5"<commit(); qDebug()<< "--delete finish--"; diff --git a/src/main.cpp b/src/main.cpp index 3602e42..8328cb1 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -69,8 +69,8 @@ int main(int argc, char *argv[]) /*-------------InotyifyRefact Test End-----------------*/ /*-------------文本搜索 Test start-----------------*/ -// FileSearcher *search = new FileSearcher(); -// search->onKeywordSearchContent("麒麟"); + FileSearcher *search = new FileSearcher(); + search->onKeywordSearchContent("测试"); /*-------------文本搜索 Test End-----------------*/ qRegisterMetaType>("QVector");