Merge pull request #38 from iaom/0103-dev

Optimized text content search.
This commit is contained in:
张佳萍 2021-01-03 16:06:04 +08:00 committed by GitHub
commit 7bab5db46b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 39 additions and 15 deletions

View File

@ -25,8 +25,11 @@ void Document::setData(QString data)
void Document::addterm(std::string term,QVector<size_t> offset, int weight) void Document::addterm(std::string term,QVector<size_t> offset, int weight)
{ {
if(term == ""||term.length() > 240) if(term == "")
return; return;
if(term.length() > 240)
term = QString::fromStdString(term).left(30).toStdString();
for(size_t i : offset) for(size_t i : offset)
{ {
m_document->add_posting(term,i,weight); m_document->add_posting(term,i,weight);

View File

@ -1,6 +1,7 @@
#include "file-searcher.h" #include "file-searcher.h"
#include <QFileInfo> #include <QFileInfo>
#include <QDebug> #include <QDebug>
#include <chinese-segmentation.h>
FileSearcher::FileSearcher(QObject *parent) : QObject(parent) FileSearcher::FileSearcher(QObject *parent) : QObject(parent)
{ {
@ -83,19 +84,33 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
Xapian::Database db(CONTENT_INDEX_PATH); Xapian::Database db(CONTENT_INDEX_PATH);
Xapian::Enquire enquire(db); Xapian::Enquire enquire(db);
Xapian::QueryParser qp; Xapian::QueryParser qp;
// qp.set_default_op(Xapian::Query::OP_PHRASE); qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db); qp.set_database(db);
QVector<SKeyWord> sKeyWord = ChineseSegmentation::callSegement(&keyword);
//Create a query //Create a query
Xapian::Query queryPhrase = qp.parse_query(keyword.toStdString()); std::string words;
for(int i=0;i<sKeyWord.size();i++)
{
words.append(sKeyWord.at(i).word).append(" ");
}
Xapian::Query query = qp.parse_query(words);
qDebug()<<QString::fromStdString(queryPhrase.get_description()); // std::vector<Xapian::Query> v;
// for(int i=0;i<sKeyWord.size();i++)
// {
// v.push_back(Xapian::Query(sKeyWord.at(i).word));
// qDebug()<<QString::fromStdString(sKeyWord.at(i).word);
// }
// Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
qDebug()<<QString::fromStdString(query.get_description());
enquire.set_query(queryPhrase); enquire.set_query(query);
//dir result //dir result
Xapian::MSet result = enquire.get_mset(begin, begin+num); Xapian::MSet result = enquire.get_mset(begin, begin+num);
qDebug()<< "find results count=" <<static_cast<int>(result.get_matches_estimated()); qDebug()<< "find results count=" <<static_cast<int>(result.get_matches_estimated());
searchResult = getContentResult(result,keyword);
searchResult = getContentResult(result,words);
qDebug()<< "--content search finish--"; qDebug()<< "--content search finish--";
} }
@ -106,7 +121,7 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
return; return;
} }
Q_EMIT this->contentResult(searchResult); Q_EMIT this->contentResult(searchResult);
qDebug()<<searchResult; // qDebug()<<searchResult;
return; return;
} }
@ -143,12 +158,16 @@ QStringList FileSearcher::getResult(Xapian::MSet &result)
return searchResult; return searchResult;
} }
QMap<QString,QStringList> FileSearcher::getContentResult(Xapian::MSet &result, QString &keyWord) QMap<QString,QStringList> FileSearcher::getContentResult(Xapian::MSet &result, std::string &keyWord)
{ {
//QStringList *pathTobeDelete = new QStringList; //QStringList *pathTobeDelete = new QStringList;
//Delete those path docs which no longer exist. //Delete those path docs which no longer exist.
int size = keyWord.size(); QString wordTobeFound = QString::fromStdString(keyWord).section(" ",0,0);
int size = wordTobeFound.size();
int totalSize = QString::fromStdString(keyWord).size();
if(totalSize < 5)
totalSize = 5;
QMap<QString,QStringList> searchResult; QMap<QString,QStringList> searchResult;
if(result.size() == 0) if(result.size() == 0)
return searchResult; return searchResult;
@ -171,13 +190,15 @@ QMap<QString,QStringList> FileSearcher::getContentResult(Xapian::MSet &result, Q
// Construct snippets containing keyword. // Construct snippets containing keyword.
QStringList snippets; QStringList snippets;
auto term = doc.termlist_begin(); auto term = doc.termlist_begin();
term.skip_to(keyWord.toStdString()); term.skip_to(wordTobeFound.toStdString());
for(auto pos = term.positionlist_begin();pos != term.positionlist_end();++pos) int count =0;
for(auto pos = term.positionlist_begin();pos != term.positionlist_end()&&count < 6;++pos)
{ {
QByteArray snippetByte = QByteArray::fromStdString(data); QByteArray snippetByte = QByteArray::fromStdString(data);
QString snippet = "..."+QString(snippetByte.left(*pos)).right(size +5) + QString(snippetByte.mid(*pos,-1)).left(size+5) + "..."; QString snippet = "..."+QString(snippetByte.left(*pos)).right(size +totalSize) + QString(snippetByte.mid(*pos,-1)).left(size+totalSize) + "...";
// qDebug()<<snippet; // qDebug()<<snippet;
snippets.append(snippet); snippets.append(snippet);
++count;
} }
searchResult.insert(path,snippets); searchResult.insert(path,snippets);
qDebug()<< "path="<< path << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent; qDebug()<< "path="<< path << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;

View File

@ -25,7 +25,7 @@ Q_SIGNALS:
void contentResult(QMap<QString,QStringList> resultC); void contentResult(QMap<QString,QStringList> resultC);
private: private:
QStringList getResult(Xapian::MSet &result); QStringList getResult(Xapian::MSet &result);
QMap<QString,QStringList> getContentResult(Xapian::MSet &result,QString &keyWord); QMap<QString,QStringList> getContentResult(Xapian::MSet &result,std::string &keyWord);
}; };
#endif // FILESEARCHER_H #endif // FILESEARCHER_H

View File

@ -69,8 +69,8 @@ int main(int argc, char *argv[])
/*-------------InotyifyRefact Test End-----------------*/ /*-------------InotyifyRefact Test End-----------------*/
/*-------------文本搜索 Test start-----------------*/ /*-------------文本搜索 Test start-----------------*/
FileSearcher *search = new FileSearcher(); // FileSearcher *search = new FileSearcher();
search->onKeywordSearchContent("测试"); // search->onKeywordSearchContent("重要器官移植⑤白血病");
/*-------------文本搜索 Test End-----------------*/ /*-------------文本搜索 Test End-----------------*/
qRegisterMetaType<QVector<QStringList>>("QVector<QStringList>"); qRegisterMetaType<QVector<QStringList>>("QVector<QStringList>");