Optimized text content search.
This commit is contained in:
parent
8b86e40046
commit
601f9d636d
|
@ -25,8 +25,11 @@ void Document::setData(QString data)
|
|||
|
||||
void Document::addterm(std::string term,QVector<size_t> offset, int weight)
|
||||
{
|
||||
if(term == ""||term.length() > 240)
|
||||
if(term == "")
|
||||
return;
|
||||
if(term.length() > 240)
|
||||
term = QString::fromStdString(term).left(30).toStdString();
|
||||
|
||||
for(size_t i : offset)
|
||||
{
|
||||
m_document->add_posting(term,i,weight);
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include "file-searcher.h"
|
||||
#include <QFileInfo>
|
||||
#include <QDebug>
|
||||
#include <chinese-segmentation.h>
|
||||
|
||||
FileSearcher::FileSearcher(QObject *parent) : QObject(parent)
|
||||
{
|
||||
|
@ -83,19 +84,33 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
|
|||
Xapian::Database db(CONTENT_INDEX_PATH);
|
||||
Xapian::Enquire enquire(db);
|
||||
Xapian::QueryParser qp;
|
||||
// qp.set_default_op(Xapian::Query::OP_PHRASE);
|
||||
qp.set_default_op(Xapian::Query::OP_AND);
|
||||
qp.set_database(db);
|
||||
|
||||
QVector<SKeyWord> sKeyWord = ChineseSegmentation::callSegement(&keyword);
|
||||
//Create a query
|
||||
Xapian::Query queryPhrase = qp.parse_query(keyword.toStdString());
|
||||
std::string words;
|
||||
for(int i=0;i<sKeyWord.size();i++)
|
||||
{
|
||||
words.append(sKeyWord.at(i).word).append(" ");
|
||||
}
|
||||
Xapian::Query query = qp.parse_query(words);
|
||||
|
||||
qDebug()<<QString::fromStdString(queryPhrase.get_description());
|
||||
// std::vector<Xapian::Query> v;
|
||||
// for(int i=0;i<sKeyWord.size();i++)
|
||||
// {
|
||||
// v.push_back(Xapian::Query(sKeyWord.at(i).word));
|
||||
// qDebug()<<QString::fromStdString(sKeyWord.at(i).word);
|
||||
// }
|
||||
// Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
|
||||
qDebug()<<QString::fromStdString(query.get_description());
|
||||
|
||||
enquire.set_query(queryPhrase);
|
||||
enquire.set_query(query);
|
||||
//dir result
|
||||
Xapian::MSet result = enquire.get_mset(begin, begin+num);
|
||||
qDebug()<< "find results count=" <<static_cast<int>(result.get_matches_estimated());
|
||||
searchResult = getContentResult(result,keyword);
|
||||
|
||||
searchResult = getContentResult(result,words);
|
||||
|
||||
qDebug()<< "--content search finish--";
|
||||
}
|
||||
|
@ -106,7 +121,7 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
|
|||
return;
|
||||
}
|
||||
Q_EMIT this->contentResult(searchResult);
|
||||
qDebug()<<searchResult;
|
||||
// qDebug()<<searchResult;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -143,12 +158,16 @@ QStringList FileSearcher::getResult(Xapian::MSet &result)
|
|||
return searchResult;
|
||||
}
|
||||
|
||||
QMap<QString,QStringList> FileSearcher::getContentResult(Xapian::MSet &result, QString &keyWord)
|
||||
QMap<QString,QStringList> FileSearcher::getContentResult(Xapian::MSet &result, std::string &keyWord)
|
||||
{
|
||||
//QStringList *pathTobeDelete = new QStringList;
|
||||
//Delete the docs whose paths no longer exist.
|
||||
|
||||
int size = keyWord.size();
|
||||
QString wordTobeFound = QString::fromStdString(keyWord).section(" ",0,0);
|
||||
int size = wordTobeFound.size();
|
||||
int totalSize = QString::fromStdString(keyWord).size();
|
||||
if(totalSize < 5)
|
||||
totalSize = 5;
|
||||
QMap<QString,QStringList> searchResult;
|
||||
if(result.size() == 0)
|
||||
return searchResult;
|
||||
|
@ -171,13 +190,15 @@ QMap<QString,QStringList> FileSearcher::getContentResult(Xapian::MSet &result, Q
|
|||
// Construct snippets containing keyword.
|
||||
QStringList snippets;
|
||||
auto term = doc.termlist_begin();
|
||||
term.skip_to(keyWord.toStdString());
|
||||
for(auto pos = term.positionlist_begin();pos != term.positionlist_end();++pos)
|
||||
term.skip_to(wordTobeFound.toStdString());
|
||||
int count =0;
|
||||
for(auto pos = term.positionlist_begin();pos != term.positionlist_end()&&count < 6;++pos)
|
||||
{
|
||||
QByteArray snippetByte = QByteArray::fromStdString(data);
|
||||
QString snippet = "..."+QString(snippetByte.left(*pos)).right(size +5) + QString(snippetByte.mid(*pos,-1)).left(size+5) + "...";
|
||||
QString snippet = "..."+QString(snippetByte.left(*pos)).right(size +totalSize) + QString(snippetByte.mid(*pos,-1)).left(size+totalSize) + "...";
|
||||
// qDebug()<<snippet;
|
||||
snippets.append(snippet);
|
||||
++count;
|
||||
}
|
||||
searchResult.insert(path,snippets);
|
||||
qDebug()<< "path="<< path << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
|
||||
|
|
|
@ -25,7 +25,7 @@ Q_SIGNALS:
|
|||
void contentResult(QMap<QString,QStringList> resultC);
|
||||
private:
|
||||
QStringList getResult(Xapian::MSet &result);
|
||||
QMap<QString,QStringList> getContentResult(Xapian::MSet &result,QString &keyWord);
|
||||
QMap<QString,QStringList> getContentResult(Xapian::MSet &result,std::string &keyWord);
|
||||
};
|
||||
|
||||
#endif // FILESEARCHER_H
|
||||
|
|
|
@ -69,8 +69,8 @@ int main(int argc, char *argv[])
|
|||
/*-------------InotyifyRefact Test End-----------------*/
|
||||
|
||||
/*-------------Text search Test start-----------------*/
|
||||
FileSearcher *search = new FileSearcher();
|
||||
search->onKeywordSearchContent("测试");
|
||||
// FileSearcher *search = new FileSearcher();
|
||||
// search->onKeywordSearchContent("重要器官移植⑤白血病");
|
||||
/*-------------Text search Test End-----------------*/
|
||||
|
||||
qRegisterMetaType<QVector<QStringList>>("QVector<QStringList>");
|
||||
|
|
Loading…
Reference in New Issue