Merge pull request #38 from iaom/0103-dev
Optimized text content search.
This commit is contained in:
commit
7bab5db46b
|
@ -25,8 +25,11 @@ void Document::setData(QString data)
|
||||||
|
|
||||||
void Document::addterm(std::string term,QVector<size_t> offset, int weight)
|
void Document::addterm(std::string term,QVector<size_t> offset, int weight)
|
||||||
{
|
{
|
||||||
if(term == ""||term.length() > 240)
|
if(term == "")
|
||||||
return;
|
return;
|
||||||
|
if(term.length() > 240)
|
||||||
|
term = QString::fromStdString(term).left(30).toStdString();
|
||||||
|
|
||||||
for(size_t i : offset)
|
for(size_t i : offset)
|
||||||
{
|
{
|
||||||
m_document->add_posting(term,i,weight);
|
m_document->add_posting(term,i,weight);
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#include "file-searcher.h"
|
#include "file-searcher.h"
|
||||||
#include <QFileInfo>
|
#include <QFileInfo>
|
||||||
#include <QDebug>
|
#include <QDebug>
|
||||||
|
#include <chinese-segmentation.h>
|
||||||
|
|
||||||
FileSearcher::FileSearcher(QObject *parent) : QObject(parent)
|
FileSearcher::FileSearcher(QObject *parent) : QObject(parent)
|
||||||
{
|
{
|
||||||
|
@ -83,19 +84,33 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
|
||||||
Xapian::Database db(CONTENT_INDEX_PATH);
|
Xapian::Database db(CONTENT_INDEX_PATH);
|
||||||
Xapian::Enquire enquire(db);
|
Xapian::Enquire enquire(db);
|
||||||
Xapian::QueryParser qp;
|
Xapian::QueryParser qp;
|
||||||
// qp.set_default_op(Xapian::Query::OP_PHRASE);
|
qp.set_default_op(Xapian::Query::OP_AND);
|
||||||
qp.set_database(db);
|
qp.set_database(db);
|
||||||
|
|
||||||
|
QVector<SKeyWord> sKeyWord = ChineseSegmentation::callSegement(&keyword);
|
||||||
//Create a query
|
//Create a query
|
||||||
Xapian::Query queryPhrase = qp.parse_query(keyword.toStdString());
|
std::string words;
|
||||||
|
for(int i=0;i<sKeyWord.size();i++)
|
||||||
|
{
|
||||||
|
words.append(sKeyWord.at(i).word).append(" ");
|
||||||
|
}
|
||||||
|
Xapian::Query query = qp.parse_query(words);
|
||||||
|
|
||||||
qDebug()<<QString::fromStdString(queryPhrase.get_description());
|
// std::vector<Xapian::Query> v;
|
||||||
|
// for(int i=0;i<sKeyWord.size();i++)
|
||||||
|
// {
|
||||||
|
// v.push_back(Xapian::Query(sKeyWord.at(i).word));
|
||||||
|
// qDebug()<<QString::fromStdString(sKeyWord.at(i).word);
|
||||||
|
// }
|
||||||
|
// Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
|
||||||
|
qDebug()<<QString::fromStdString(query.get_description());
|
||||||
|
|
||||||
enquire.set_query(queryPhrase);
|
enquire.set_query(query);
|
||||||
//dir result
|
//dir result
|
||||||
Xapian::MSet result = enquire.get_mset(begin, begin+num);
|
Xapian::MSet result = enquire.get_mset(begin, begin+num);
|
||||||
qDebug()<< "find results count=" <<static_cast<int>(result.get_matches_estimated());
|
qDebug()<< "find results count=" <<static_cast<int>(result.get_matches_estimated());
|
||||||
searchResult = getContentResult(result,keyword);
|
|
||||||
|
searchResult = getContentResult(result,words);
|
||||||
|
|
||||||
qDebug()<< "--content search finish--";
|
qDebug()<< "--content search finish--";
|
||||||
}
|
}
|
||||||
|
@ -106,7 +121,7 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Q_EMIT this->contentResult(searchResult);
|
Q_EMIT this->contentResult(searchResult);
|
||||||
qDebug()<<searchResult;
|
// qDebug()<<searchResult;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,12 +158,16 @@ QStringList FileSearcher::getResult(Xapian::MSet &result)
|
||||||
return searchResult;
|
return searchResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
QMap<QString,QStringList> FileSearcher::getContentResult(Xapian::MSet &result, QString &keyWord)
|
QMap<QString,QStringList> FileSearcher::getContentResult(Xapian::MSet &result, std::string &keyWord)
|
||||||
{
|
{
|
||||||
//QStringList *pathTobeDelete = new QStringList;
|
//QStringList *pathTobeDelete = new QStringList;
|
||||||
//Delete the docs whose paths no longer exist.
|
//Delete the docs whose paths no longer exist.
|
||||||
|
|
||||||
int size = keyWord.size();
|
QString wordTobeFound = QString::fromStdString(keyWord).section(" ",0,0);
|
||||||
|
int size = wordTobeFound.size();
|
||||||
|
int totalSize = QString::fromStdString(keyWord).size();
|
||||||
|
if(totalSize < 5)
|
||||||
|
totalSize = 5;
|
||||||
QMap<QString,QStringList> searchResult;
|
QMap<QString,QStringList> searchResult;
|
||||||
if(result.size() == 0)
|
if(result.size() == 0)
|
||||||
return searchResult;
|
return searchResult;
|
||||||
|
@ -171,13 +190,15 @@ QMap<QString,QStringList> FileSearcher::getContentResult(Xapian::MSet &result, Q
|
||||||
// Construct snippets containing keyword.
|
// Construct snippets containing keyword.
|
||||||
QStringList snippets;
|
QStringList snippets;
|
||||||
auto term = doc.termlist_begin();
|
auto term = doc.termlist_begin();
|
||||||
term.skip_to(keyWord.toStdString());
|
term.skip_to(wordTobeFound.toStdString());
|
||||||
for(auto pos = term.positionlist_begin();pos != term.positionlist_end();++pos)
|
int count =0;
|
||||||
|
for(auto pos = term.positionlist_begin();pos != term.positionlist_end()&&count < 6;++pos)
|
||||||
{
|
{
|
||||||
QByteArray snippetByte = QByteArray::fromStdString(data);
|
QByteArray snippetByte = QByteArray::fromStdString(data);
|
||||||
QString snippet = "..."+QString(snippetByte.left(*pos)).right(size +5) + QString(snippetByte.mid(*pos,-1)).left(size+5) + "...";
|
QString snippet = "..."+QString(snippetByte.left(*pos)).right(size +totalSize) + QString(snippetByte.mid(*pos,-1)).left(size+totalSize) + "...";
|
||||||
// qDebug()<<snippet;
|
// qDebug()<<snippet;
|
||||||
snippets.append(snippet);
|
snippets.append(snippet);
|
||||||
|
++count;
|
||||||
}
|
}
|
||||||
searchResult.insert(path,snippets);
|
searchResult.insert(path,snippets);
|
||||||
qDebug()<< "path="<< path << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
|
qDebug()<< "path="<< path << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
|
||||||
|
|
|
@ -25,7 +25,7 @@ Q_SIGNALS:
|
||||||
void contentResult(QMap<QString,QStringList> resultC);
|
void contentResult(QMap<QString,QStringList> resultC);
|
||||||
private:
|
private:
|
||||||
QStringList getResult(Xapian::MSet &result);
|
QStringList getResult(Xapian::MSet &result);
|
||||||
QMap<QString,QStringList> getContentResult(Xapian::MSet &result,QString &keyWord);
|
QMap<QString,QStringList> getContentResult(Xapian::MSet &result,std::string &keyWord);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // FILESEARCHER_H
|
#endif // FILESEARCHER_H
|
||||||
|
|
|
@ -69,8 +69,8 @@ int main(int argc, char *argv[])
|
||||||
/*-------------InotyifyRefact Test End-----------------*/
|
/*-------------InotyifyRefact Test End-----------------*/
|
||||||
|
|
||||||
/*-------------文本搜索 Test start-----------------*/
|
/*-------------文本搜索 Test start-----------------*/
|
||||||
FileSearcher *search = new FileSearcher();
|
// FileSearcher *search = new FileSearcher();
|
||||||
search->onKeywordSearchContent("测试");
|
// search->onKeywordSearchContent("重要器官移植⑤白血病");
|
||||||
/*-------------文本搜索 Test End-----------------*/
|
/*-------------文本搜索 Test End-----------------*/
|
||||||
|
|
||||||
qRegisterMetaType<QVector<QStringList>>("QVector<QStringList>");
|
qRegisterMetaType<QVector<QStringList>>("QVector<QStringList>");
|
||||||
|
|
Loading…
Reference in New Issue