commit
8b86e40046
|
@ -23,11 +23,14 @@ void Document::setData(QString data)
|
|||
m_document->set_data(data.toStdString());
|
||||
}
|
||||
|
||||
void Document::addterm(std::string term, int weight)
|
||||
void Document::addterm(std::string term,QVector<size_t> offset, int weight)
|
||||
{
|
||||
if(term == "")
|
||||
if(term == ""||term.length() > 240)
|
||||
return;
|
||||
m_document->add_term(term,weight);
|
||||
for(size_t i : offset)
|
||||
{
|
||||
m_document->add_posting(term,i,weight);
|
||||
}
|
||||
}
|
||||
|
||||
void Document::addValue(QString value)
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include <xapian.h>
|
||||
#include <QString>
|
||||
#include <QStringList>
|
||||
#include <QVector>
|
||||
|
||||
class Document
|
||||
{
|
||||
|
@ -11,7 +12,7 @@ public:
|
|||
Document();
|
||||
~Document();
|
||||
void setData(QString data);
|
||||
void addterm(std::string term,int weight =1);
|
||||
void addterm(std::string term, QVector<size_t> offset, int weight =1);
|
||||
void addValue(QString value);
|
||||
void setUniqueTerm(QString term);
|
||||
std::string getUniqueTerm();
|
||||
|
|
|
@ -75,7 +75,7 @@ void FileSearcher::onKeywordSearch(QString keyword, int begin, int num)
|
|||
|
||||
void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
|
||||
{
|
||||
QStringList searchResult;
|
||||
QMap<QString,QStringList> searchResult = QMap<QString,QStringList>();
|
||||
try
|
||||
{
|
||||
qDebug()<<"--content search start--";
|
||||
|
@ -83,11 +83,11 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
|
|||
Xapian::Database db(CONTENT_INDEX_PATH);
|
||||
Xapian::Enquire enquire(db);
|
||||
Xapian::QueryParser qp;
|
||||
qp.set_default_op(Xapian::Query::OP_PHRASE);
|
||||
// qp.set_default_op(Xapian::Query::OP_PHRASE);
|
||||
qp.set_database(db);
|
||||
|
||||
//Creat a query
|
||||
Xapian::Query queryPhrase = qp.parse_query(keyword.toStdString(),Xapian::QueryParser::FLAG_PHRASE);
|
||||
Xapian::Query queryPhrase = qp.parse_query(keyword.toStdString());
|
||||
|
||||
qDebug()<<QString::fromStdString(queryPhrase.get_description());
|
||||
|
||||
|
@ -95,7 +95,7 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
|
|||
//dir result
|
||||
Xapian::MSet result = enquire.get_mset(begin, begin+num);
|
||||
qDebug()<< "find results count=" <<static_cast<int>(result.get_matches_estimated());
|
||||
searchResult = getResult(result);
|
||||
searchResult = getContentResult(result,keyword);
|
||||
|
||||
qDebug()<< "--content search finish--";
|
||||
}
|
||||
|
@ -118,6 +118,7 @@ QStringList FileSearcher::getResult(Xapian::MSet &result)
|
|||
QStringList searchResult = QStringList();
|
||||
if(result.size() == 0)
|
||||
return searchResult;
|
||||
|
||||
for (auto it = result.begin(); it != result.end(); ++it)
|
||||
{
|
||||
Xapian::Document doc = it.get_document();
|
||||
|
@ -135,10 +136,53 @@ QStringList FileSearcher::getResult(Xapian::MSet &result)
|
|||
{
|
||||
searchResult.append(QString::fromStdString(data));
|
||||
}
|
||||
|
||||
qDebug()<< "doc="<< QString::fromStdString(data) << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
|
||||
}
|
||||
// if(!pathTobeDelete->isEmpty())
|
||||
// deleteAllIndex(pathTobeDelete)
|
||||
return searchResult;
|
||||
}
|
||||
|
||||
/**
 * Build the content-search result: for every hit whose file still exists,
 * map the file path to the list of text snippets surrounding the keyword.
 *
 * The path is read from value slot 1 of the matched document and the text
 * from the document data. Hits whose file no longer exists are skipped.
 * A hit that does not contain @p keyWord as an exact term is still reported,
 * with an empty snippet list.
 *
 * @param result  Match set returned by the enquire.
 * @param keyWord Keyword to look up in each document's term list.
 * @return Map from file path to the snippets found in that file.
 */
QMap<QString,QStringList> FileSearcher::getContentResult(Xapian::MSet &result, QString &keyWord)
{
    QMap<QString,QStringList> searchResult;
    if(result.size() == 0)
        return searchResult;

    // Loop-invariant values, computed once instead of once per hit.
    const int size = keyWord.size();
    const std::string keyTerm = keyWord.toStdString();

    for (auto it = result.begin(); it != result.end(); ++it)
    {
        Xapian::Document doc = it.get_document();
        const std::string data = doc.get_data();
        const double docScoreWeight = it.get_weight();
        const Xapian::percent docScorePercent = it.get_percent();
        const QString path = QString::fromStdString(doc.get_value(1));

        // Stack object: the previous heap-allocated QFileInfo was never freed.
        const QFileInfo info(path);
        if(!info.exists())
        {
            // TODO: collect such paths and delete their stale index entries.
            qDebug()<<path<<"is not exist!!";
            continue;
        }

        // Construct snippets containing keyword.
        QStringList snippets;
        auto term = doc.termlist_begin();
        term.skip_to(keyTerm);

        // skip_to() stops at the first term >= keyTerm (or at the end), so the
        // iterator must be verified before its position list is used.
        if(term != doc.termlist_end() && *term == keyTerm)
        {
            const QByteArray snippetByte = QByteArray::fromStdString(data);
            for(auto pos = term.positionlist_begin(); pos != term.positionlist_end(); ++pos)
            {
                // The position is used as a byte offset into the document data:
                // keep up to size+5 characters on each side of it.
                const QString before = QString(snippetByte.left(*pos)).right(size + 5);
                const QString after = QString(snippetByte.mid(*pos,-1)).left(size + 5);
                snippets.append("..." + before + after + "...");
            }
        }

        searchResult.insert(path,snippets);
        qDebug()<< "path="<< path << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
    }
    return searchResult;
}
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <xapian.h>
|
||||
#include <QStandardPaths>
|
||||
#include <QVector>
|
||||
#include <QMap>
|
||||
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/index_data").toStdString()
|
||||
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/content_index_data").toStdString()
|
||||
|
||||
|
@ -20,10 +21,11 @@ public Q_SLOTS:
|
|||
void onKeywordSearchContent(QString keyword, int begin = 0, int num = 20);
|
||||
|
||||
Q_SIGNALS:
|
||||
void result(QVector<QStringList> resultV);
|
||||
void contentResult(QStringList resultL);
|
||||
void result(QVector<QStringList> resultP);
|
||||
void contentResult(QMap<QString,QStringList> resultC);
|
||||
private:
|
||||
QStringList getResult(Xapian::MSet &result);
|
||||
QMap<QString,QStringList> getContentResult(Xapian::MSet &result,QString &keyWord);
|
||||
};
|
||||
|
||||
#endif // FILESEARCHER_H
|
||||
|
|
|
@ -21,7 +21,7 @@ void FileTypeFilter::DoSomething(const QFileInfo& fileInfo){
|
|||
// qDebug() << qmt.preferredSuffix();
|
||||
for (auto i : this->targetFileTypeVec){
|
||||
if (fileInfo.fileName().endsWith(i)){
|
||||
// qDebug() << fileInfo.fileName();
|
||||
qDebug() << fileInfo.fileName();
|
||||
this->result->append(fileInfo.absoluteFilePath());
|
||||
}
|
||||
}
|
||||
|
@ -36,7 +36,7 @@ QList<QString>* FileTypeFilter::getTargetFileAbsolutePath(){
|
|||
void FileTypeFilter::Test(){
|
||||
IndexGenerator* ig = IndexGenerator::getInstance();
|
||||
// this->result = new QList<QString>();
|
||||
// this->result->append(QString("/home/zpf/桌面/DOCX 文档.docx"));
|
||||
// this->result->append(QString("/home/zpf/桌面/DOCX 文档(1).docx"));
|
||||
|
||||
ig->creatAllIndex(this->result);
|
||||
}
|
||||
|
|
|
@ -18,13 +18,14 @@ public:
|
|||
void Test();
|
||||
Q_SIGNALS:
|
||||
private:
|
||||
const QVector<QString> targetFileTypeVec ={ /*QString(".doc"),*/
|
||||
QString(".docx")/*,*/
|
||||
/*QString(".ppt"),
|
||||
QString(".pptx"),
|
||||
QString(".xls"),
|
||||
QString(".xlsx"),
|
||||
QString(".txt")*/};
|
||||
const QVector<QString> targetFileTypeVec ={
|
||||
// QString(".doc"),
|
||||
QString(".docx"),
|
||||
/* QString(".ppt"),
|
||||
QString(".pptx"),
|
||||
QString(".xls"),
|
||||
QString(".xlsx"),
|
||||
QString(".txt")*/};
|
||||
QList<QString>* result;
|
||||
|
||||
};
|
||||
|
|
|
@ -201,7 +201,6 @@ Document IndexGenerator::GenerateDocument(const QVector<QString> &list)
|
|||
doc.setData(sourcePath);
|
||||
doc.setUniqueTerm(uniqueterm);
|
||||
doc.addValue(list.at(2));
|
||||
if(list.at(2) == QString("1"))
|
||||
QStringList temp;
|
||||
temp.append(index_text);
|
||||
// temp.append(pinyin_text_list);
|
||||
|
@ -219,11 +218,12 @@ Document IndexGenerator::GenerateContentDocument(const QString &path)
|
|||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path));
|
||||
QVector<SKeyWord> term = ChineseSegmentation::callSegement(content);
|
||||
Document doc;
|
||||
doc.setData(path);
|
||||
doc.setData(*content);
|
||||
doc.setUniqueTerm(uniqueterm);
|
||||
doc.addValue(path);
|
||||
for(int i = 0;i<term.size();++i)
|
||||
{
|
||||
doc.addterm(term.at(i).word,static_cast<int>(term.at(i).weight));
|
||||
doc.addterm(term.at(i).word,term.at(i).offsets,static_cast<int>(term.at(i).weight));
|
||||
|
||||
}
|
||||
return doc;
|
||||
|
@ -322,6 +322,7 @@ bool IndexGenerator::deleteAllIndex(QStringList *pathlist)
|
|||
{
|
||||
qDebug()<<"--delete start--";
|
||||
m_datebase_path->delete_document(uniqueterm);
|
||||
m_database_content->delete_document(uniqueterm);
|
||||
qDebug()<<"delete md5"<<QString::fromStdString(uniqueterm);
|
||||
m_datebase_path->commit();
|
||||
qDebug()<< "--delete finish--";
|
||||
|
|
|
@ -69,8 +69,8 @@ int main(int argc, char *argv[])
|
|||
/*-------------InotyifyRefact Test End-----------------*/
|
||||
|
||||
/*-------------文本搜索 Test start-----------------*/
|
||||
// FileSearcher *search = new FileSearcher();
|
||||
// search->onKeywordSearchContent("麒麟");
|
||||
FileSearcher *search = new FileSearcher();
|
||||
search->onKeywordSearchContent("测试");
|
||||
/*-------------文本搜索 Test End-----------------*/
|
||||
|
||||
qRegisterMetaType<QVector<QStringList>>("QVector<QStringList>");
|
||||
|
|
Loading…
Reference in New Issue