commit
8b86e40046
|
@ -23,11 +23,14 @@ void Document::setData(QString data)
|
||||||
m_document->set_data(data.toStdString());
|
m_document->set_data(data.toStdString());
|
||||||
}
|
}
|
||||||
|
|
||||||
void Document::addterm(std::string term, int weight)
|
void Document::addterm(std::string term,QVector<size_t> offset, int weight)
|
||||||
{
|
{
|
||||||
if(term == "")
|
if(term == ""||term.length() > 240)
|
||||||
return;
|
return;
|
||||||
m_document->add_term(term,weight);
|
for(size_t i : offset)
|
||||||
|
{
|
||||||
|
m_document->add_posting(term,i,weight);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Document::addValue(QString value)
|
void Document::addValue(QString value)
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
#include <xapian.h>
|
#include <xapian.h>
|
||||||
#include <QString>
|
#include <QString>
|
||||||
#include <QStringList>
|
#include <QStringList>
|
||||||
|
#include <QVector>
|
||||||
|
|
||||||
class Document
|
class Document
|
||||||
{
|
{
|
||||||
|
@ -11,7 +12,7 @@ public:
|
||||||
Document();
|
Document();
|
||||||
~Document();
|
~Document();
|
||||||
void setData(QString data);
|
void setData(QString data);
|
||||||
void addterm(std::string term,int weight =1);
|
void addterm(std::string term, QVector<size_t> offset, int weight =1);
|
||||||
void addValue(QString value);
|
void addValue(QString value);
|
||||||
void setUniqueTerm(QString term);
|
void setUniqueTerm(QString term);
|
||||||
std::string getUniqueTerm();
|
std::string getUniqueTerm();
|
||||||
|
|
|
@ -75,7 +75,7 @@ void FileSearcher::onKeywordSearch(QString keyword, int begin, int num)
|
||||||
|
|
||||||
void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
|
void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
|
||||||
{
|
{
|
||||||
QStringList searchResult;
|
QMap<QString,QStringList> searchResult = QMap<QString,QStringList>();
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
qDebug()<<"--content search start--";
|
qDebug()<<"--content search start--";
|
||||||
|
@ -83,11 +83,11 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
|
||||||
Xapian::Database db(CONTENT_INDEX_PATH);
|
Xapian::Database db(CONTENT_INDEX_PATH);
|
||||||
Xapian::Enquire enquire(db);
|
Xapian::Enquire enquire(db);
|
||||||
Xapian::QueryParser qp;
|
Xapian::QueryParser qp;
|
||||||
qp.set_default_op(Xapian::Query::OP_PHRASE);
|
// qp.set_default_op(Xapian::Query::OP_PHRASE);
|
||||||
qp.set_database(db);
|
qp.set_database(db);
|
||||||
|
|
||||||
//Creat a query
|
//Creat a query
|
||||||
Xapian::Query queryPhrase = qp.parse_query(keyword.toStdString(),Xapian::QueryParser::FLAG_PHRASE);
|
Xapian::Query queryPhrase = qp.parse_query(keyword.toStdString());
|
||||||
|
|
||||||
qDebug()<<QString::fromStdString(queryPhrase.get_description());
|
qDebug()<<QString::fromStdString(queryPhrase.get_description());
|
||||||
|
|
||||||
|
@ -95,7 +95,7 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
|
||||||
//dir result
|
//dir result
|
||||||
Xapian::MSet result = enquire.get_mset(begin, begin+num);
|
Xapian::MSet result = enquire.get_mset(begin, begin+num);
|
||||||
qDebug()<< "find results count=" <<static_cast<int>(result.get_matches_estimated());
|
qDebug()<< "find results count=" <<static_cast<int>(result.get_matches_estimated());
|
||||||
searchResult = getResult(result);
|
searchResult = getContentResult(result,keyword);
|
||||||
|
|
||||||
qDebug()<< "--content search finish--";
|
qDebug()<< "--content search finish--";
|
||||||
}
|
}
|
||||||
|
@ -118,6 +118,7 @@ QStringList FileSearcher::getResult(Xapian::MSet &result)
|
||||||
QStringList searchResult = QStringList();
|
QStringList searchResult = QStringList();
|
||||||
if(result.size() == 0)
|
if(result.size() == 0)
|
||||||
return searchResult;
|
return searchResult;
|
||||||
|
|
||||||
for (auto it = result.begin(); it != result.end(); ++it)
|
for (auto it = result.begin(); it != result.end(); ++it)
|
||||||
{
|
{
|
||||||
Xapian::Document doc = it.get_document();
|
Xapian::Document doc = it.get_document();
|
||||||
|
@ -135,10 +136,53 @@ QStringList FileSearcher::getResult(Xapian::MSet &result)
|
||||||
{
|
{
|
||||||
searchResult.append(QString::fromStdString(data));
|
searchResult.append(QString::fromStdString(data));
|
||||||
}
|
}
|
||||||
|
|
||||||
qDebug()<< "doc="<< QString::fromStdString(data) << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
|
qDebug()<< "doc="<< QString::fromStdString(data) << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
|
||||||
}
|
}
|
||||||
// if(!pathTobeDelete->isEmpty())
|
// if(!pathTobeDelete->isEmpty())
|
||||||
// deleteAllIndex(pathTobeDelete)
|
// deleteAllIndex(pathTobeDelete)
|
||||||
return searchResult;
|
return searchResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Build the content-search result: for every matched document whose
 *        file still exists, collect text snippets surrounding the keyword.
 *
 * @param result   Match set returned by the Xapian enquire.
 * @param keyWord  Keyword that was searched; looked up as a single term in
 *                 each document's term list.
 * @return Map from file path (document value slot 1) to the list of snippets
 *         found in that file. A file whose term list does not contain the
 *         keyword is still reported, with an empty snippet list.
 */
QMap<QString,QStringList> FileSearcher::getContentResult(Xapian::MSet &result, QString &keyWord)
{
    // TODO: collect the paths of documents whose file no longer exists and
    // remove them from the index (deleteAllIndex).

    QMap<QString,QStringList> searchResult;
    if(result.size() == 0)
        return searchResult;

    // Number of characters of context kept on each side of a hit.
    const int contextLen = keyWord.size() + 5;
    const std::string keywordTerm = keyWord.toStdString();

    for (auto it = result.begin(); it != result.end(); ++it)
    {
        Xapian::Document doc = it.get_document();
        double docScoreWeight = it.get_weight();
        Xapian::percent docScorePercent = it.get_percent();
        QString path = QString::fromStdString(doc.get_value(1));

        // Stack object: the previous heap-allocated QFileInfo was never freed.
        const QFileInfo info(path);
        if(!info.exists())
        {
            qDebug()<<path<<"is not exist!!";
            continue;
        }

        // Construct snippets containing keyword.
        QStringList snippets;
        auto term = doc.termlist_begin();
        term.skip_to(keywordTerm);

        // skip_to() stops at the first term >= keywordTerm, so it may be at
        // the end of the list or on a different term; only read positions
        // when the keyword itself was found.
        if(term != doc.termlist_end() && *term == keywordTerm)
        {
            // Convert the stored content once per document, not once per hit.
            const QByteArray contentBytes = QByteArray::fromStdString(doc.get_data());
            for(auto pos = term.positionlist_begin(); pos != term.positionlist_end(); ++pos)
            {
                const int offset = static_cast<int>(*pos);
                const QString before = QString(contentBytes.left(offset)).right(contextLen);
                const QString after = QString(contentBytes.mid(offset)).left(contextLen);
                snippets.append("..." + before + after + "...");
            }
        }

        searchResult.insert(path,snippets);
        qDebug()<< "path="<< path << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
    }
    return searchResult;
}
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#include <xapian.h>
|
#include <xapian.h>
|
||||||
#include <QStandardPaths>
|
#include <QStandardPaths>
|
||||||
#include <QVector>
|
#include <QVector>
|
||||||
|
#include <QMap>
|
||||||
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/index_data").toStdString()
|
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/index_data").toStdString()
|
||||||
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/content_index_data").toStdString()
|
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/content_index_data").toStdString()
|
||||||
|
|
||||||
|
@ -20,10 +21,11 @@ public Q_SLOTS:
|
||||||
void onKeywordSearchContent(QString keyword, int begin = 0, int num = 20);
|
void onKeywordSearchContent(QString keyword, int begin = 0, int num = 20);
|
||||||
|
|
||||||
Q_SIGNALS:
|
Q_SIGNALS:
|
||||||
void result(QVector<QStringList> resultV);
|
void result(QVector<QStringList> resultP);
|
||||||
void contentResult(QStringList resultL);
|
void contentResult(QMap<QString,QStringList> resultC);
|
||||||
private:
|
private:
|
||||||
QStringList getResult(Xapian::MSet &result);
|
QStringList getResult(Xapian::MSet &result);
|
||||||
|
QMap<QString,QStringList> getContentResult(Xapian::MSet &result,QString &keyWord);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // FILESEARCHER_H
|
#endif // FILESEARCHER_H
|
||||||
|
|
|
@ -21,7 +21,7 @@ void FileTypeFilter::DoSomething(const QFileInfo& fileInfo){
|
||||||
// qDebug() << qmt.preferredSuffix();
|
// qDebug() << qmt.preferredSuffix();
|
||||||
for (auto i : this->targetFileTypeVec){
|
for (auto i : this->targetFileTypeVec){
|
||||||
if (fileInfo.fileName().endsWith(i)){
|
if (fileInfo.fileName().endsWith(i)){
|
||||||
// qDebug() << fileInfo.fileName();
|
qDebug() << fileInfo.fileName();
|
||||||
this->result->append(fileInfo.absoluteFilePath());
|
this->result->append(fileInfo.absoluteFilePath());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -36,7 +36,7 @@ QList<QString>* FileTypeFilter::getTargetFileAbsolutePath(){
|
||||||
void FileTypeFilter::Test(){
|
void FileTypeFilter::Test(){
|
||||||
IndexGenerator* ig = IndexGenerator::getInstance();
|
IndexGenerator* ig = IndexGenerator::getInstance();
|
||||||
// this->result = new QList<QString>();
|
// this->result = new QList<QString>();
|
||||||
// this->result->append(QString("/home/zpf/桌面/DOCX 文档.docx"));
|
// this->result->append(QString("/home/zpf/桌面/DOCX 文档(1).docx"));
|
||||||
|
|
||||||
ig->creatAllIndex(this->result);
|
ig->creatAllIndex(this->result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,13 +18,14 @@ public:
|
||||||
void Test();
|
void Test();
|
||||||
Q_SIGNALS:
|
Q_SIGNALS:
|
||||||
private:
|
private:
|
||||||
const QVector<QString> targetFileTypeVec ={ /*QString(".doc"),*/
|
const QVector<QString> targetFileTypeVec ={
|
||||||
QString(".docx")/*,*/
|
// QString(".doc"),
|
||||||
/*QString(".ppt"),
|
QString(".docx"),
|
||||||
QString(".pptx"),
|
/* QString(".ppt"),
|
||||||
QString(".xls"),
|
QString(".pptx"),
|
||||||
QString(".xlsx"),
|
QString(".xls"),
|
||||||
QString(".txt")*/};
|
QString(".xlsx"),
|
||||||
|
QString(".txt")*/};
|
||||||
QList<QString>* result;
|
QList<QString>* result;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -201,7 +201,6 @@ Document IndexGenerator::GenerateDocument(const QVector<QString> &list)
|
||||||
doc.setData(sourcePath);
|
doc.setData(sourcePath);
|
||||||
doc.setUniqueTerm(uniqueterm);
|
doc.setUniqueTerm(uniqueterm);
|
||||||
doc.addValue(list.at(2));
|
doc.addValue(list.at(2));
|
||||||
if(list.at(2) == QString("1"))
|
|
||||||
QStringList temp;
|
QStringList temp;
|
||||||
temp.append(index_text);
|
temp.append(index_text);
|
||||||
// temp.append(pinyin_text_list);
|
// temp.append(pinyin_text_list);
|
||||||
|
@ -219,11 +218,12 @@ Document IndexGenerator::GenerateContentDocument(const QString &path)
|
||||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path));
|
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path));
|
||||||
QVector<SKeyWord> term = ChineseSegmentation::callSegement(content);
|
QVector<SKeyWord> term = ChineseSegmentation::callSegement(content);
|
||||||
Document doc;
|
Document doc;
|
||||||
doc.setData(path);
|
doc.setData(*content);
|
||||||
doc.setUniqueTerm(uniqueterm);
|
doc.setUniqueTerm(uniqueterm);
|
||||||
|
doc.addValue(path);
|
||||||
for(int i = 0;i<term.size();++i)
|
for(int i = 0;i<term.size();++i)
|
||||||
{
|
{
|
||||||
doc.addterm(term.at(i).word,static_cast<int>(term.at(i).weight));
|
doc.addterm(term.at(i).word,term.at(i).offsets,static_cast<int>(term.at(i).weight));
|
||||||
|
|
||||||
}
|
}
|
||||||
return doc;
|
return doc;
|
||||||
|
@ -322,6 +322,7 @@ bool IndexGenerator::deleteAllIndex(QStringList *pathlist)
|
||||||
{
|
{
|
||||||
qDebug()<<"--delete start--";
|
qDebug()<<"--delete start--";
|
||||||
m_datebase_path->delete_document(uniqueterm);
|
m_datebase_path->delete_document(uniqueterm);
|
||||||
|
m_database_content->delete_document(uniqueterm);
|
||||||
qDebug()<<"delete md5"<<QString::fromStdString(uniqueterm);
|
qDebug()<<"delete md5"<<QString::fromStdString(uniqueterm);
|
||||||
m_datebase_path->commit();
|
m_datebase_path->commit();
|
||||||
qDebug()<< "--delete finish--";
|
qDebug()<< "--delete finish--";
|
||||||
|
|
|
@ -69,8 +69,8 @@ int main(int argc, char *argv[])
|
||||||
/*-------------InotyifyRefact Test End-----------------*/
|
/*-------------InotyifyRefact Test End-----------------*/
|
||||||
|
|
||||||
/*-------------文本搜索 Test start-----------------*/
|
/*-------------文本搜索 Test start-----------------*/
|
||||||
// FileSearcher *search = new FileSearcher();
|
FileSearcher *search = new FileSearcher();
|
||||||
// search->onKeywordSearchContent("麒麟");
|
search->onKeywordSearchContent("测试");
|
||||||
/*-------------文本搜索 Test End-----------------*/
|
/*-------------文本搜索 Test End-----------------*/
|
||||||
|
|
||||||
qRegisterMetaType<QVector<QStringList>>("QVector<QStringList>");
|
qRegisterMetaType<QVector<QStringList>>("QVector<QStringList>");
|
||||||
|
|
Loading…
Reference in New Issue