Try to fix a bug that caused excessive memory usage.

This commit is contained in:
zhangpengfei 2021-01-19 10:44:28 +08:00
parent 5de90a7ff6
commit 20efcc67af
12 changed files with 356 additions and 103 deletions

View File

@ -1,9 +1,12 @@
#include "chinese-segmentation.h" #include "chinese-segmentation.h"
#include <QFileInfo> #include <QFileInfo>
static ChineseSegmentation *global_instance = nullptr; #include <QDebug>
static ChineseSegmentation *global_instance_chinese_segmentation = nullptr;
QMutex ChineseSegmentation::m_mutex;
ChineseSegmentation::ChineseSegmentation() ChineseSegmentation::ChineseSegmentation()
{ {
QMutexLocker locker(&m_mutex);
const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8"; const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8"; const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
const char * const USER_DICT_PATH ="/usr/share/ukui-search/res/dict/user.dict.utf8"; const char * const USER_DICT_PATH ="/usr/share/ukui-search/res/dict/user.dict.utf8";
@ -21,20 +24,21 @@ ChineseSegmentation::~ChineseSegmentation()
{ {
if(m_jieba) if(m_jieba)
delete m_jieba; delete m_jieba;
m_jieba = nullptr;
} }
ChineseSegmentation *ChineseSegmentation::getInstance() ChineseSegmentation *ChineseSegmentation::getInstance()
{ {
if (!global_instance) { if (!global_instance_chinese_segmentation) {
global_instance = new ChineseSegmentation; global_instance_chinese_segmentation = new ChineseSegmentation;
} }
return global_instance; return global_instance_chinese_segmentation;
} }
QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str) QVector<SKeyWord> ChineseSegmentation::callSegement(QString& str)
{ {
std::string s; std::string s;
s=str->toStdString(); s=str.toStdString();
const size_t topk = -1; const size_t topk = -1;
std::vector<cppjieba::KeywordExtractor::Word> keywordres; std::vector<cppjieba::KeywordExtractor::Word> keywordres;

View File

@ -10,6 +10,7 @@
#include <QVector> #include <QVector>
#include <QString> #include <QString>
#include <QDebug> #include <QDebug>
#include <QMutex>
struct SKeyWord{ struct SKeyWord{
std::string word; std::string word;
@ -21,12 +22,14 @@ class CHINESESEGMENTATION_EXPORT ChineseSegmentation
{ {
public: public:
static ChineseSegmentation *getInstance(); static ChineseSegmentation *getInstance();
QVector<SKeyWord> callSegement(QString *str); ~ChineseSegmentation();
QVector<SKeyWord> callSegement(QString &str);
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw); void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
private: private:
static QMutex m_mutex;
cppjieba::Jieba *m_jieba; cppjieba::Jieba *m_jieba;
explicit ChineseSegmentation(); explicit ChineseSegmentation();
~ChineseSegmentation();
}; };
#endif // CHINESESEGMENTATION_H #endif // CHINESESEGMENTATION_H

View File

@ -0,0 +1,103 @@
#include "construct-document.h"
#include "file-utils.h"
#include "chinese-segmentation.h"
#include <QDebug>
#include <QThread>
//extern QList<Document> *_doc_list_path;
//extern QMutex _mutex_doc_list_path;
// Creates a pool task for one path-index message.
// `list` is taken by value and moved into the task; `parent` is accepted for
// interface compatibility but is not used by this constructor.
ConstructDocumentForPath::ConstructDocumentForPath(QVector<QString> list, IndexGenerator *parent)
    : m_list(std::move(list))
{
    // Enable QRunnable auto-deletion so the task is not freed manually.
    setAutoDelete(true);
}
// Nothing to release explicitly: m_list cleans up after itself.
ConstructDocumentForPath::~ConstructDocumentForPath() = default;
void ConstructDocumentForPath::run()
{
// qDebug()<<"ConstructDocumentForPath";
if (!_doc_list_path)
_doc_list_path = new QList<Document>;
// qDebug()<<_doc_list_path->size();
QString index_text = m_list.at(0);
QString sourcePath = m_list.at(1);
index_text = index_text.replace(""," ");
index_text = index_text.simplified();
//不带多音字版
// QString pinyin_text = FileUtils::find(QString(list.at(0)).replace(".","")).replace("", " ").simplified();
//多音字版
//现加入首字母
QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(m_list.at(0)).replace(".",""));
for (QString& i : pinyin_text_list){
i.replace("", " ");
i = i.simplified();
}
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep)));
Document doc;
doc.setData(sourcePath);
doc.setUniqueTerm(uniqueterm);
doc.addTerm(upTerm);
doc.addValue(m_list.at(2));
QStringList temp;
temp.append(index_text);
temp.append(pinyin_text_list);
doc.setIndexText(temp);
// QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc));
_mutex_doc_list_path.lock();
_doc_list_path->append(doc);
_mutex_doc_list_path.unlock();
// qDebug()<<"ConstructDocumentForPath finish";
return;
}
// Creates a pool task that will build the content-index Document for `path`.
// `path` is taken by value and moved into the task. `parent` is accepted but
// not forwarded to the QObject base, which is default-constructed here.
ConstructDocumentForContent::ConstructDocumentForContent(QString path, QObject *parent)
    : m_path(std::move(path))
{
    // Enable QRunnable auto-deletion so the task is not freed manually.
    setAutoDelete(true);
}
// Nothing to release explicitly: m_path cleans up after itself.
ConstructDocumentForContent::~ConstructDocumentForContent() = default;
void ConstructDocumentForContent::run()
{
qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId();
// 构造文本索引的document
if (!_doc_list_content)
_doc_list_content = new QList<Document>;
QString content;
FileReader::getTextContent(m_path,content);
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content);
Document doc;
doc.setData(content);
doc.setUniqueTerm(uniqueterm);
doc.addTerm(upTerm);
doc.addValue(m_path);
for(int i = 0;i<term.size();++i)
{
doc.addPosting(term.at(i).word,term.at(i).offsets,static_cast<int>(term.at(i).weight));
}
_mutex_doc_list_content.lock();
_doc_list_content->append(doc);
_mutex_doc_list_content.unlock();
content.clear();
term.clear();
return;
}

View File

@ -0,0 +1,39 @@
#ifndef CONSTRUCTDOCUMENT_H
#define CONSTRUCTDOCUMENT_H
#include <QRunnable>
#include <QVector>
//#include <QMetaObject>
#include "document.h"
#include "index-generator.h"
//extern QList<Document> *_doc_list_path;
//extern QMutex _mutex_doc_list_path;
class IndexGenerator;
class ConstructDocumentForPath : public QRunnable
{
public:
explicit ConstructDocumentForPath(QVector<QString> list,IndexGenerator *parent = nullptr);
~ConstructDocumentForPath();
protected:
void run();
private:
QVector<QString> m_list;
};
class ConstructDocumentForContent : public QObject, public QRunnable
{
Q_OBJECT
public:
explicit ConstructDocumentForContent(QString path,QObject *parent = nullptr);
~ConstructDocumentForContent();
protected:
void run();
private:
QString m_path;
};
#endif // CONSTRUCTDOCUMENT_H

View File

@ -3,24 +3,13 @@
Document::Document() Document::Document()
{ {
m_document = new Xapian::Document;
}
Document::~Document()
{
// if(m_document)
// delete m_document;
// if(m_index_text)
// delete m_index_text;
// if(m_unique_term)
// delete m_unique_term;
} }
void Document::setData(QString data) void Document::setData(QString data)
{ {
if(data.isEmpty()) if(data.isEmpty())
return; return;
m_document->set_data(data.toStdString()); m_document.set_data(data.toStdString());
} }
void Document::addPosting(std::string term,QVector<size_t> offset, int weight) void Document::addPosting(std::string term,QVector<size_t> offset, int weight)
@ -32,7 +21,7 @@ void Document::addPosting(std::string term,QVector<size_t> offset, int weight)
for(size_t i : offset) for(size_t i : offset)
{ {
m_document->add_posting(term,i,weight); m_document.add_posting(term,i,weight);
} }
} }
@ -40,40 +29,37 @@ void Document::addTerm(QString term)
{ {
if(term.isEmpty()) if(term.isEmpty())
return; return;
m_document->add_term(term.toStdString()); m_document.add_term(term.toStdString());
} }
void Document::addValue(QString value) void Document::addValue(QString value)
{ {
m_document->add_value(1,value.toStdString()); m_document.add_value(1,value.toStdString());
} }
void Document::setUniqueTerm(QString term) void Document::setUniqueTerm(QString term)
{ {
if(term.isEmpty()) if(term.isEmpty())
return; return;
m_document->add_term(term.toStdString()); m_document.add_term(term.toStdString());
m_unique_term = std::move(term);
m_unique_term = new QString(term);
} }
std::string Document::getUniqueTerm() std::string Document::getUniqueTerm()
{ {
// qDebug()<<"m_unique_term!"<<*m_unique_term; return m_unique_term.toStdString();
return m_unique_term->toStdString();
} }
void Document::setIndexText(QStringList indexText) void Document::setIndexText(QStringList indexText)
{ {
// QStringList indexTextList = indexText; m_index_text = std::move(indexText);
m_index_text = new QStringList(indexText);
} }
QStringList Document::getIndexText() QStringList Document::getIndexText()
{ {
return *m_index_text; return m_index_text;
} }
Xapian::Document Document::getXapianDocument() Xapian::Document Document::getXapianDocument()
{ {
return *m_document; return m_document;
} }

View File

@ -10,7 +10,17 @@ class Document
{ {
public: public:
Document(); Document();
~Document(); ~Document(){}
Document(const Document& other){
m_document = other.m_document;
m_index_text = other.m_index_text;
m_unique_term = other.m_unique_term;
}
void operator=(const Document& other){
m_document = other.m_document;
m_index_text = other.m_index_text;
m_unique_term = other.m_unique_term;
}
void setData(QString data); void setData(QString data);
void addPosting(std::string term, QVector<size_t> offset, int weight =1); void addPosting(std::string term, QVector<size_t> offset, int weight =1);
void addTerm(QString term); void addTerm(QString term);
@ -21,9 +31,9 @@ public:
QStringList getIndexText(); QStringList getIndexText();
Xapian::Document getXapianDocument(); Xapian::Document getXapianDocument();
private: private:
Xapian::Document *m_document; Xapian::Document m_document;
QStringList *m_index_text; QStringList m_index_text;
QString *m_unique_term; QString m_unique_term;
}; };

View File

@ -21,10 +21,18 @@ FileSearcher::~FileSearcher()
} }
int FileSearcher::getCurrentIndexCount() int FileSearcher::getCurrentIndexCount()
{
try
{ {
Xapian::Database db(INDEX_PATH); Xapian::Database db(INDEX_PATH);
return db.get_doccount(); return db.get_doccount();
} }
catch(const Xapian::Error &e)
{
qWarning() <<QString::fromStdString(e.get_description());
return 0;
}
}
void FileSearcher::onKeywordSearch(QString keyword,QQueue<QString> *searchResultFile,QQueue<QString> *searchResultDir,QQueue<QPair<QString,QStringList>> *searchResultContent) void FileSearcher::onKeywordSearch(QString keyword,QQueue<QString> *searchResultFile,QQueue<QString> *searchResultDir,QQueue<QPair<QString,QStringList>> *searchResultContent)
{ {
@ -154,7 +162,7 @@ int FileSearcher::keywordSearchContent(size_t uniqueSymbol, QString keyword, int
qp.set_default_op(Xapian::Query::OP_AND); qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db); qp.set_database(db);
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(&keyword); QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(keyword);
//Creat a query //Creat a query
std::string words; std::string words;
for(int i=0;i<sKeyWord.size();i++) for(int i=0;i<sKeyWord.size();i++)
@ -199,8 +207,9 @@ Xapian::Query FileSearcher::creatQueryForFileSearch(QString keyword, Xapian::Dat
qp.set_default_op(Xapian::Query::OP_PHRASE); qp.set_default_op(Xapian::Query::OP_PHRASE);
qp.set_database(db); qp.set_database(db);
auto userInput = keyword; auto userInput = keyword;
userInput = userInput.replace(".","").simplified();
std::string queryStr = keyword.replace(""," ").toStdString(); std::string queryStr = keyword.replace(".","").replace(" ","").replace(""," ").simplified().toStdString();
// std::string s =db.get_spelling_suggestion(queryStr,10); // std::string s =db.get_spelling_suggestion(queryStr,10);
// qDebug()<<"spelling_suggestion!"<<QString::fromStdString(s); // qDebug()<<"spelling_suggestion!"<<QString::fromStdString(s);

View File

@ -55,8 +55,10 @@ FirstIndex::FirstIndex(const QString& path) : Traverse_BFS(path)
FirstIndex::~FirstIndex() FirstIndex::~FirstIndex()
{ {
qDebug() << "~FirstIndex"; qDebug() << "~FirstIndex";
if(this->q_index)
delete this->q_index; delete this->q_index;
this->q_index = nullptr; this->q_index = nullptr;
if(this->q_content_index)
delete this->q_content_index; delete this->q_content_index;
this->q_content_index = nullptr; this->q_content_index = nullptr;
// delete this->p_indexGenerator; // delete this->p_indexGenerator;
@ -66,7 +68,7 @@ FirstIndex::~FirstIndex()
} }
void FirstIndex::DoSomething(const QFileInfo& fileInfo){ void FirstIndex::DoSomething(const QFileInfo& fileInfo){
// qDebug() << "there are some shit here"; // qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0")); this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0"));
for (auto i : this->targetFileTypeVec){ for (auto i : this->targetFileTypeVec){
if (fileInfo.fileName().endsWith(i)){ if (fileInfo.fileName().endsWith(i)){
@ -89,11 +91,11 @@ void FirstIndex::run(){
else{ else{
//if the parameter is false, index won't be rebuild //if the parameter is false, index won't be rebuild
//if it is true, index will be rebuild //if it is true, index will be rebuild
this->p_indexGenerator = IndexGenerator::getInstance(true); this->p_indexGenerator = IndexGenerator::getInstance(true,this);
} }
} }
else{ else{
this->p_indexGenerator = IndexGenerator::getInstance(false); this->p_indexGenerator = IndexGenerator::getInstance(false,this);
} }
QSemaphore sem(5); QSemaphore sem(5);
QMutex mutex1, mutex2, mutex3; QMutex mutex1, mutex2, mutex3;

View File

@ -2,27 +2,32 @@
#include <QStandardPaths> #include <QStandardPaths>
#include <QFileInfo> #include <QFileInfo>
#include <QDebug> #include <QDebug>
#include "chinese-segmentation.h" #include <QtConcurrent>
#include <QFuture>
#include <QThreadPool>
#include "file-utils.h" #include "file-utils.h"
#include "index-generator.h" #include "index-generator.h"
#include "global-settings.h" #include "global-settings.h"
#include "chinese-segmentation.h"
#include "construct-document.h"
#include <QtConcurrent>
#include <QFuture>
using namespace std; using namespace std;
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/index_data").toStdString() #define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/index_data").toStdString()
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/content_index_data").toStdString() #define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/content_index_data").toStdString()
static IndexGenerator *global_instance = nullptr; static IndexGenerator *global_instance = nullptr;
QMutex IndexGenerator::m_mutex; QMutex IndexGenerator::m_mutex;
QList<Document> *_doc_list_path;
IndexGenerator *IndexGenerator::getInstance(bool rebuild) QMutex _mutex_doc_list_path;
QList<Document> *_doc_list_content;
QMutex _mutex_doc_list_content;
IndexGenerator *IndexGenerator::getInstance(bool rebuild, QObject *parent)
{ {
QMutexLocker locker(&m_mutex); QMutexLocker locker(&m_mutex);
if (!global_instance) { if (!global_instance) {
global_instance = new IndexGenerator(rebuild); qDebug()<<"IndexGenerator=================";
global_instance = new IndexGenerator(rebuild,parent);
} }
qDebug() << "global_instance" << global_instance; qDebug() << "global_instance" << global_instance;
qDebug() << "QThread::currentThreadId()" << QThread::currentThreadId(); qDebug() << "QThread::currentThreadId()" << QThread::currentThreadId();
@ -41,23 +46,25 @@ bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList)
try try
{ {
m_indexer = new Xapian::TermGenerator(); m_indexer = new Xapian::TermGenerator();
m_indexer->set_database(*m_datebase_path); m_indexer->set_database(*m_database_path);
//可以实现拼写纠正 //可以实现拼写纠正
// m_indexer->set_flags(Xapian::TermGenerator::FLAG_SPELLING); // m_indexer->set_flags(Xapian::TermGenerator::FLAG_SPELLING);
m_indexer->set_stemming_strategy(Xapian::TermGenerator::STEM_SOME); m_indexer->set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
int count =0; int count =0;
for(int i = 0;i < m_doc_list_path->size(); i++) for(int i = 0;i < _doc_list_path->size(); i++)
{ {
insertIntoDatabase(m_doc_list_path->at(i)); insertIntoDatabase(_doc_list_path->at(i));
if(++count == 9999) if(++count == 9000)
{ {
count = 0; count = 0;
m_datebase_path->commit(); m_database_path->commit();
} }
} }
m_datebase_path->commit(); m_database_path->commit();
if(m_indexer)
delete m_indexer;
} }
catch(const Xapian::Error &e) catch(const Xapian::Error &e)
{ {
@ -66,8 +73,10 @@ bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList)
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE,"1"); GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE,"1");
assert(false); assert(false);
} }
m_doc_list_path->clear(); _doc_list_path->clear();
Q_EMIT this->transactionFinished(); delete _doc_list_path;
_doc_list_path = nullptr;
// Q_EMIT this->transactionFinished();
return true; return true;
} }
@ -76,12 +85,16 @@ bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList)
{ {
FileUtils::_index_status = CREATING_INDEX; FileUtils::_index_status = CREATING_INDEX;
HandlePathList(messageList); HandlePathList(messageList);
int size = _doc_list_content->size();
if(!size == 0)
{
try try
{ {
int count =0; int count =0;
for(int i = 0;i < m_doc_list_content->size(); i++) for(int i = 0;i < size; i++)
{ {
insertIntoContentDatabase(m_doc_list_content->at(i)); insertIntoContentDatabase(_doc_list_content->at(0));
_doc_list_content->removeFirst();
if(++count == 1000) if(++count == 1000)
{ {
@ -97,8 +110,10 @@ bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList)
GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE,"1"); GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE,"1");
assert(false); assert(false);
} }
m_doc_list_content->clear(); delete _doc_list_content;
Q_EMIT this->transactionFinished(); _doc_list_content = nullptr;
}
// Q_EMIT this->transactionFinished();
FileUtils::_index_status = FINISH_CREATING_INDEX; FileUtils::_index_status = FINISH_CREATING_INDEX;
return true; return true;
@ -108,12 +123,12 @@ IndexGenerator::IndexGenerator(bool rebuild, QObject *parent) : QObject(parent)
{ {
if(rebuild) if(rebuild)
{ {
m_datebase_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE); m_database_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE); m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
} }
else else
{ {
m_datebase_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OPEN); m_database_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OPEN);
m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH, Xapian::DB_CREATE_OR_OPEN); m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH, Xapian::DB_CREATE_OR_OPEN);
} }
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE,"0"); GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE,"0");
@ -124,10 +139,29 @@ IndexGenerator::~IndexGenerator()
{ {
QMutexLocker locker(&m_mutex); QMutexLocker locker(&m_mutex);
qDebug() << "~IndexGenerator"; qDebug() << "~IndexGenerator";
if(m_datebase_path) m_database_path->close();
delete m_datebase_path; m_database_content->close();
if(m_database_path)
delete m_database_path;
m_database_path = nullptr;
if(m_database_content) if(m_database_content)
delete m_database_content; delete m_database_content;
m_database_content = nullptr;
if(m_index_map)
delete m_index_map;
m_index_map = nullptr;
// if(m_doc_list_path)
// delete m_doc_list_path;
// m_doc_list_path = nullptr;
// if(_doc_list_content)
// delete m_doc_list_content;
// m_doc_list_content = nullptr;
if(m_index_data_path)
delete m_index_data_path;
m_index_data_path = nullptr;
if(m_indexer)
delete m_indexer;
m_indexer = nullptr;
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE, "2"); GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE, "2");
GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2"); GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2");
GlobalSettings::getInstance()->setValue(INDEX_GENERATOR_NORMAL_EXIT, "2"); GlobalSettings::getInstance()->setValue(INDEX_GENERATOR_NORMAL_EXIT, "2");
@ -147,7 +181,7 @@ void IndexGenerator::insertIntoDatabase(Document doc)
m_indexer->index_text(i.toStdString()); m_indexer->index_text(i.toStdString());
} }
Xapian::docid innerId= m_datebase_path->replace_document(doc.getUniqueTerm(),document); Xapian::docid innerId= m_database_path->replace_document(doc.getUniqueTerm(),document);
// qDebug()<<"replace doc docid="<<static_cast<int>(innerId); // qDebug()<<"replace doc docid="<<static_cast<int>(innerId);
// qDebug()<< "--index finish--"; // qDebug()<< "--index finish--";
return; return;
@ -166,13 +200,30 @@ void IndexGenerator::HandlePathList(QQueue<QVector<QString>> *messageList)
qDebug()<<"Begin HandlePathList!"; qDebug()<<"Begin HandlePathList!";
qDebug()<<messageList->size(); qDebug()<<messageList->size();
// qDebug()<<QString::number(quintptr(QThread::currentThreadId())); // qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
QFuture<Document> future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateDocument); // QFuture<Document> future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateDocument);
future.waitForFinished(); // future.waitForFinished();
QList<Document> docList = future.results(); // QList<Document> docList = future.results();
m_doc_list_path = new QList<Document>(docList); // future.cancel();
qDebug()<<m_doc_list_path->size(); // m_doc_list_path = new QList<Document>(docList);
QThreadPool pool;
// pool.setMaxThreadCount(1);
ConstructDocumentForPath *constructer;
while(!messageList->isEmpty())
{
constructer = new ConstructDocumentForPath(messageList->dequeue());
pool.start(constructer);
}
// while(!pool.waitForDone(1))
// qDebug()<<"fuck"<<pool.waitForDone(1);
qDebug()<<"pool finish"<<pool.waitForDone(-1);
// if(constructer)
// delete constructer;
// constructer = nullptr;
qDebug()<<_doc_list_path->size();
qDebug()<<"Finish HandlePathList!"; qDebug()<<"Finish HandlePathList!";
return; return;
@ -183,13 +234,31 @@ void IndexGenerator::HandlePathList(QQueue<QString> *messageList)
qDebug()<<"Begin HandlePathList for content index!"; qDebug()<<"Begin HandlePathList for content index!";
qDebug()<<messageList->size(); qDebug()<<messageList->size();
// qDebug()<<QString::number(quintptr(QThread::currentThreadId())); // qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
QFuture<Document> future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateContentDocument); ChineseSegmentation::getInstance();
ConstructDocumentForContent *constructer;
QThreadPool pool;
// pool.setMaxThreadCount(2);
pool.setExpiryTimeout(1000);
while(!messageList->isEmpty())
{
constructer = new ConstructDocumentForContent(messageList->dequeue());
pool.start(constructer);
}
// while(!pool.waitForDone(1))
// qDebug()<<"fuck"<<pool.waitForDone(1);
qDebug()<<"pool finish"<<pool.waitForDone(-1);
// if(constructer)
// delete constructer;
// constructer = nullptr;
future.waitForFinished(); // QFuture<Document> future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateContentDocument);
QList<Document> docList = future.results(); // future.waitForFinished();
m_doc_list_content = new QList<Document>(docList); // ChineseSegmentation::getInstance()->~ChineseSegmentation();
qDebug()<<m_doc_list_content->size();
// QList<Document> docList = future.results();
// m_doc_list_content = new QList<Document>(docList);
qDebug()<<_doc_list_content->size();
qDebug()<<"Finish HandlePathList for content index!"; qDebug()<<"Finish HandlePathList for content index!";
return; return;
@ -202,7 +271,7 @@ Document IndexGenerator::GenerateDocument(const QVector<QString> &list)
//0-filename 1-filepathname 2-file or dir //0-filename 1-filepathname 2-file or dir
QString index_text = list.at(0); QString index_text = list.at(0);
QString sourcePath = list.at(1); QString sourcePath = list.at(1);
index_text = index_text.replace(".","").replace(""," "); index_text = index_text.replace(""," ");
index_text = index_text.simplified(); index_text = index_text.simplified();
//不带多音字版 //不带多音字版
@ -249,7 +318,9 @@ Document IndexGenerator::GenerateContentDocument(const QString &path)
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path)); QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(path.section("/",0,-2,QString::SectionIncludeLeadingSep))); QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(&content); QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content);
// QStringList term = content.split("");
Document doc; Document doc;
doc.setData(content); doc.setData(content);
doc.setUniqueTerm(uniqueterm); doc.setUniqueTerm(uniqueterm);
@ -260,6 +331,20 @@ Document IndexGenerator::GenerateContentDocument(const QString &path)
doc.addPosting(term.at(i).word,term.at(i).offsets,static_cast<int>(term.at(i).weight)); doc.addPosting(term.at(i).word,term.at(i).offsets,static_cast<int>(term.at(i).weight));
} }
// Document doc;
// doc.setData(content);
// doc.setUniqueTerm(uniqueterm);
// doc.addTerm(upTerm);
// doc.addValue(path);
// int pos = 0;
// for(QString i : term)
// {
// doc.addPosting(i.toStdString(),QVector<size_t>() << ++pos,1);
// }
content.clear();
term.clear();
return doc; return doc;
} }
@ -267,6 +352,7 @@ bool IndexGenerator::isIndexdataExist()
{ {
// Xapian::Database db(m_index_data_path->toStdString()); // Xapian::Database db(m_index_data_path->toStdString());
return true;
} }
@ -355,11 +441,11 @@ bool IndexGenerator::deleteAllIndex(QStringList *pathlist)
try try
{ {
qDebug()<<"--delete start--"; qDebug()<<"--delete start--";
m_datebase_path->delete_document(uniqueterm); m_database_path->delete_document(uniqueterm);
m_database_content->delete_document(uniqueterm); m_database_content->delete_document(uniqueterm);
qDebug()<<"delete path"<<doc; qDebug()<<"delete path"<<doc;
qDebug()<<"delete md5"<<QString::fromStdString(uniqueterm); qDebug()<<"delete md5"<<QString::fromStdString(uniqueterm);
m_datebase_path->commit(); m_database_path->commit();
qDebug()<< "--delete finish--"; qDebug()<< "--delete finish--";
} }
catch(const Xapian::Error &e) catch(const Xapian::Error &e)

View File

@ -9,17 +9,25 @@
#include <QCryptographicHash> #include <QCryptographicHash>
#include <QMutex> #include <QMutex>
#include <QQueue> #include <QQueue>
//#include <QMetaObject>
#include "document.h" #include "document.h"
#include "file-reader.h" #include "file-reader.h"
//#include "chinese-segmentation.h"
extern QList<Document> *_doc_list_path;
extern QMutex _mutex_doc_list_path;
extern QList<Document> *_doc_list_content;
extern QMutex _mutex_doc_list_content;
class IndexGenerator : public QObject class IndexGenerator : public QObject
{ {
Q_OBJECT Q_OBJECT
public: public:
static IndexGenerator *getInstance(bool rebuild = false); static IndexGenerator *getInstance(bool rebuild = false,QObject *parent = nullptr);
~IndexGenerator(); ~IndexGenerator();
bool setIndexdataPath(); bool setIndexdataPath();
bool isIndexdataExist(); bool isIndexdataExist();
// Q_INVOKABLE void appendDocListPath(Document doc);
//for search test //for search test
static QStringList IndexSearch(QString indexText); static QStringList IndexSearch(QString indexText);
Q_SIGNALS: Q_SIGNALS:
@ -44,10 +52,10 @@ private:
void insertIntoContentDatabase(Document doc); void insertIntoContentDatabase(Document doc);
QMap<QString,QStringList> *m_index_map; QMap<QString,QStringList> *m_index_map;
QList<Document> *m_doc_list_path; //for path index // QList<Document> *m_doc_list_path; //for path index
QList<Document> *m_doc_list_content; // for text content index // QList<Document> *m_doc_list_content; // for text content index
QString *m_index_data_path; QString *m_index_data_path;
Xapian::WritableDatabase *m_datebase_path; Xapian::WritableDatabase *m_database_path;
Xapian::WritableDatabase *m_database_content; Xapian::WritableDatabase *m_database_content;
std::string m_docstr; std::string m_docstr;
std::string m_index_text_str; std::string m_index_text_str;

View File

@ -1,6 +1,7 @@
INCLUDEPATH += $$PWD INCLUDEPATH += $$PWD
HEADERS += \ HEADERS += \
$$PWD/construct-document.h \
$$PWD/document.h \ $$PWD/document.h \
$$PWD/file-reader.h \ $$PWD/file-reader.h \
$$PWD/first-index.h \ $$PWD/first-index.h \
@ -11,6 +12,7 @@ HEADERS += \
$$PWD/ukui-search-qdbus.h $$PWD/ukui-search-qdbus.h
SOURCES += \ SOURCES += \
$$PWD/construct-document.cpp \
$$PWD/document.cpp \ $$PWD/document.cpp \
$$PWD/file-reader.cpp \ $$PWD/file-reader.cpp \
$$PWD/first-index.cpp \ $$PWD/first-index.cpp \

View File

@ -92,6 +92,7 @@ int main(int argc, char *argv[])
{ {
qInstallMessageHandler(messageOutput); qInstallMessageHandler(messageOutput);
qRegisterMetaType<QPair<QString,QStringList>>("QPair<QString,QStringList>"); qRegisterMetaType<QPair<QString,QStringList>>("QPair<QString,QStringList>");
qRegisterMetaType<Document>("Document");
QApplication::setAttribute(Qt::AA_EnableHighDpiScaling); QApplication::setAttribute(Qt::AA_EnableHighDpiScaling);
QApplication::setAttribute(Qt::AA_UseHighDpiPixmaps); QApplication::setAttribute(Qt::AA_UseHighDpiPixmaps);