Try to fix the excessive memory usage bug.
This commit is contained in:
parent
5de90a7ff6
commit
20efcc67af
|
@ -1,9 +1,12 @@
|
||||||
#include "chinese-segmentation.h"
|
#include "chinese-segmentation.h"
|
||||||
#include <QFileInfo>
|
#include <QFileInfo>
|
||||||
static ChineseSegmentation *global_instance = nullptr;
|
#include <QDebug>
|
||||||
|
static ChineseSegmentation *global_instance_chinese_segmentation = nullptr;
|
||||||
|
QMutex ChineseSegmentation::m_mutex;
|
||||||
|
|
||||||
ChineseSegmentation::ChineseSegmentation()
|
ChineseSegmentation::ChineseSegmentation()
|
||||||
{
|
{
|
||||||
|
QMutexLocker locker(&m_mutex);
|
||||||
const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
|
const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
|
||||||
const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
|
const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
|
||||||
const char * const USER_DICT_PATH ="/usr/share/ukui-search/res/dict/user.dict.utf8";
|
const char * const USER_DICT_PATH ="/usr/share/ukui-search/res/dict/user.dict.utf8";
|
||||||
|
@ -21,20 +24,21 @@ ChineseSegmentation::~ChineseSegmentation()
|
||||||
{
|
{
|
||||||
if(m_jieba)
|
if(m_jieba)
|
||||||
delete m_jieba;
|
delete m_jieba;
|
||||||
|
m_jieba = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
ChineseSegmentation *ChineseSegmentation::getInstance()
|
ChineseSegmentation *ChineseSegmentation::getInstance()
|
||||||
{
|
{
|
||||||
if (!global_instance) {
|
if (!global_instance_chinese_segmentation) {
|
||||||
global_instance = new ChineseSegmentation;
|
global_instance_chinese_segmentation = new ChineseSegmentation;
|
||||||
}
|
}
|
||||||
return global_instance;
|
return global_instance_chinese_segmentation;
|
||||||
}
|
}
|
||||||
|
|
||||||
QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
|
QVector<SKeyWord> ChineseSegmentation::callSegement(QString& str)
|
||||||
{
|
{
|
||||||
std::string s;
|
std::string s;
|
||||||
s=str->toStdString();
|
s=str.toStdString();
|
||||||
|
|
||||||
const size_t topk = -1;
|
const size_t topk = -1;
|
||||||
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
|
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include <QVector>
|
#include <QVector>
|
||||||
#include <QString>
|
#include <QString>
|
||||||
#include <QDebug>
|
#include <QDebug>
|
||||||
|
#include <QMutex>
|
||||||
|
|
||||||
struct SKeyWord{
|
struct SKeyWord{
|
||||||
std::string word;
|
std::string word;
|
||||||
|
@ -21,12 +22,14 @@ class CHINESESEGMENTATION_EXPORT ChineseSegmentation
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static ChineseSegmentation *getInstance();
|
static ChineseSegmentation *getInstance();
|
||||||
QVector<SKeyWord> callSegement(QString *str);
|
~ChineseSegmentation();
|
||||||
|
QVector<SKeyWord> callSegement(QString &str);
|
||||||
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
|
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
|
||||||
private:
|
private:
|
||||||
|
static QMutex m_mutex;
|
||||||
cppjieba::Jieba *m_jieba;
|
cppjieba::Jieba *m_jieba;
|
||||||
explicit ChineseSegmentation();
|
explicit ChineseSegmentation();
|
||||||
~ChineseSegmentation();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // CHINESESEGMENTATION_H
|
#endif // CHINESESEGMENTATION_H
|
||||||
|
|
|
@ -0,0 +1,103 @@
|
||||||
|
#include "construct-document.h"
|
||||||
|
#include "file-utils.h"
|
||||||
|
#include "chinese-segmentation.h"
|
||||||
|
#include <QDebug>
|
||||||
|
#include <QThread>
|
||||||
|
|
||||||
|
//extern QList<Document> *_doc_list_path;
|
||||||
|
//extern QMutex _mutex_doc_list_path;
|
||||||
|
|
||||||
|
/**
 * Creates a runnable that will build one path-index document.
 *
 * @param list   File record taken by value and moved into m_list.
 * @param parent Accepted for interface compatibility; not used here.
 */
ConstructDocumentForPath::ConstructDocumentForPath(QVector<QString> list, IndexGenerator *parent)
    : m_list(std::move(list))
{
    // Enable auto-delete on this runnable.
    setAutoDelete(true);
}
|
||||||
|
|
||||||
|
/** Nothing to release explicitly; members clean up after themselves. */
ConstructDocumentForPath::~ConstructDocumentForPath() = default;
|
||||||
|
|
||||||
|
void ConstructDocumentForPath::run()
|
||||||
|
{
|
||||||
|
// qDebug()<<"ConstructDocumentForPath";
|
||||||
|
if (!_doc_list_path)
|
||||||
|
_doc_list_path = new QList<Document>;
|
||||||
|
// qDebug()<<_doc_list_path->size();
|
||||||
|
QString index_text = m_list.at(0);
|
||||||
|
QString sourcePath = m_list.at(1);
|
||||||
|
index_text = index_text.replace(""," ");
|
||||||
|
index_text = index_text.simplified();
|
||||||
|
|
||||||
|
//不带多音字版
|
||||||
|
// QString pinyin_text = FileUtils::find(QString(list.at(0)).replace(".","")).replace("", " ").simplified();
|
||||||
|
|
||||||
|
//多音字版
|
||||||
|
//现加入首字母
|
||||||
|
QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(m_list.at(0)).replace(".",""));
|
||||||
|
for (QString& i : pinyin_text_list){
|
||||||
|
i.replace("", " ");
|
||||||
|
i = i.simplified();
|
||||||
|
}
|
||||||
|
|
||||||
|
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath));
|
||||||
|
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
||||||
|
|
||||||
|
Document doc;
|
||||||
|
doc.setData(sourcePath);
|
||||||
|
doc.setUniqueTerm(uniqueterm);
|
||||||
|
doc.addTerm(upTerm);
|
||||||
|
doc.addValue(m_list.at(2));
|
||||||
|
QStringList temp;
|
||||||
|
temp.append(index_text);
|
||||||
|
temp.append(pinyin_text_list);
|
||||||
|
doc.setIndexText(temp);
|
||||||
|
// QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc));
|
||||||
|
_mutex_doc_list_path.lock();
|
||||||
|
_doc_list_path->append(doc);
|
||||||
|
_mutex_doc_list_path.unlock();
|
||||||
|
// qDebug()<<"ConstructDocumentForPath finish";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a runnable that will build the content-index document for one file.
 *
 * @param path   Path of the file to read; taken by value and moved into m_path.
 * @param parent Accepted for interface compatibility; it is not forwarded to
 *               the QObject base.
 */
ConstructDocumentForContent::ConstructDocumentForContent(QString path, QObject *parent)
    : m_path(std::move(path))
{
    // Enable auto-delete on this runnable.
    setAutoDelete(true);
}
|
||||||
|
|
||||||
|
/** Nothing to release explicitly; members clean up after themselves. */
ConstructDocumentForContent::~ConstructDocumentForContent() = default;
|
||||||
|
|
||||||
|
void ConstructDocumentForContent::run()
|
||||||
|
{
|
||||||
|
qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId();
|
||||||
|
// 构造文本索引的document
|
||||||
|
if (!_doc_list_content)
|
||||||
|
_doc_list_content = new QList<Document>;
|
||||||
|
QString content;
|
||||||
|
FileReader::getTextContent(m_path,content);
|
||||||
|
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
||||||
|
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
||||||
|
|
||||||
|
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content);
|
||||||
|
|
||||||
|
Document doc;
|
||||||
|
doc.setData(content);
|
||||||
|
doc.setUniqueTerm(uniqueterm);
|
||||||
|
doc.addTerm(upTerm);
|
||||||
|
doc.addValue(m_path);
|
||||||
|
for(int i = 0;i<term.size();++i)
|
||||||
|
{
|
||||||
|
doc.addPosting(term.at(i).word,term.at(i).offsets,static_cast<int>(term.at(i).weight));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
_mutex_doc_list_content.lock();
|
||||||
|
_doc_list_content->append(doc);
|
||||||
|
_mutex_doc_list_content.unlock();
|
||||||
|
content.clear();
|
||||||
|
term.clear();
|
||||||
|
return;
|
||||||
|
}
|
|
@ -0,0 +1,39 @@
|
||||||
|
#ifndef CONSTRUCTDOCUMENT_H
|
||||||
|
#define CONSTRUCTDOCUMENT_H
|
||||||
|
|
||||||
|
#include <QRunnable>
|
||||||
|
#include <QVector>
|
||||||
|
//#include <QMetaObject>
|
||||||
|
#include "document.h"
|
||||||
|
#include "index-generator.h"
|
||||||
|
//extern QList<Document> *_doc_list_path;
|
||||||
|
//extern QMutex _mutex_doc_list_path;
|
||||||
|
class IndexGenerator;
|
||||||
|
class ConstructDocumentForPath : public QRunnable
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit ConstructDocumentForPath(QVector<QString> list,IndexGenerator *parent = nullptr);
|
||||||
|
~ConstructDocumentForPath();
|
||||||
|
protected:
|
||||||
|
void run();
|
||||||
|
private:
|
||||||
|
QVector<QString> m_list;
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
class ConstructDocumentForContent : public QObject, public QRunnable
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
public:
|
||||||
|
explicit ConstructDocumentForContent(QString path,QObject *parent = nullptr);
|
||||||
|
~ConstructDocumentForContent();
|
||||||
|
protected:
|
||||||
|
void run();
|
||||||
|
private:
|
||||||
|
QString m_path;
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // CONSTRUCTDOCUMENT_H
|
|
@ -3,24 +3,13 @@
|
||||||
|
|
||||||
Document::Document()
|
Document::Document()
|
||||||
{
|
{
|
||||||
m_document = new Xapian::Document;
|
|
||||||
}
|
|
||||||
|
|
||||||
Document::~Document()
|
|
||||||
{
|
|
||||||
// if(m_document)
|
|
||||||
// delete m_document;
|
|
||||||
// if(m_index_text)
|
|
||||||
// delete m_index_text;
|
|
||||||
// if(m_unique_term)
|
|
||||||
// delete m_unique_term;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Document::setData(QString data)
|
void Document::setData(QString data)
|
||||||
{
|
{
|
||||||
if(data.isEmpty())
|
if(data.isEmpty())
|
||||||
return;
|
return;
|
||||||
m_document->set_data(data.toStdString());
|
m_document.set_data(data.toStdString());
|
||||||
}
|
}
|
||||||
|
|
||||||
void Document::addPosting(std::string term,QVector<size_t> offset, int weight)
|
void Document::addPosting(std::string term,QVector<size_t> offset, int weight)
|
||||||
|
@ -32,7 +21,7 @@ void Document::addPosting(std::string term,QVector<size_t> offset, int weight)
|
||||||
|
|
||||||
for(size_t i : offset)
|
for(size_t i : offset)
|
||||||
{
|
{
|
||||||
m_document->add_posting(term,i,weight);
|
m_document.add_posting(term,i,weight);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -40,40 +29,37 @@ void Document::addTerm(QString term)
|
||||||
{
|
{
|
||||||
if(term.isEmpty())
|
if(term.isEmpty())
|
||||||
return;
|
return;
|
||||||
m_document->add_term(term.toStdString());
|
m_document.add_term(term.toStdString());
|
||||||
}
|
}
|
||||||
|
|
||||||
void Document::addValue(QString value)
|
void Document::addValue(QString value)
|
||||||
{
|
{
|
||||||
m_document->add_value(1,value.toStdString());
|
m_document.add_value(1,value.toStdString());
|
||||||
}
|
}
|
||||||
|
|
||||||
void Document::setUniqueTerm(QString term)
|
void Document::setUniqueTerm(QString term)
|
||||||
{
|
{
|
||||||
if(term.isEmpty())
|
if(term.isEmpty())
|
||||||
return;
|
return;
|
||||||
m_document->add_term(term.toStdString());
|
m_document.add_term(term.toStdString());
|
||||||
|
m_unique_term = std::move(term);
|
||||||
m_unique_term = new QString(term);
|
|
||||||
}
|
}
|
||||||
std::string Document::getUniqueTerm()
|
std::string Document::getUniqueTerm()
|
||||||
{
|
{
|
||||||
// qDebug()<<"m_unique_term!"<<*m_unique_term;
|
return m_unique_term.toStdString();
|
||||||
return m_unique_term->toStdString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Document::setIndexText(QStringList indexText)
|
void Document::setIndexText(QStringList indexText)
|
||||||
{
|
{
|
||||||
// QStringList indexTextList = indexText;
|
m_index_text = std::move(indexText);
|
||||||
m_index_text = new QStringList(indexText);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
QStringList Document::getIndexText()
|
QStringList Document::getIndexText()
|
||||||
{
|
{
|
||||||
return *m_index_text;
|
return m_index_text;
|
||||||
}
|
}
|
||||||
|
|
||||||
Xapian::Document Document::getXapianDocument()
|
Xapian::Document Document::getXapianDocument()
|
||||||
{
|
{
|
||||||
return *m_document;
|
return m_document;
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,7 +10,17 @@ class Document
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
Document();
|
Document();
|
||||||
~Document();
|
~Document(){}
|
||||||
|
Document(const Document& other){
|
||||||
|
m_document = other.m_document;
|
||||||
|
m_index_text = other.m_index_text;
|
||||||
|
m_unique_term = other.m_unique_term;
|
||||||
|
}
|
||||||
|
void operator=(const Document& other){
|
||||||
|
m_document = other.m_document;
|
||||||
|
m_index_text = other.m_index_text;
|
||||||
|
m_unique_term = other.m_unique_term;
|
||||||
|
}
|
||||||
void setData(QString data);
|
void setData(QString data);
|
||||||
void addPosting(std::string term, QVector<size_t> offset, int weight =1);
|
void addPosting(std::string term, QVector<size_t> offset, int weight =1);
|
||||||
void addTerm(QString term);
|
void addTerm(QString term);
|
||||||
|
@ -21,9 +31,9 @@ public:
|
||||||
QStringList getIndexText();
|
QStringList getIndexText();
|
||||||
Xapian::Document getXapianDocument();
|
Xapian::Document getXapianDocument();
|
||||||
private:
|
private:
|
||||||
Xapian::Document *m_document;
|
Xapian::Document m_document;
|
||||||
QStringList *m_index_text;
|
QStringList m_index_text;
|
||||||
QString *m_unique_term;
|
QString m_unique_term;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -21,10 +21,18 @@ FileSearcher::~FileSearcher()
|
||||||
}
|
}
|
||||||
|
|
||||||
int FileSearcher::getCurrentIndexCount()
|
int FileSearcher::getCurrentIndexCount()
|
||||||
|
{
|
||||||
|
try
|
||||||
{
|
{
|
||||||
Xapian::Database db(INDEX_PATH);
|
Xapian::Database db(INDEX_PATH);
|
||||||
return db.get_doccount();
|
return db.get_doccount();
|
||||||
}
|
}
|
||||||
|
catch(const Xapian::Error &e)
|
||||||
|
{
|
||||||
|
qWarning() <<QString::fromStdString(e.get_description());
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void FileSearcher::onKeywordSearch(QString keyword,QQueue<QString> *searchResultFile,QQueue<QString> *searchResultDir,QQueue<QPair<QString,QStringList>> *searchResultContent)
|
void FileSearcher::onKeywordSearch(QString keyword,QQueue<QString> *searchResultFile,QQueue<QString> *searchResultDir,QQueue<QPair<QString,QStringList>> *searchResultContent)
|
||||||
{
|
{
|
||||||
|
@ -154,7 +162,7 @@ int FileSearcher::keywordSearchContent(size_t uniqueSymbol, QString keyword, int
|
||||||
qp.set_default_op(Xapian::Query::OP_AND);
|
qp.set_default_op(Xapian::Query::OP_AND);
|
||||||
qp.set_database(db);
|
qp.set_database(db);
|
||||||
|
|
||||||
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(&keyword);
|
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(keyword);
|
||||||
//Creat a query
|
//Creat a query
|
||||||
std::string words;
|
std::string words;
|
||||||
for(int i=0;i<sKeyWord.size();i++)
|
for(int i=0;i<sKeyWord.size();i++)
|
||||||
|
@ -199,8 +207,9 @@ Xapian::Query FileSearcher::creatQueryForFileSearch(QString keyword, Xapian::Dat
|
||||||
qp.set_default_op(Xapian::Query::OP_PHRASE);
|
qp.set_default_op(Xapian::Query::OP_PHRASE);
|
||||||
qp.set_database(db);
|
qp.set_database(db);
|
||||||
auto userInput = keyword;
|
auto userInput = keyword;
|
||||||
|
userInput = userInput.replace(".","").simplified();
|
||||||
|
|
||||||
std::string queryStr = keyword.replace(""," ").toStdString();
|
std::string queryStr = keyword.replace(".","").replace(" ","").replace(""," ").simplified().toStdString();
|
||||||
// std::string s =db.get_spelling_suggestion(queryStr,10);
|
// std::string s =db.get_spelling_suggestion(queryStr,10);
|
||||||
// qDebug()<<"spelling_suggestion!"<<QString::fromStdString(s);
|
// qDebug()<<"spelling_suggestion!"<<QString::fromStdString(s);
|
||||||
|
|
||||||
|
|
|
@ -55,8 +55,10 @@ FirstIndex::FirstIndex(const QString& path) : Traverse_BFS(path)
|
||||||
FirstIndex::~FirstIndex()
|
FirstIndex::~FirstIndex()
|
||||||
{
|
{
|
||||||
qDebug() << "~FirstIndex";
|
qDebug() << "~FirstIndex";
|
||||||
|
if(this->q_index)
|
||||||
delete this->q_index;
|
delete this->q_index;
|
||||||
this->q_index = nullptr;
|
this->q_index = nullptr;
|
||||||
|
if(this->q_content_index)
|
||||||
delete this->q_content_index;
|
delete this->q_content_index;
|
||||||
this->q_content_index = nullptr;
|
this->q_content_index = nullptr;
|
||||||
// delete this->p_indexGenerator;
|
// delete this->p_indexGenerator;
|
||||||
|
@ -66,7 +68,7 @@ FirstIndex::~FirstIndex()
|
||||||
}
|
}
|
||||||
|
|
||||||
void FirstIndex::DoSomething(const QFileInfo& fileInfo){
|
void FirstIndex::DoSomething(const QFileInfo& fileInfo){
|
||||||
// qDebug() << "there are some shit here";
|
// qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
|
||||||
this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0"));
|
this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0"));
|
||||||
for (auto i : this->targetFileTypeVec){
|
for (auto i : this->targetFileTypeVec){
|
||||||
if (fileInfo.fileName().endsWith(i)){
|
if (fileInfo.fileName().endsWith(i)){
|
||||||
|
@ -89,11 +91,11 @@ void FirstIndex::run(){
|
||||||
else{
|
else{
|
||||||
//if the parameter is false, index won't be rebuild
|
//if the parameter is false, index won't be rebuild
|
||||||
//if it is true, index will be rebuild
|
//if it is true, index will be rebuild
|
||||||
this->p_indexGenerator = IndexGenerator::getInstance(true);
|
this->p_indexGenerator = IndexGenerator::getInstance(true,this);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
this->p_indexGenerator = IndexGenerator::getInstance(false);
|
this->p_indexGenerator = IndexGenerator::getInstance(false,this);
|
||||||
}
|
}
|
||||||
QSemaphore sem(5);
|
QSemaphore sem(5);
|
||||||
QMutex mutex1, mutex2, mutex3;
|
QMutex mutex1, mutex2, mutex3;
|
||||||
|
|
|
@ -2,27 +2,32 @@
|
||||||
#include <QStandardPaths>
|
#include <QStandardPaths>
|
||||||
#include <QFileInfo>
|
#include <QFileInfo>
|
||||||
#include <QDebug>
|
#include <QDebug>
|
||||||
#include "chinese-segmentation.h"
|
#include <QtConcurrent>
|
||||||
|
#include <QFuture>
|
||||||
|
#include <QThreadPool>
|
||||||
#include "file-utils.h"
|
#include "file-utils.h"
|
||||||
#include "index-generator.h"
|
#include "index-generator.h"
|
||||||
#include "global-settings.h"
|
#include "global-settings.h"
|
||||||
|
#include "chinese-segmentation.h"
|
||||||
|
#include "construct-document.h"
|
||||||
|
|
||||||
#include <QtConcurrent>
|
|
||||||
#include <QFuture>
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/index_data").toStdString()
|
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/index_data").toStdString()
|
||||||
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/content_index_data").toStdString()
|
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/content_index_data").toStdString()
|
||||||
|
|
||||||
static IndexGenerator *global_instance = nullptr;
|
static IndexGenerator *global_instance = nullptr;
|
||||||
QMutex IndexGenerator::m_mutex;
|
QMutex IndexGenerator::m_mutex;
|
||||||
|
QList<Document> *_doc_list_path;
|
||||||
IndexGenerator *IndexGenerator::getInstance(bool rebuild)
|
QMutex _mutex_doc_list_path;
|
||||||
|
QList<Document> *_doc_list_content;
|
||||||
|
QMutex _mutex_doc_list_content;
|
||||||
|
IndexGenerator *IndexGenerator::getInstance(bool rebuild, QObject *parent)
|
||||||
{
|
{
|
||||||
QMutexLocker locker(&m_mutex);
|
QMutexLocker locker(&m_mutex);
|
||||||
if (!global_instance) {
|
if (!global_instance) {
|
||||||
global_instance = new IndexGenerator(rebuild);
|
qDebug()<<"IndexGenerator=================";
|
||||||
|
global_instance = new IndexGenerator(rebuild,parent);
|
||||||
}
|
}
|
||||||
qDebug() << "global_instance" << global_instance;
|
qDebug() << "global_instance" << global_instance;
|
||||||
qDebug() << "QThread::currentThreadId()" << QThread::currentThreadId();
|
qDebug() << "QThread::currentThreadId()" << QThread::currentThreadId();
|
||||||
|
@ -41,23 +46,25 @@ bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList)
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
m_indexer = new Xapian::TermGenerator();
|
m_indexer = new Xapian::TermGenerator();
|
||||||
m_indexer->set_database(*m_datebase_path);
|
m_indexer->set_database(*m_database_path);
|
||||||
//可以实现拼写纠正
|
//可以实现拼写纠正
|
||||||
// m_indexer->set_flags(Xapian::TermGenerator::FLAG_SPELLING);
|
// m_indexer->set_flags(Xapian::TermGenerator::FLAG_SPELLING);
|
||||||
m_indexer->set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
|
m_indexer->set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
|
||||||
|
|
||||||
int count =0;
|
int count =0;
|
||||||
for(int i = 0;i < m_doc_list_path->size(); i++)
|
for(int i = 0;i < _doc_list_path->size(); i++)
|
||||||
{
|
{
|
||||||
insertIntoDatabase(m_doc_list_path->at(i));
|
insertIntoDatabase(_doc_list_path->at(i));
|
||||||
|
|
||||||
if(++count == 9999)
|
if(++count == 9000)
|
||||||
{
|
{
|
||||||
count = 0;
|
count = 0;
|
||||||
m_datebase_path->commit();
|
m_database_path->commit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m_datebase_path->commit();
|
m_database_path->commit();
|
||||||
|
if(m_indexer)
|
||||||
|
delete m_indexer;
|
||||||
}
|
}
|
||||||
catch(const Xapian::Error &e)
|
catch(const Xapian::Error &e)
|
||||||
{
|
{
|
||||||
|
@ -66,8 +73,10 @@ bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList)
|
||||||
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE,"1");
|
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE,"1");
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
m_doc_list_path->clear();
|
_doc_list_path->clear();
|
||||||
Q_EMIT this->transactionFinished();
|
delete _doc_list_path;
|
||||||
|
_doc_list_path = nullptr;
|
||||||
|
// Q_EMIT this->transactionFinished();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -76,12 +85,16 @@ bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList)
|
||||||
{
|
{
|
||||||
FileUtils::_index_status = CREATING_INDEX;
|
FileUtils::_index_status = CREATING_INDEX;
|
||||||
HandlePathList(messageList);
|
HandlePathList(messageList);
|
||||||
|
int size = _doc_list_content->size();
|
||||||
|
if(!size == 0)
|
||||||
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
int count =0;
|
int count =0;
|
||||||
for(int i = 0;i < m_doc_list_content->size(); i++)
|
for(int i = 0;i < size; i++)
|
||||||
{
|
{
|
||||||
insertIntoContentDatabase(m_doc_list_content->at(i));
|
insertIntoContentDatabase(_doc_list_content->at(0));
|
||||||
|
_doc_list_content->removeFirst();
|
||||||
|
|
||||||
if(++count == 1000)
|
if(++count == 1000)
|
||||||
{
|
{
|
||||||
|
@ -97,8 +110,10 @@ bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList)
|
||||||
GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE,"1");
|
GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE,"1");
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
m_doc_list_content->clear();
|
delete _doc_list_content;
|
||||||
Q_EMIT this->transactionFinished();
|
_doc_list_content = nullptr;
|
||||||
|
}
|
||||||
|
// Q_EMIT this->transactionFinished();
|
||||||
FileUtils::_index_status = FINISH_CREATING_INDEX;
|
FileUtils::_index_status = FINISH_CREATING_INDEX;
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
@ -108,12 +123,12 @@ IndexGenerator::IndexGenerator(bool rebuild, QObject *parent) : QObject(parent)
|
||||||
{
|
{
|
||||||
if(rebuild)
|
if(rebuild)
|
||||||
{
|
{
|
||||||
m_datebase_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
|
m_database_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
|
||||||
m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
|
m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
m_datebase_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OPEN);
|
m_database_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OPEN);
|
||||||
m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH, Xapian::DB_CREATE_OR_OPEN);
|
m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH, Xapian::DB_CREATE_OR_OPEN);
|
||||||
}
|
}
|
||||||
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE,"0");
|
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE,"0");
|
||||||
|
@ -124,10 +139,29 @@ IndexGenerator::~IndexGenerator()
|
||||||
{
|
{
|
||||||
QMutexLocker locker(&m_mutex);
|
QMutexLocker locker(&m_mutex);
|
||||||
qDebug() << "~IndexGenerator";
|
qDebug() << "~IndexGenerator";
|
||||||
if(m_datebase_path)
|
m_database_path->close();
|
||||||
delete m_datebase_path;
|
m_database_content->close();
|
||||||
|
if(m_database_path)
|
||||||
|
delete m_database_path;
|
||||||
|
m_database_path = nullptr;
|
||||||
if(m_database_content)
|
if(m_database_content)
|
||||||
delete m_database_content;
|
delete m_database_content;
|
||||||
|
m_database_content = nullptr;
|
||||||
|
if(m_index_map)
|
||||||
|
delete m_index_map;
|
||||||
|
m_index_map = nullptr;
|
||||||
|
// if(m_doc_list_path)
|
||||||
|
// delete m_doc_list_path;
|
||||||
|
// m_doc_list_path = nullptr;
|
||||||
|
// if(_doc_list_content)
|
||||||
|
// delete m_doc_list_content;
|
||||||
|
// m_doc_list_content = nullptr;
|
||||||
|
if(m_index_data_path)
|
||||||
|
delete m_index_data_path;
|
||||||
|
m_index_data_path = nullptr;
|
||||||
|
if(m_indexer)
|
||||||
|
delete m_indexer;
|
||||||
|
m_indexer = nullptr;
|
||||||
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE, "2");
|
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE, "2");
|
||||||
GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2");
|
GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2");
|
||||||
GlobalSettings::getInstance()->setValue(INDEX_GENERATOR_NORMAL_EXIT, "2");
|
GlobalSettings::getInstance()->setValue(INDEX_GENERATOR_NORMAL_EXIT, "2");
|
||||||
|
@ -147,7 +181,7 @@ void IndexGenerator::insertIntoDatabase(Document doc)
|
||||||
m_indexer->index_text(i.toStdString());
|
m_indexer->index_text(i.toStdString());
|
||||||
}
|
}
|
||||||
|
|
||||||
Xapian::docid innerId= m_datebase_path->replace_document(doc.getUniqueTerm(),document);
|
Xapian::docid innerId= m_database_path->replace_document(doc.getUniqueTerm(),document);
|
||||||
// qDebug()<<"replace doc docid="<<static_cast<int>(innerId);
|
// qDebug()<<"replace doc docid="<<static_cast<int>(innerId);
|
||||||
// qDebug()<< "--index finish--";
|
// qDebug()<< "--index finish--";
|
||||||
return;
|
return;
|
||||||
|
@ -166,13 +200,30 @@ void IndexGenerator::HandlePathList(QQueue<QVector<QString>> *messageList)
|
||||||
qDebug()<<"Begin HandlePathList!";
|
qDebug()<<"Begin HandlePathList!";
|
||||||
qDebug()<<messageList->size();
|
qDebug()<<messageList->size();
|
||||||
// qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
|
// qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
|
||||||
QFuture<Document> future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateDocument);
|
// QFuture<Document> future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateDocument);
|
||||||
|
|
||||||
future.waitForFinished();
|
// future.waitForFinished();
|
||||||
|
|
||||||
QList<Document> docList = future.results();
|
// QList<Document> docList = future.results();
|
||||||
m_doc_list_path = new QList<Document>(docList);
|
// future.cancel();
|
||||||
qDebug()<<m_doc_list_path->size();
|
// m_doc_list_path = new QList<Document>(docList);
|
||||||
|
QThreadPool pool;
|
||||||
|
// pool.setMaxThreadCount(1);
|
||||||
|
ConstructDocumentForPath *constructer;
|
||||||
|
while(!messageList->isEmpty())
|
||||||
|
{
|
||||||
|
constructer = new ConstructDocumentForPath(messageList->dequeue());
|
||||||
|
pool.start(constructer);
|
||||||
|
}
|
||||||
|
// while(!pool.waitForDone(1))
|
||||||
|
// qDebug()<<"fuck"<<pool.waitForDone(1);
|
||||||
|
qDebug()<<"pool finish"<<pool.waitForDone(-1);
|
||||||
|
// if(constructer)
|
||||||
|
// delete constructer;
|
||||||
|
// constructer = nullptr;
|
||||||
|
|
||||||
|
|
||||||
|
qDebug()<<_doc_list_path->size();
|
||||||
|
|
||||||
qDebug()<<"Finish HandlePathList!";
|
qDebug()<<"Finish HandlePathList!";
|
||||||
return;
|
return;
|
||||||
|
@ -183,13 +234,31 @@ void IndexGenerator::HandlePathList(QQueue<QString> *messageList)
|
||||||
qDebug()<<"Begin HandlePathList for content index!";
|
qDebug()<<"Begin HandlePathList for content index!";
|
||||||
qDebug()<<messageList->size();
|
qDebug()<<messageList->size();
|
||||||
// qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
|
// qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
|
||||||
QFuture<Document> future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateContentDocument);
|
ChineseSegmentation::getInstance();
|
||||||
|
ConstructDocumentForContent *constructer;
|
||||||
|
QThreadPool pool;
|
||||||
|
// pool.setMaxThreadCount(2);
|
||||||
|
pool.setExpiryTimeout(1000);
|
||||||
|
while(!messageList->isEmpty())
|
||||||
|
{
|
||||||
|
constructer = new ConstructDocumentForContent(messageList->dequeue());
|
||||||
|
pool.start(constructer);
|
||||||
|
}
|
||||||
|
// while(!pool.waitForDone(1))
|
||||||
|
// qDebug()<<"fuck"<<pool.waitForDone(1);
|
||||||
|
qDebug()<<"pool finish"<<pool.waitForDone(-1);
|
||||||
|
// if(constructer)
|
||||||
|
// delete constructer;
|
||||||
|
// constructer = nullptr;
|
||||||
|
|
||||||
future.waitForFinished();
|
// QFuture<Document> future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateContentDocument);
|
||||||
|
|
||||||
QList<Document> docList = future.results();
|
// future.waitForFinished();
|
||||||
m_doc_list_content = new QList<Document>(docList);
|
// ChineseSegmentation::getInstance()->~ChineseSegmentation();
|
||||||
qDebug()<<m_doc_list_content->size();
|
|
||||||
|
// QList<Document> docList = future.results();
|
||||||
|
// m_doc_list_content = new QList<Document>(docList);
|
||||||
|
qDebug()<<_doc_list_content->size();
|
||||||
|
|
||||||
qDebug()<<"Finish HandlePathList for content index!";
|
qDebug()<<"Finish HandlePathList for content index!";
|
||||||
return;
|
return;
|
||||||
|
@ -202,7 +271,7 @@ Document IndexGenerator::GenerateDocument(const QVector<QString> &list)
|
||||||
//0-filename 1-filepathname 2-file or dir
|
//0-filename 1-filepathname 2-file or dir
|
||||||
QString index_text = list.at(0);
|
QString index_text = list.at(0);
|
||||||
QString sourcePath = list.at(1);
|
QString sourcePath = list.at(1);
|
||||||
index_text = index_text.replace(".","").replace(""," ");
|
index_text = index_text.replace(""," ");
|
||||||
index_text = index_text.simplified();
|
index_text = index_text.simplified();
|
||||||
|
|
||||||
//不带多音字版
|
//不带多音字版
|
||||||
|
@ -249,7 +318,9 @@ Document IndexGenerator::GenerateContentDocument(const QString &path)
|
||||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path));
|
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path));
|
||||||
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
||||||
|
|
||||||
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(&content);
|
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content);
|
||||||
|
// QStringList term = content.split("");
|
||||||
|
|
||||||
Document doc;
|
Document doc;
|
||||||
doc.setData(content);
|
doc.setData(content);
|
||||||
doc.setUniqueTerm(uniqueterm);
|
doc.setUniqueTerm(uniqueterm);
|
||||||
|
@ -260,6 +331,20 @@ Document IndexGenerator::GenerateContentDocument(const QString &path)
|
||||||
doc.addPosting(term.at(i).word,term.at(i).offsets,static_cast<int>(term.at(i).weight));
|
doc.addPosting(term.at(i).word,term.at(i).offsets,static_cast<int>(term.at(i).weight));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Document doc;
|
||||||
|
// doc.setData(content);
|
||||||
|
// doc.setUniqueTerm(uniqueterm);
|
||||||
|
// doc.addTerm(upTerm);
|
||||||
|
// doc.addValue(path);
|
||||||
|
// int pos = 0;
|
||||||
|
// for(QString i : term)
|
||||||
|
// {
|
||||||
|
// doc.addPosting(i.toStdString(),QVector<size_t>() << ++pos,1);
|
||||||
|
// }
|
||||||
|
|
||||||
|
content.clear();
|
||||||
|
term.clear();
|
||||||
return doc;
|
return doc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -267,6 +352,7 @@ bool IndexGenerator::isIndexdataExist()
|
||||||
{
|
{
|
||||||
|
|
||||||
// Xapian::Database db(m_index_data_path->toStdString());
|
// Xapian::Database db(m_index_data_path->toStdString());
|
||||||
|
return true;
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -355,11 +441,11 @@ bool IndexGenerator::deleteAllIndex(QStringList *pathlist)
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
qDebug()<<"--delete start--";
|
qDebug()<<"--delete start--";
|
||||||
m_datebase_path->delete_document(uniqueterm);
|
m_database_path->delete_document(uniqueterm);
|
||||||
m_database_content->delete_document(uniqueterm);
|
m_database_content->delete_document(uniqueterm);
|
||||||
qDebug()<<"delete path"<<doc;
|
qDebug()<<"delete path"<<doc;
|
||||||
qDebug()<<"delete md5"<<QString::fromStdString(uniqueterm);
|
qDebug()<<"delete md5"<<QString::fromStdString(uniqueterm);
|
||||||
m_datebase_path->commit();
|
m_database_path->commit();
|
||||||
qDebug()<< "--delete finish--";
|
qDebug()<< "--delete finish--";
|
||||||
}
|
}
|
||||||
catch(const Xapian::Error &e)
|
catch(const Xapian::Error &e)
|
||||||
|
|
|
@ -9,17 +9,25 @@
|
||||||
#include <QCryptographicHash>
|
#include <QCryptographicHash>
|
||||||
#include <QMutex>
|
#include <QMutex>
|
||||||
#include <QQueue>
|
#include <QQueue>
|
||||||
|
//#include <QMetaObject>
|
||||||
#include "document.h"
|
#include "document.h"
|
||||||
#include "file-reader.h"
|
#include "file-reader.h"
|
||||||
|
//#include "chinese-segmentation.h"
|
||||||
|
|
||||||
|
extern QList<Document> *_doc_list_path;
|
||||||
|
extern QMutex _mutex_doc_list_path;
|
||||||
|
extern QList<Document> *_doc_list_content;
|
||||||
|
extern QMutex _mutex_doc_list_content;
|
||||||
|
|
||||||
class IndexGenerator : public QObject
|
class IndexGenerator : public QObject
|
||||||
{
|
{
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
public:
|
public:
|
||||||
static IndexGenerator *getInstance(bool rebuild = false);
|
static IndexGenerator *getInstance(bool rebuild = false,QObject *parent = nullptr);
|
||||||
~IndexGenerator();
|
~IndexGenerator();
|
||||||
bool setIndexdataPath();
|
bool setIndexdataPath();
|
||||||
bool isIndexdataExist();
|
bool isIndexdataExist();
|
||||||
|
// Q_INVOKABLE void appendDocListPath(Document doc);
|
||||||
//for search test
|
//for search test
|
||||||
static QStringList IndexSearch(QString indexText);
|
static QStringList IndexSearch(QString indexText);
|
||||||
Q_SIGNALS:
|
Q_SIGNALS:
|
||||||
|
@ -44,10 +52,10 @@ private:
|
||||||
void insertIntoContentDatabase(Document doc);
|
void insertIntoContentDatabase(Document doc);
|
||||||
|
|
||||||
QMap<QString,QStringList> *m_index_map;
|
QMap<QString,QStringList> *m_index_map;
|
||||||
QList<Document> *m_doc_list_path; //for path index
|
// QList<Document> *m_doc_list_path; //for path index
|
||||||
QList<Document> *m_doc_list_content; // for text content index
|
// QList<Document> *m_doc_list_content; // for text content index
|
||||||
QString *m_index_data_path;
|
QString *m_index_data_path;
|
||||||
Xapian::WritableDatabase *m_datebase_path;
|
Xapian::WritableDatabase *m_database_path;
|
||||||
Xapian::WritableDatabase *m_database_content;
|
Xapian::WritableDatabase *m_database_content;
|
||||||
std::string m_docstr;
|
std::string m_docstr;
|
||||||
std::string m_index_text_str;
|
std::string m_index_text_str;
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
INCLUDEPATH += $$PWD
|
INCLUDEPATH += $$PWD
|
||||||
|
|
||||||
HEADERS += \
|
HEADERS += \
|
||||||
|
$$PWD/construct-document.h \
|
||||||
$$PWD/document.h \
|
$$PWD/document.h \
|
||||||
$$PWD/file-reader.h \
|
$$PWD/file-reader.h \
|
||||||
$$PWD/first-index.h \
|
$$PWD/first-index.h \
|
||||||
|
@ -11,6 +12,7 @@ HEADERS += \
|
||||||
$$PWD/ukui-search-qdbus.h
|
$$PWD/ukui-search-qdbus.h
|
||||||
|
|
||||||
SOURCES += \
|
SOURCES += \
|
||||||
|
$$PWD/construct-document.cpp \
|
||||||
$$PWD/document.cpp \
|
$$PWD/document.cpp \
|
||||||
$$PWD/file-reader.cpp \
|
$$PWD/file-reader.cpp \
|
||||||
$$PWD/first-index.cpp \
|
$$PWD/first-index.cpp \
|
||||||
|
|
|
@ -92,6 +92,7 @@ int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
qInstallMessageHandler(messageOutput);
|
qInstallMessageHandler(messageOutput);
|
||||||
qRegisterMetaType<QPair<QString,QStringList>>("QPair<QString,QStringList>");
|
qRegisterMetaType<QPair<QString,QStringList>>("QPair<QString,QStringList>");
|
||||||
|
qRegisterMetaType<Document>("Document");
|
||||||
QApplication::setAttribute(Qt::AA_EnableHighDpiScaling);
|
QApplication::setAttribute(Qt::AA_EnableHighDpiScaling);
|
||||||
QApplication::setAttribute(Qt::AA_UseHighDpiPixmaps);
|
QApplication::setAttribute(Qt::AA_UseHighDpiPixmaps);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue