Try to fix the bug that caused excessive memory usage.
This commit is contained in:
parent
5de90a7ff6
commit
20efcc67af
|
@ -1,9 +1,12 @@
|
|||
#include "chinese-segmentation.h"
|
||||
#include <QFileInfo>
|
||||
static ChineseSegmentation *global_instance = nullptr;
|
||||
#include <QDebug>
|
||||
static ChineseSegmentation *global_instance_chinese_segmentation = nullptr;
|
||||
QMutex ChineseSegmentation::m_mutex;
|
||||
|
||||
ChineseSegmentation::ChineseSegmentation()
|
||||
{
|
||||
QMutexLocker locker(&m_mutex);
|
||||
const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
|
||||
const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
|
||||
const char * const USER_DICT_PATH ="/usr/share/ukui-search/res/dict/user.dict.utf8";
|
||||
|
@ -11,9 +14,9 @@ ChineseSegmentation::ChineseSegmentation()
|
|||
const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
|
||||
|
||||
m_jieba = new cppjieba::Jieba(DICT_PATH,
|
||||
HMM_PATH,
|
||||
USER_DICT_PATH,
|
||||
IDF_PATH,
|
||||
HMM_PATH,
|
||||
USER_DICT_PATH,
|
||||
IDF_PATH,
|
||||
STOP_WORD_PATH);
|
||||
}
|
||||
|
||||
|
@ -21,20 +24,21 @@ ChineseSegmentation::~ChineseSegmentation()
|
|||
{
|
||||
if(m_jieba)
|
||||
delete m_jieba;
|
||||
m_jieba = nullptr;
|
||||
}
|
||||
|
||||
ChineseSegmentation *ChineseSegmentation::getInstance()
|
||||
{
|
||||
if (!global_instance) {
|
||||
global_instance = new ChineseSegmentation;
|
||||
if (!global_instance_chinese_segmentation) {
|
||||
global_instance_chinese_segmentation = new ChineseSegmentation;
|
||||
}
|
||||
return global_instance;
|
||||
return global_instance_chinese_segmentation;
|
||||
}
|
||||
|
||||
QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
|
||||
QVector<SKeyWord> ChineseSegmentation::callSegement(QString& str)
|
||||
{
|
||||
std::string s;
|
||||
s=str->toStdString();
|
||||
s=str.toStdString();
|
||||
|
||||
const size_t topk = -1;
|
||||
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include <QVector>
|
||||
#include <QString>
|
||||
#include <QDebug>
|
||||
#include <QMutex>
|
||||
|
||||
struct SKeyWord{
|
||||
std::string word;
|
||||
|
@ -21,12 +22,14 @@ class CHINESESEGMENTATION_EXPORT ChineseSegmentation
|
|||
{
|
||||
public:
|
||||
static ChineseSegmentation *getInstance();
|
||||
QVector<SKeyWord> callSegement(QString *str);
|
||||
~ChineseSegmentation();
|
||||
QVector<SKeyWord> callSegement(QString &str);
|
||||
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
|
||||
private:
|
||||
static QMutex m_mutex;
|
||||
cppjieba::Jieba *m_jieba;
|
||||
explicit ChineseSegmentation();
|
||||
~ChineseSegmentation();
|
||||
|
||||
};
|
||||
|
||||
#endif // CHINESESEGMENTATION_H
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
#include "construct-document.h"
|
||||
#include "file-utils.h"
|
||||
#include "chinese-segmentation.h"
|
||||
#include <QDebug>
|
||||
#include <QThread>
|
||||
|
||||
//extern QList<Document> *_doc_list_path;
|
||||
//extern QMutex _mutex_doc_list_path;
|
||||
|
||||
// Creates a thread-pool task that converts one path record into a Document.
// `list` is taken by value and moved into the task. `parent` is accepted for
// interface compatibility but is not used by this constructor.
ConstructDocumentForPath::ConstructDocumentForPath(QVector<QString> list, IndexGenerator *parent)
    : m_list(std::move(list))
{
    // Ask the pool to delete this runnable after run() has returned.
    setAutoDelete(true);
}
|
||||
|
||||
// Nothing to release explicitly: m_list cleans up after itself.
ConstructDocumentForPath::~ConstructDocumentForPath() = default;
|
||||
|
||||
void ConstructDocumentForPath::run()
|
||||
{
|
||||
// qDebug()<<"ConstructDocumentForPath";
|
||||
if (!_doc_list_path)
|
||||
_doc_list_path = new QList<Document>;
|
||||
// qDebug()<<_doc_list_path->size();
|
||||
QString index_text = m_list.at(0);
|
||||
QString sourcePath = m_list.at(1);
|
||||
index_text = index_text.replace(""," ");
|
||||
index_text = index_text.simplified();
|
||||
|
||||
//不带多音字版
|
||||
// QString pinyin_text = FileUtils::find(QString(list.at(0)).replace(".","")).replace("", " ").simplified();
|
||||
|
||||
//多音字版
|
||||
//现加入首字母
|
||||
QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(m_list.at(0)).replace(".",""));
|
||||
for (QString& i : pinyin_text_list){
|
||||
i.replace("", " ");
|
||||
i = i.simplified();
|
||||
}
|
||||
|
||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath));
|
||||
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
||||
|
||||
Document doc;
|
||||
doc.setData(sourcePath);
|
||||
doc.setUniqueTerm(uniqueterm);
|
||||
doc.addTerm(upTerm);
|
||||
doc.addValue(m_list.at(2));
|
||||
QStringList temp;
|
||||
temp.append(index_text);
|
||||
temp.append(pinyin_text_list);
|
||||
doc.setIndexText(temp);
|
||||
// QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc));
|
||||
_mutex_doc_list_path.lock();
|
||||
_doc_list_path->append(doc);
|
||||
_mutex_doc_list_path.unlock();
|
||||
// qDebug()<<"ConstructDocumentForPath finish";
|
||||
return;
|
||||
}
|
||||
|
||||
// Creates a thread-pool task that builds the content-index Document for the
// file at `path`. `path` is taken by value and moved into the task.
// Note: `parent` is accepted but not forwarded to the QObject base here.
ConstructDocumentForContent::ConstructDocumentForContent(QString path,QObject *parent)
    : m_path(std::move(path))
{
    // Ask the pool to delete this runnable after run() has returned.
    setAutoDelete(true);
}
|
||||
|
||||
// Nothing to release explicitly: m_path cleans up after itself.
ConstructDocumentForContent::~ConstructDocumentForContent() = default;
|
||||
|
||||
void ConstructDocumentForContent::run()
|
||||
{
|
||||
qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId();
|
||||
// 构造文本索引的document
|
||||
if (!_doc_list_content)
|
||||
_doc_list_content = new QList<Document>;
|
||||
QString content;
|
||||
FileReader::getTextContent(m_path,content);
|
||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
||||
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
||||
|
||||
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content);
|
||||
|
||||
Document doc;
|
||||
doc.setData(content);
|
||||
doc.setUniqueTerm(uniqueterm);
|
||||
doc.addTerm(upTerm);
|
||||
doc.addValue(m_path);
|
||||
for(int i = 0;i<term.size();++i)
|
||||
{
|
||||
doc.addPosting(term.at(i).word,term.at(i).offsets,static_cast<int>(term.at(i).weight));
|
||||
|
||||
}
|
||||
|
||||
_mutex_doc_list_content.lock();
|
||||
_doc_list_content->append(doc);
|
||||
_mutex_doc_list_content.unlock();
|
||||
content.clear();
|
||||
term.clear();
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
#ifndef CONSTRUCTDOCUMENT_H
|
||||
#define CONSTRUCTDOCUMENT_H
|
||||
|
||||
#include <QRunnable>
|
||||
#include <QVector>
|
||||
//#include <QMetaObject>
|
||||
#include "document.h"
|
||||
#include "index-generator.h"
|
||||
//extern QList<Document> *_doc_list_path;
|
||||
//extern QMutex _mutex_doc_list_path;
|
||||
class IndexGenerator;
|
||||
class ConstructDocumentForPath : public QRunnable
|
||||
{
|
||||
public:
|
||||
explicit ConstructDocumentForPath(QVector<QString> list,IndexGenerator *parent = nullptr);
|
||||
~ConstructDocumentForPath();
|
||||
protected:
|
||||
void run();
|
||||
private:
|
||||
QVector<QString> m_list;
|
||||
|
||||
|
||||
};
|
||||
|
||||
class ConstructDocumentForContent : public QObject, public QRunnable
|
||||
{
|
||||
Q_OBJECT
|
||||
public:
|
||||
explicit ConstructDocumentForContent(QString path,QObject *parent = nullptr);
|
||||
~ConstructDocumentForContent();
|
||||
protected:
|
||||
void run();
|
||||
private:
|
||||
QString m_path;
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif // CONSTRUCTDOCUMENT_H
|
|
@ -3,24 +3,13 @@
|
|||
|
||||
Document::Document()
|
||||
{
|
||||
m_document = new Xapian::Document;
|
||||
}
|
||||
|
||||
Document::~Document()
|
||||
{
|
||||
// if(m_document)
|
||||
// delete m_document;
|
||||
// if(m_index_text)
|
||||
// delete m_index_text;
|
||||
// if(m_unique_term)
|
||||
// delete m_unique_term;
|
||||
}
|
||||
|
||||
void Document::setData(QString data)
|
||||
{
|
||||
if(data.isEmpty())
|
||||
return;
|
||||
m_document->set_data(data.toStdString());
|
||||
m_document.set_data(data.toStdString());
|
||||
}
|
||||
|
||||
void Document::addPosting(std::string term,QVector<size_t> offset, int weight)
|
||||
|
@ -32,7 +21,7 @@ void Document::addPosting(std::string term,QVector<size_t> offset, int weight)
|
|||
|
||||
for(size_t i : offset)
|
||||
{
|
||||
m_document->add_posting(term,i,weight);
|
||||
m_document.add_posting(term,i,weight);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -40,40 +29,37 @@ void Document::addTerm(QString term)
|
|||
{
|
||||
if(term.isEmpty())
|
||||
return;
|
||||
m_document->add_term(term.toStdString());
|
||||
m_document.add_term(term.toStdString());
|
||||
}
|
||||
|
||||
void Document::addValue(QString value)
|
||||
{
|
||||
m_document->add_value(1,value.toStdString());
|
||||
m_document.add_value(1,value.toStdString());
|
||||
}
|
||||
|
||||
void Document::setUniqueTerm(QString term)
|
||||
{
|
||||
if(term.isEmpty())
|
||||
return;
|
||||
m_document->add_term(term.toStdString());
|
||||
|
||||
m_unique_term = new QString(term);
|
||||
m_document.add_term(term.toStdString());
|
||||
m_unique_term = std::move(term);
|
||||
}
|
||||
std::string Document::getUniqueTerm()
|
||||
{
|
||||
// qDebug()<<"m_unique_term!"<<*m_unique_term;
|
||||
return m_unique_term->toStdString();
|
||||
return m_unique_term.toStdString();
|
||||
}
|
||||
|
||||
void Document::setIndexText(QStringList indexText)
|
||||
{
|
||||
// QStringList indexTextList = indexText;
|
||||
m_index_text = new QStringList(indexText);
|
||||
m_index_text = std::move(indexText);
|
||||
}
|
||||
|
||||
QStringList Document::getIndexText()
|
||||
{
|
||||
return *m_index_text;
|
||||
return m_index_text;
|
||||
}
|
||||
|
||||
Xapian::Document Document::getXapianDocument()
|
||||
{
|
||||
return *m_document;
|
||||
return m_document;
|
||||
}
|
||||
|
|
|
@ -10,7 +10,17 @@ class Document
|
|||
{
|
||||
public:
|
||||
Document();
|
||||
~Document();
|
||||
~Document(){}
|
||||
Document(const Document& other){
|
||||
m_document = other.m_document;
|
||||
m_index_text = other.m_index_text;
|
||||
m_unique_term = other.m_unique_term;
|
||||
}
|
||||
// Copy assignment: member-wise copy of the Xapian document, the index text
// list and the unique term. Returns *this so that assignments can be chained
// and the type behaves like a regular value type; callers that ignored the
// previous void result are unaffected.
Document& operator=(const Document& other)
{
    m_document = other.m_document;
    m_index_text = other.m_index_text;
    m_unique_term = other.m_unique_term;
    return *this;
}
|
||||
void setData(QString data);
|
||||
void addPosting(std::string term, QVector<size_t> offset, int weight =1);
|
||||
void addTerm(QString term);
|
||||
|
@ -21,9 +31,9 @@ public:
|
|||
QStringList getIndexText();
|
||||
Xapian::Document getXapianDocument();
|
||||
private:
|
||||
Xapian::Document *m_document;
|
||||
QStringList *m_index_text;
|
||||
QString *m_unique_term;
|
||||
Xapian::Document m_document;
|
||||
QStringList m_index_text;
|
||||
QString m_unique_term;
|
||||
|
||||
};
|
||||
|
||||
|
|
|
@ -22,8 +22,16 @@ FileSearcher::~FileSearcher()
|
|||
|
||||
int FileSearcher::getCurrentIndexCount()
|
||||
{
|
||||
Xapian::Database db(INDEX_PATH);
|
||||
return db.get_doccount();
|
||||
try
|
||||
{
|
||||
Xapian::Database db(INDEX_PATH);
|
||||
return db.get_doccount();
|
||||
}
|
||||
catch(const Xapian::Error &e)
|
||||
{
|
||||
qWarning() <<QString::fromStdString(e.get_description());
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void FileSearcher::onKeywordSearch(QString keyword,QQueue<QString> *searchResultFile,QQueue<QString> *searchResultDir,QQueue<QPair<QString,QStringList>> *searchResultContent)
|
||||
|
@ -154,7 +162,7 @@ int FileSearcher::keywordSearchContent(size_t uniqueSymbol, QString keyword, int
|
|||
qp.set_default_op(Xapian::Query::OP_AND);
|
||||
qp.set_database(db);
|
||||
|
||||
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(&keyword);
|
||||
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(keyword);
|
||||
//Creat a query
|
||||
std::string words;
|
||||
for(int i=0;i<sKeyWord.size();i++)
|
||||
|
@ -199,8 +207,9 @@ Xapian::Query FileSearcher::creatQueryForFileSearch(QString keyword, Xapian::Dat
|
|||
qp.set_default_op(Xapian::Query::OP_PHRASE);
|
||||
qp.set_database(db);
|
||||
auto userInput = keyword;
|
||||
userInput = userInput.replace(".","").simplified();
|
||||
|
||||
std::string queryStr = keyword.replace(""," ").toStdString();
|
||||
std::string queryStr = keyword.replace(".","").replace(" ","").replace(""," ").simplified().toStdString();
|
||||
// std::string s =db.get_spelling_suggestion(queryStr,10);
|
||||
// qDebug()<<"spelling_suggestion!"<<QString::fromStdString(s);
|
||||
|
||||
|
|
|
@ -55,9 +55,11 @@ FirstIndex::FirstIndex(const QString& path) : Traverse_BFS(path)
|
|||
FirstIndex::~FirstIndex()
|
||||
{
|
||||
qDebug() << "~FirstIndex";
|
||||
delete this->q_index;
|
||||
if(this->q_index)
|
||||
delete this->q_index;
|
||||
this->q_index = nullptr;
|
||||
delete this->q_content_index;
|
||||
if(this->q_content_index)
|
||||
delete this->q_content_index;
|
||||
this->q_content_index = nullptr;
|
||||
// delete this->p_indexGenerator;
|
||||
// this->p_indexGenerator;
|
||||
|
@ -66,7 +68,7 @@ FirstIndex::~FirstIndex()
|
|||
}
|
||||
|
||||
void FirstIndex::DoSomething(const QFileInfo& fileInfo){
|
||||
// qDebug() << "there are some shit here";
|
||||
// qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
|
||||
this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0"));
|
||||
for (auto i : this->targetFileTypeVec){
|
||||
if (fileInfo.fileName().endsWith(i)){
|
||||
|
@ -89,11 +91,11 @@ void FirstIndex::run(){
|
|||
else{
|
||||
//if the parameter is false, index won't be rebuild
|
||||
//if it is true, index will be rebuild
|
||||
this->p_indexGenerator = IndexGenerator::getInstance(true);
|
||||
this->p_indexGenerator = IndexGenerator::getInstance(true,this);
|
||||
}
|
||||
}
|
||||
else{
|
||||
this->p_indexGenerator = IndexGenerator::getInstance(false);
|
||||
this->p_indexGenerator = IndexGenerator::getInstance(false,this);
|
||||
}
|
||||
QSemaphore sem(5);
|
||||
QMutex mutex1, mutex2, mutex3;
|
||||
|
|
|
@ -2,27 +2,32 @@
|
|||
#include <QStandardPaths>
|
||||
#include <QFileInfo>
|
||||
#include <QDebug>
|
||||
#include "chinese-segmentation.h"
|
||||
#include <QtConcurrent>
|
||||
#include <QFuture>
|
||||
#include <QThreadPool>
|
||||
#include "file-utils.h"
|
||||
#include "index-generator.h"
|
||||
#include "global-settings.h"
|
||||
#include "chinese-segmentation.h"
|
||||
#include "construct-document.h"
|
||||
|
||||
#include <QtConcurrent>
|
||||
#include <QFuture>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/index_data").toStdString()
|
||||
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/content_index_data").toStdString()
|
||||
|
||||
static IndexGenerator *global_instance = nullptr;
|
||||
QMutex IndexGenerator::m_mutex;
|
||||
|
||||
IndexGenerator *IndexGenerator::getInstance(bool rebuild)
|
||||
QList<Document> *_doc_list_path;
|
||||
QMutex _mutex_doc_list_path;
|
||||
QList<Document> *_doc_list_content;
|
||||
QMutex _mutex_doc_list_content;
|
||||
IndexGenerator *IndexGenerator::getInstance(bool rebuild, QObject *parent)
|
||||
{
|
||||
QMutexLocker locker(&m_mutex);
|
||||
if (!global_instance) {
|
||||
global_instance = new IndexGenerator(rebuild);
|
||||
qDebug()<<"IndexGenerator=================";
|
||||
global_instance = new IndexGenerator(rebuild,parent);
|
||||
}
|
||||
qDebug() << "global_instance" << global_instance;
|
||||
qDebug() << "QThread::currentThreadId()" << QThread::currentThreadId();
|
||||
|
@ -41,23 +46,25 @@ bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList)
|
|||
try
|
||||
{
|
||||
m_indexer = new Xapian::TermGenerator();
|
||||
m_indexer->set_database(*m_datebase_path);
|
||||
m_indexer->set_database(*m_database_path);
|
||||
//可以实现拼写纠正
|
||||
// m_indexer->set_flags(Xapian::TermGenerator::FLAG_SPELLING);
|
||||
m_indexer->set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
|
||||
|
||||
int count =0;
|
||||
for(int i = 0;i < m_doc_list_path->size(); i++)
|
||||
for(int i = 0;i < _doc_list_path->size(); i++)
|
||||
{
|
||||
insertIntoDatabase(m_doc_list_path->at(i));
|
||||
insertIntoDatabase(_doc_list_path->at(i));
|
||||
|
||||
if(++count == 9999)
|
||||
if(++count == 9000)
|
||||
{
|
||||
count = 0;
|
||||
m_datebase_path->commit();
|
||||
m_database_path->commit();
|
||||
}
|
||||
}
|
||||
m_datebase_path->commit();
|
||||
m_database_path->commit();
|
||||
if(m_indexer)
|
||||
delete m_indexer;
|
||||
}
|
||||
catch(const Xapian::Error &e)
|
||||
{
|
||||
|
@ -66,8 +73,10 @@ bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList)
|
|||
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE,"1");
|
||||
assert(false);
|
||||
}
|
||||
m_doc_list_path->clear();
|
||||
Q_EMIT this->transactionFinished();
|
||||
_doc_list_path->clear();
|
||||
delete _doc_list_path;
|
||||
_doc_list_path = nullptr;
|
||||
// Q_EMIT this->transactionFinished();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -76,29 +85,35 @@ bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList)
|
|||
{
|
||||
FileUtils::_index_status = CREATING_INDEX;
|
||||
HandlePathList(messageList);
|
||||
try
|
||||
int size = _doc_list_content->size();
|
||||
if(!size == 0)
|
||||
{
|
||||
int count =0;
|
||||
for(int i = 0;i < m_doc_list_content->size(); i++)
|
||||
try
|
||||
{
|
||||
insertIntoContentDatabase(m_doc_list_content->at(i));
|
||||
|
||||
if(++count == 1000)
|
||||
int count =0;
|
||||
for(int i = 0;i < size; i++)
|
||||
{
|
||||
count = 0;
|
||||
m_database_content->commit();
|
||||
insertIntoContentDatabase(_doc_list_content->at(0));
|
||||
_doc_list_content->removeFirst();
|
||||
|
||||
if(++count == 1000)
|
||||
{
|
||||
count = 0;
|
||||
m_database_content->commit();
|
||||
}
|
||||
}
|
||||
m_database_content->commit();
|
||||
}
|
||||
m_database_content->commit();
|
||||
catch(const Xapian::Error &e)
|
||||
{
|
||||
qWarning()<<"creat content Index fail!"<<QString::fromStdString(e.get_description());
|
||||
GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE,"1");
|
||||
assert(false);
|
||||
}
|
||||
delete _doc_list_content;
|
||||
_doc_list_content = nullptr;
|
||||
}
|
||||
catch(const Xapian::Error &e)
|
||||
{
|
||||
qWarning()<<"creat content Index fail!"<<QString::fromStdString(e.get_description());
|
||||
GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE,"1");
|
||||
assert(false);
|
||||
}
|
||||
m_doc_list_content->clear();
|
||||
Q_EMIT this->transactionFinished();
|
||||
// Q_EMIT this->transactionFinished();
|
||||
FileUtils::_index_status = FINISH_CREATING_INDEX;
|
||||
return true;
|
||||
|
||||
|
@ -108,12 +123,12 @@ IndexGenerator::IndexGenerator(bool rebuild, QObject *parent) : QObject(parent)
|
|||
{
|
||||
if(rebuild)
|
||||
{
|
||||
m_datebase_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
|
||||
m_database_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
|
||||
m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH, Xapian::DB_CREATE_OR_OVERWRITE);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_datebase_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OPEN);
|
||||
m_database_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OPEN);
|
||||
m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH, Xapian::DB_CREATE_OR_OPEN);
|
||||
}
|
||||
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE,"0");
|
||||
|
@ -124,10 +139,29 @@ IndexGenerator::~IndexGenerator()
|
|||
{
|
||||
QMutexLocker locker(&m_mutex);
|
||||
qDebug() << "~IndexGenerator";
|
||||
if(m_datebase_path)
|
||||
delete m_datebase_path;
|
||||
m_database_path->close();
|
||||
m_database_content->close();
|
||||
if(m_database_path)
|
||||
delete m_database_path;
|
||||
m_database_path = nullptr;
|
||||
if(m_database_content)
|
||||
delete m_database_content;
|
||||
m_database_content = nullptr;
|
||||
if(m_index_map)
|
||||
delete m_index_map;
|
||||
m_index_map = nullptr;
|
||||
// if(m_doc_list_path)
|
||||
// delete m_doc_list_path;
|
||||
// m_doc_list_path = nullptr;
|
||||
// if(_doc_list_content)
|
||||
// delete m_doc_list_content;
|
||||
// m_doc_list_content = nullptr;
|
||||
if(m_index_data_path)
|
||||
delete m_index_data_path;
|
||||
m_index_data_path = nullptr;
|
||||
if(m_indexer)
|
||||
delete m_indexer;
|
||||
m_indexer = nullptr;
|
||||
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE, "2");
|
||||
GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2");
|
||||
GlobalSettings::getInstance()->setValue(INDEX_GENERATOR_NORMAL_EXIT, "2");
|
||||
|
@ -147,7 +181,7 @@ void IndexGenerator::insertIntoDatabase(Document doc)
|
|||
m_indexer->index_text(i.toStdString());
|
||||
}
|
||||
|
||||
Xapian::docid innerId= m_datebase_path->replace_document(doc.getUniqueTerm(),document);
|
||||
Xapian::docid innerId= m_database_path->replace_document(doc.getUniqueTerm(),document);
|
||||
// qDebug()<<"replace doc docid="<<static_cast<int>(innerId);
|
||||
// qDebug()<< "--index finish--";
|
||||
return;
|
||||
|
@ -166,13 +200,30 @@ void IndexGenerator::HandlePathList(QQueue<QVector<QString>> *messageList)
|
|||
qDebug()<<"Begin HandlePathList!";
|
||||
qDebug()<<messageList->size();
|
||||
// qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
|
||||
QFuture<Document> future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateDocument);
|
||||
// QFuture<Document> future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateDocument);
|
||||
|
||||
future.waitForFinished();
|
||||
// future.waitForFinished();
|
||||
|
||||
QList<Document> docList = future.results();
|
||||
m_doc_list_path = new QList<Document>(docList);
|
||||
qDebug()<<m_doc_list_path->size();
|
||||
// QList<Document> docList = future.results();
|
||||
// future.cancel();
|
||||
// m_doc_list_path = new QList<Document>(docList);
|
||||
QThreadPool pool;
|
||||
// pool.setMaxThreadCount(1);
|
||||
ConstructDocumentForPath *constructer;
|
||||
while(!messageList->isEmpty())
|
||||
{
|
||||
constructer = new ConstructDocumentForPath(messageList->dequeue());
|
||||
pool.start(constructer);
|
||||
}
|
||||
// while(!pool.waitForDone(1))
|
||||
// qDebug()<<"fuck"<<pool.waitForDone(1);
|
||||
qDebug()<<"pool finish"<<pool.waitForDone(-1);
|
||||
// if(constructer)
|
||||
// delete constructer;
|
||||
// constructer = nullptr;
|
||||
|
||||
|
||||
qDebug()<<_doc_list_path->size();
|
||||
|
||||
qDebug()<<"Finish HandlePathList!";
|
||||
return;
|
||||
|
@ -183,13 +234,31 @@ void IndexGenerator::HandlePathList(QQueue<QString> *messageList)
|
|||
qDebug()<<"Begin HandlePathList for content index!";
|
||||
qDebug()<<messageList->size();
|
||||
// qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
|
||||
QFuture<Document> future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateContentDocument);
|
||||
ChineseSegmentation::getInstance();
|
||||
ConstructDocumentForContent *constructer;
|
||||
QThreadPool pool;
|
||||
// pool.setMaxThreadCount(2);
|
||||
pool.setExpiryTimeout(1000);
|
||||
while(!messageList->isEmpty())
|
||||
{
|
||||
constructer = new ConstructDocumentForContent(messageList->dequeue());
|
||||
pool.start(constructer);
|
||||
}
|
||||
// while(!pool.waitForDone(1))
|
||||
// qDebug()<<"fuck"<<pool.waitForDone(1);
|
||||
qDebug()<<"pool finish"<<pool.waitForDone(-1);
|
||||
// if(constructer)
|
||||
// delete constructer;
|
||||
// constructer = nullptr;
|
||||
|
||||
future.waitForFinished();
|
||||
// QFuture<Document> future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateContentDocument);
|
||||
|
||||
QList<Document> docList = future.results();
|
||||
m_doc_list_content = new QList<Document>(docList);
|
||||
qDebug()<<m_doc_list_content->size();
|
||||
// future.waitForFinished();
|
||||
// ChineseSegmentation::getInstance()->~ChineseSegmentation();
|
||||
|
||||
// QList<Document> docList = future.results();
|
||||
// m_doc_list_content = new QList<Document>(docList);
|
||||
qDebug()<<_doc_list_content->size();
|
||||
|
||||
qDebug()<<"Finish HandlePathList for content index!";
|
||||
return;
|
||||
|
@ -202,7 +271,7 @@ Document IndexGenerator::GenerateDocument(const QVector<QString> &list)
|
|||
//0-filename 1-filepathname 2-file or dir
|
||||
QString index_text = list.at(0);
|
||||
QString sourcePath = list.at(1);
|
||||
index_text = index_text.replace(".","").replace(""," ");
|
||||
index_text = index_text.replace(""," ");
|
||||
index_text = index_text.simplified();
|
||||
|
||||
//不带多音字版
|
||||
|
@ -249,7 +318,9 @@ Document IndexGenerator::GenerateContentDocument(const QString &path)
|
|||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path));
|
||||
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
||||
|
||||
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(&content);
|
||||
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content);
|
||||
// QStringList term = content.split("");
|
||||
|
||||
Document doc;
|
||||
doc.setData(content);
|
||||
doc.setUniqueTerm(uniqueterm);
|
||||
|
@ -260,6 +331,20 @@ Document IndexGenerator::GenerateContentDocument(const QString &path)
|
|||
doc.addPosting(term.at(i).word,term.at(i).offsets,static_cast<int>(term.at(i).weight));
|
||||
|
||||
}
|
||||
|
||||
// Document doc;
|
||||
// doc.setData(content);
|
||||
// doc.setUniqueTerm(uniqueterm);
|
||||
// doc.addTerm(upTerm);
|
||||
// doc.addValue(path);
|
||||
// int pos = 0;
|
||||
// for(QString i : term)
|
||||
// {
|
||||
// doc.addPosting(i.toStdString(),QVector<size_t>() << ++pos,1);
|
||||
// }
|
||||
|
||||
content.clear();
|
||||
term.clear();
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
@ -267,6 +352,7 @@ bool IndexGenerator::isIndexdataExist()
|
|||
{
|
||||
|
||||
// Xapian::Database db(m_index_data_path->toStdString());
|
||||
return true;
|
||||
|
||||
|
||||
}
|
||||
|
@ -355,11 +441,11 @@ bool IndexGenerator::deleteAllIndex(QStringList *pathlist)
|
|||
try
|
||||
{
|
||||
qDebug()<<"--delete start--";
|
||||
m_datebase_path->delete_document(uniqueterm);
|
||||
m_database_path->delete_document(uniqueterm);
|
||||
m_database_content->delete_document(uniqueterm);
|
||||
qDebug()<<"delete path"<<doc;
|
||||
qDebug()<<"delete md5"<<QString::fromStdString(uniqueterm);
|
||||
m_datebase_path->commit();
|
||||
m_database_path->commit();
|
||||
qDebug()<< "--delete finish--";
|
||||
}
|
||||
catch(const Xapian::Error &e)
|
||||
|
|
|
@ -9,17 +9,25 @@
|
|||
#include <QCryptographicHash>
|
||||
#include <QMutex>
|
||||
#include <QQueue>
|
||||
//#include <QMetaObject>
|
||||
#include "document.h"
|
||||
#include "file-reader.h"
|
||||
//#include "chinese-segmentation.h"
|
||||
|
||||
extern QList<Document> *_doc_list_path;
|
||||
extern QMutex _mutex_doc_list_path;
|
||||
extern QList<Document> *_doc_list_content;
|
||||
extern QMutex _mutex_doc_list_content;
|
||||
|
||||
class IndexGenerator : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
public:
|
||||
static IndexGenerator *getInstance(bool rebuild = false);
|
||||
static IndexGenerator *getInstance(bool rebuild = false,QObject *parent = nullptr);
|
||||
~IndexGenerator();
|
||||
bool setIndexdataPath();
|
||||
bool isIndexdataExist();
|
||||
// Q_INVOKABLE void appendDocListPath(Document doc);
|
||||
//for search test
|
||||
static QStringList IndexSearch(QString indexText);
|
||||
Q_SIGNALS:
|
||||
|
@ -44,10 +52,10 @@ private:
|
|||
void insertIntoContentDatabase(Document doc);
|
||||
|
||||
QMap<QString,QStringList> *m_index_map;
|
||||
QList<Document> *m_doc_list_path; //for path index
|
||||
QList<Document> *m_doc_list_content; // for text content index
|
||||
// QList<Document> *m_doc_list_path; //for path index
|
||||
// QList<Document> *m_doc_list_content; // for text content index
|
||||
QString *m_index_data_path;
|
||||
Xapian::WritableDatabase *m_datebase_path;
|
||||
Xapian::WritableDatabase *m_database_path;
|
||||
Xapian::WritableDatabase *m_database_content;
|
||||
std::string m_docstr;
|
||||
std::string m_index_text_str;
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
INCLUDEPATH += $$PWD
|
||||
|
||||
HEADERS += \
|
||||
$$PWD/construct-document.h \
|
||||
$$PWD/document.h \
|
||||
$$PWD/file-reader.h \
|
||||
$$PWD/first-index.h \
|
||||
|
@ -11,6 +12,7 @@ HEADERS += \
|
|||
$$PWD/ukui-search-qdbus.h
|
||||
|
||||
SOURCES += \
|
||||
$$PWD/construct-document.cpp \
|
||||
$$PWD/document.cpp \
|
||||
$$PWD/file-reader.cpp \
|
||||
$$PWD/first-index.cpp \
|
||||
|
|
|
@ -92,6 +92,7 @@ int main(int argc, char *argv[])
|
|||
{
|
||||
qInstallMessageHandler(messageOutput);
|
||||
qRegisterMetaType<QPair<QString,QStringList>>("QPair<QString,QStringList>");
|
||||
qRegisterMetaType<Document>("Document");
|
||||
QApplication::setAttribute(Qt::AA_EnableHighDpiScaling);
|
||||
QApplication::setAttribute(Qt::AA_UseHighDpiPixmaps);
|
||||
|
||||
|
|
Loading…
Reference in New Issue