diff --git a/libsearch/index/construct-document.cpp b/libsearch/index/construct-document.cpp index dc76d6f..f0cc876 100644 --- a/libsearch/index/construct-document.cpp +++ b/libsearch/index/construct-document.cpp @@ -24,8 +24,8 @@ #include #include -//extern QList *_doc_list_path; -//extern QMutex _mutex_doc_list_path; +//extern QList *g_docListForPath; +//extern QMutex g_mutexDocListForPath; using namespace Zeeker; ConstructDocumentForPath::ConstructDocumentForPath(QVector list) { this->setAutoDelete(true); @@ -34,9 +34,9 @@ ConstructDocumentForPath::ConstructDocumentForPath(QVector list) { void ConstructDocumentForPath::run() { // qDebug()<<"ConstructDocumentForPath"; -// if(!Zeeker::_doc_list_path) -// Zeeker::_doc_list_path = new QVector; -// qDebug()<<_doc_list_path->size(); +// if(!Zeeker::g_docListForPath) +// Zeeker::g_docListForPath = new QVector; +// qDebug()<size(); QString index_text = m_list.at(0).toLower(); QString sourcePath = m_list.at(1); Document doc; @@ -86,9 +86,9 @@ void ConstructDocumentForPath::run() { } // QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc)); - IndexGenerator::_mutex_doc_list_path.lock(); - IndexGenerator::_doc_list_path.append(doc); - IndexGenerator::_mutex_doc_list_path.unlock(); + IndexGenerator::g_mutexDocListForPath.lock(); + IndexGenerator::g_docListForPath.append(doc); + IndexGenerator::g_mutexDocListForPath.unlock(); // qDebug()<<"ConstructDocumentForPath finish"; return; } @@ -100,40 +100,33 @@ ConstructDocumentForContent::ConstructDocumentForContent(QString path) { void ConstructDocumentForContent::run() { // qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId(); - // 构造文本索引的document -// if(!Zeeker::_doc_list_content) -// Zeeker::_doc_list_content = new QVector; + //构造文本索引的document QString content; FileReader::getTextContent(m_path, content); - if(content.isEmpty()) - return; - //QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path)); - //QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep))); + Document doc; - doc.setData(content); - //doc.setUniqueTerm(uniqueterm); doc.setUniqueTerm(FileUtils::makeDocUterm(m_path)); - //doc.addTerm(upTerm); doc.addTerm("ZEEKERUPTERM" + FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep))); doc.addValue(m_path); - //'\xEF\xBC\x8C' is "," "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info. - content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " "); - -// QVector term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000)); - std::vector term = ChineseSegmentation::getInstance()->callSegementStd(content.left(20480000).toStdString()); - - for(size_t i = 0; i < term.size(); ++i) { - doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast(term.at(i).weight)); + if(content.isEmpty()) { + doc.reuireDeleted(); + } else { + doc.setData(content); + //'\xEF\xBC\x8C' is "," "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info. + content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " "); + std::vector term = ChineseSegmentation::getInstance()->callSegementStd(content.left(20480000).toStdString()); + for(size_t i = 0; i < term.size(); ++i) { + doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast(term.at(i).weight)); + } + term.clear(); + term.shrink_to_fit(); } - - IndexGenerator::_mutex_doc_list_content.lock(); - IndexGenerator::_doc_list_content.append(doc); - IndexGenerator::_mutex_doc_list_content.unlock(); + IndexGenerator::g_mutexDocListForContent.lock(); + IndexGenerator::g_docListForContent.append(doc); + IndexGenerator::g_mutexDocListForContent.unlock(); content.clear(); content.squeeze(); - term.clear(); - term.shrink_to_fit(); return; } diff --git a/libsearch/index/document.cpp b/libsearch/index/document.cpp index 57f907a..f6a58f6 100644 --- a/libsearch/index/document.cpp +++ b/libsearch/index/document.cpp @@ -108,3 +108,13 @@ QStringList Document::getIndexText() { Xapian::Document Document::getXapianDocument() { return m_document; } + +void Document::reuireDeleted() +{ + m_shouldDelete = true; +} + +bool Document::isRequiredDeleted() +{ + return m_shouldDelete; +} diff --git a/libsearch/index/document.h b/libsearch/index/document.h index 84e6262..9fbb6f5 100644 --- a/libsearch/index/document.h +++ b/libsearch/index/document.h @@ -33,11 +33,13 @@ public: m_document = other.m_document; m_index_text = other.m_index_text; m_unique_term = other.m_unique_term; + m_shouldDelete = other.m_shouldDelete; } void operator=(const Document& other) { m_document = other.m_document; m_index_text = other.m_index_text; m_unique_term = other.m_unique_term; + m_shouldDelete = other.m_shouldDelete; } void setData(QString &data); void addPosting(std::string term, QVector offset, int weight = 1); @@ -52,11 +54,14 @@ public: void setIndexText(QStringList indexText); QStringList getIndexText(); Xapian::Document getXapianDocument(); + void reuireDeleted(); + bool isRequiredDeleted(); private: Xapian::Document m_document; QStringList m_index_text; //QString m_unique_term; std::string m_unique_term; + bool m_shouldDelete = false; }; } diff --git a/libsearch/index/index-generator.cpp b/libsearch/index/index-generator.cpp index b0805da..dc07b87 100644 --- a/libsearch/index/index-generator.cpp +++ b/libsearch/index/index-generator.cpp @@ -38,14 +38,14 @@ using namespace Zeeker; static IndexGenerator *global_instance = nullptr; QMutex IndexGenerator::m_mutex; -//QVector *Zeeker::_doc_list_path; -//QMutex Zeeker::_mutex_doc_list_path; -//QVector *Zeeker::_doc_list_content; -//QMutex Zeeker::_mutex_doc_list_content; -QMutex IndexGenerator::_mutex_doc_list_path; -QMutex IndexGenerator::_mutex_doc_list_content; -QVector IndexGenerator::_doc_list_path = QVector(); -QVector IndexGenerator::_doc_list_content = QVector(); +//QVector *Zeeker::g_docListForPath; +//QMutex Zeeker::g_mutexDocListForPath; +//QVector *Zeeker::g_docListForContent; +//QMutex Zeeker::g_mutexDocListForContent; +QMutex IndexGenerator::g_mutexDocListForPath; +QMutex IndexGenerator::g_mutexDocListForContent; +QVector IndexGenerator::g_docListForPath = QVector(); +QVector IndexGenerator::g_docListForContent = QVector(); IndexGenerator *IndexGenerator::getInstance(bool rebuild, QObject *parent) { QMutexLocker locker(&m_mutex); @@ -65,15 +65,15 @@ bool IndexGenerator::setIndexdataPath() { //文件名索引 bool IndexGenerator::creatAllIndex(QQueue > *messageList) { HandlePathList(messageList); -// if(_doc_list_path == NULL) { +// if(g_docListForPath == NULL) { // return false; // } - if(IndexGenerator::_doc_list_path.isEmpty()) { + if(IndexGenerator::g_docListForPath.isEmpty()) { return false; } qDebug() << "begin creatAllIndex"; try { - for(auto i : IndexGenerator::_doc_list_path) { + for(auto i : IndexGenerator::g_docListForPath) { insertIntoDatabase(i); } @@ -85,33 +85,32 @@ bool IndexGenerator::creatAllIndex(QQueue > *messageList) { assert(false); } qDebug() << "finish creatAllIndex"; - IndexGenerator::_doc_list_path.clear(); - IndexGenerator::_doc_list_path.squeeze(); - QVector().swap(IndexGenerator::_doc_list_path); + IndexGenerator::g_docListForPath.clear(); + IndexGenerator::g_docListForPath.squeeze(); + QVector().swap(IndexGenerator::g_docListForPath); -// delete _doc_list_path; -// _doc_list_path = nullptr; +// delete g_docListForPath; +// g_docListForPath = nullptr; return true; } //文件内容索引 bool IndexGenerator::creatAllIndex(QQueue *messageList) { -// FileUtils::_index_status |= 0x2; HandlePathList(messageList); qDebug() << "begin creatAllIndex for content"; -// if(_doc_list_content == NULL) { -// return false; -// } - if(IndexGenerator::_doc_list_content.isEmpty()) { + if(IndexGenerator::g_docListForContent.isEmpty()) { return false; } - int size = IndexGenerator::_doc_list_content.size(); + int size = IndexGenerator::g_docListForContent.size(); qDebug() << "begin creatAllIndex for content" << size; if(!size == 0) { -// GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "0"); try { int count = 0; - for(auto i : IndexGenerator::_doc_list_content) { - insertIntoContentDatabase(i); + for(Document i : IndexGenerator::g_docListForContent) { + if(!i.isRequiredDeleted()) { + m_database_content->replace_document(i.getUniqueTerm(), i.getXapianDocument()); + } else { + m_database_content->delete_document(i.getUniqueTerm()); + } if(++count > 999) { count = 0; m_database_content->commit(); @@ -121,16 +120,13 @@ bool IndexGenerator::creatAllIndex(QQueue *messageList) { } catch(const Xapian::Error &e) { qWarning() << "creat content Index fail!" << QString::fromStdString(e.get_description()); IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE, "1"); -// FileUtils::_index_status &= ~0x2; assert(false); } -// GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2"); -// FileUtils::_index_status &= ~0x2; qDebug() << "finish creatAllIndex for content"; - IndexGenerator::_doc_list_content.clear(); - IndexGenerator::_doc_list_content.squeeze(); - QVector().swap(IndexGenerator::_doc_list_content); + IndexGenerator::g_docListForContent.clear(); + IndexGenerator::g_docListForContent.squeeze(); + QVector().swap(IndexGenerator::g_docListForContent); malloc_trim(0); } Q_EMIT this->transactionFinished(); @@ -175,12 +171,12 @@ IndexGenerator::~IndexGenerator() { // if(m_index_map) // delete m_index_map; // m_index_map = nullptr; -// if(m_doc_list_path) -// delete m_doc_list_path; -// m_doc_list_path = nullptr; -// if(m_doc_list_content) -// delete m_doc_list_content; -// m_doc_list_content = nullptr; +// if(mg_docListForPath) +// delete mg_docListForPath; +// mg_docListForPath = nullptr; +// if(mg_docListForContent) +// delete mg_docListForContent; +// mg_docListForContent = nullptr; // if(m_index_data_path) // delete m_index_data_path; // m_index_data_path = nullptr; @@ -230,7 +226,7 @@ void IndexGenerator::HandlePathList(QQueue> *messageList) { // QList docList = future.results(); // future.cancel(); -// m_doc_list_path = new QList(docList); +// mg_docListForPath = new QList(docList); QThreadPool pool; pool.setMaxThreadCount(((QThread::idealThreadCount() - 1) / 2) + 1); pool.setExpiryTimeout(100); @@ -244,12 +240,12 @@ void IndexGenerator::HandlePathList(QQueue> *messageList) { // delete constructer; // constructer = nullptr; -// qDebug()<<_doc_list_path->size(); -// qWarning() << _doc_list_path; +// qDebug()<size(); +// qWarning() << g_docListForPath; // QList docList = future.results(); -// m_doc_list_path = new QList(docList); -// m_doc_list_path = std::move(future.results()); -// qDebug()<(docList); +// mg_docListForPath = std::move(future.results()); +// qDebug()< *messageList) { // ChineseSegmentation::getInstance()->~ChineseSegmentation(); // QList docList = future.results(); -// m_doc_list_content = new QList(docList); +// mg_docListForContent = new QList(docList); -// qDebug()<<_doc_list_content->size(); +// qDebug()<size(); // QList docList = future.results(); -// m_doc_list_content = new QList(docList); -// m_doc_list_content = std::move(future.results()); +// mg_docListForContent = new QList(docList); +// mg_docListForContent = std::move(future.results()); // future.cancel(); qDebug() << "Finish HandlePathList for content index!"; diff --git a/libsearch/index/index-generator.h b/libsearch/index/index-generator.h index 3d66b84..9cd98fe 100644 --- a/libsearch/index/index-generator.h +++ b/libsearch/index/index-generator.h @@ -78,10 +78,10 @@ private: void insertIntoDatabase(Document& doc); void insertIntoContentDatabase(Document& doc); - static QVector _doc_list_path; - static QMutex _mutex_doc_list_path; - static QVector _doc_list_content; - static QMutex _mutex_doc_list_content; + static QVector g_docListForPath; + static QMutex g_mutexDocListForPath; + static QVector g_docListForContent; + static QMutex g_mutexDocListForContent; QMap m_index_map; QString m_index_data_path; Xapian::WritableDatabase* m_database_path;