From d6e3cd46e9ba2496b74cea31f3314a829cd6b041 Mon Sep 17 00:00:00 2001 From: iaom Date: Thu, 28 Sep 2023 10:54:58 +0800 Subject: [PATCH] =?UTF-8?q?feat(ukui-search-service):=E4=B8=BAOCR=E5=86=85?= =?UTF-8?q?=E5=AE=B9=E7=B4=A2=E5=BC=95=E5=A2=9E=E5=8A=A0=E5=8D=95=E7=8B=AC?= =?UTF-8?q?=E7=9A=84=E6=95=B0=E6=8D=AE=E5=BA=93=E5=92=8C=E5=8D=95=E7=8B=AC?= =?UTF-8?q?=E6=8E=A7=E5=88=B6=E5=BC=80=E5=85=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data/org.ukui.search.data.gschema.xml | 5 + libsearch/common.h | 7 +- libsearch/index/batch-indexer.cpp | 148 +++++++++++------ libsearch/index/batch-indexer.h | 5 + libsearch/index/file-indexer-config.cpp | 35 +++- libsearch/index/file-indexer-config.h | 5 + libsearch/index/index-scheduler.cpp | 205 ++++++++++++++---------- libsearch/index/index-scheduler.h | 39 ++--- libsearch/index/index-status-recorder.h | 2 + libsearch/index/index-updater.cpp | 97 ++++++++--- libsearch/index/index-updater.h | 5 +- libsearch/index/search-manager.cpp | 203 +++++------------------ libsearch/index/search-manager.h | 32 +--- libsearch/index/writable-database.cpp | 61 ++++--- 14 files changed, 448 insertions(+), 401 deletions(-) diff --git a/data/org.ukui.search.data.gschema.xml b/data/org.ukui.search.data.gschema.xml index e5633a2..7b5f2af 100644 --- a/data/org.ukui.search.data.gschema.xml +++ b/data/org.ukui.search.data.gschema.xml @@ -25,5 +25,10 @@ content fuzzy search Enable or disable fuzzy search for file content. + + false + content index enable ocr + Enable or disable OCR in content index. + diff --git a/libsearch/common.h b/libsearch/common.h index fea6b31..9354a65 100644 --- a/libsearch/common.h +++ b/libsearch/common.h @@ -13,6 +13,8 @@ static const int LABEL_MAX_WIDTH = 320; static const QString HOME_PATH = QDir::homePath(); static const QString INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/index_data"); static const QString CONTENT_INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/content_index_data"); +static const QString OCR_CONTENT_INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/ocr_content_index_data"); + static const QString FILE_SEARCH_VALUE = QStringLiteral("0"); static const QString DIR_SEARCH_VALUE = QStringLiteral("1"); static const QString INDEX_SEM = QStringLiteral("ukui-search-index-sem"); @@ -27,7 +29,7 @@ static const QString INDEX_DATABASE_VERSION = QStringLiteral("1.0.1"); * changelog 1.1.0 增加文件修改时间value */ static const QString CONTENT_DATABASE_VERSION = QStringLiteral("1.1.0"); - +static const QString OCR_CONTENT_DATABASE_VERSION = QStringLiteral("1.0.0"); static const QStringList allAppPath = { @@ -84,7 +86,8 @@ static const QMap targetPhotographTypeMap = { */ enum class DataBaseType { Basic = 0, - Content = 1 + Content = 1, + OcrContent = 2 }; /** diff --git a/libsearch/index/batch-indexer.cpp b/libsearch/index/batch-indexer.cpp index 470e2d5..16fb65d 100644 --- a/libsearch/index/batch-indexer.cpp +++ b/libsearch/index/batch-indexer.cpp @@ -32,11 +32,14 @@ #include "writable-database.h" #include "compatible-define.h" using namespace UkuiSearch; -BatchIndexer::BatchIndexer(const QStringList &folders, const QStringList &blackList, QAtomicInt& indexStop, QAtomicInt &contentIndexStop, WorkMode mode, Targets target) +BatchIndexer::BatchIndexer(const QStringList &folders, const QStringList &blackList, + QAtomicInt& indexStop, QAtomicInt &contentIndexStop, QAtomicInt &contentIndexOcrStop, + WorkMode mode, Targets target) : m_folders(folders), m_blackList(blackList), m_indexStop(&indexStop), m_contentIndexStop(&contentIndexStop), + m_contentIndexOcrStop(&contentIndexOcrStop), m_mode(mode), m_target(target) { @@ -60,6 +63,10 @@ void BatchIndexer::run() contentIndex(); Q_EMIT contentIndexDone(m_mode); } + if(m_target & Target::Ocr) { + ocrIndex(); + Q_EMIT ocrContentIndexDone(m_mode); + } m_cache.clear(); malloc_trim(0); qDebug() << "FirstRunIndexer: time :" << timer.elapsed() << "milliseconds"; @@ -71,15 +78,15 @@ void BatchIndexer::fetch() qDebug() << "Now begin fetching files to be indexed..."; qDebug() << "Index folders:" << m_folders << "blacklist :" << m_blackList; QQueue bfs; - for(QString blockPath : m_blackList) { - for(QString path : m_folders) { + for(const QString& blockPath : m_blackList) { + for(const QString& path : m_folders) { if(FileUtils::isOrUnder(path, blockPath)) { m_folders.removeOne(path); } } } m_cache.append(m_folders); - for(QString path : m_folders) { + for(const QString &path : m_folders) { bfs.enqueue(path); } QFileInfoList list; @@ -91,9 +98,9 @@ void BatchIndexer::fetch() while(!bfs.empty()) { dir.setPath(bfs.dequeue()); list = dir.entryInfoList(); - for(auto i : list) { + for(const auto& i : list) { bool isBlocked = false; - for(QString path : tmpList) { + for(const QString &path : tmpList) { if(i.absoluteFilePath() == path) { isBlocked = true; tmpList.removeOne(path); @@ -144,7 +151,7 @@ void BatchIndexer::basicIndex() } if(!indexTimes.isEmpty()) { qDebug() << indexTimes.size() << "documents need remove."; - for(std::string uniqueTerm : indexTimes.keys()) { + for(const std::string& uniqueTerm : indexTimes.keys()) { basicDb.removeDocument(uniqueTerm); } basicDb.commit(); @@ -163,7 +170,7 @@ void BatchIndexer::basicIndex() ++finishNum; } if(batchSize >= 8192) { - qDebug() << "8192 finished."; + qDebug() << finishNum << "of" << allSize <<"finished."; basicDb.commit(); Q_EMIT progress(IndexType::Basic, allSize, finishNum); //文件名索引很快 @@ -194,16 +201,10 @@ void BatchIndexer::contentIndex() qWarning() << "Content db open failed, fail to run content index!"; return; } + QStringList filesNeedIndex; - QStringList filesNeedOCRIndex; - QMap suffixMap = targetFileTypeMap; QFileInfo info; - // ocr -// bool ocrEnable = FileIndexerConfig::getInstance()->isOCREnable(); - if(FileIndexerConfig::getInstance()->isOCREnable()) { - qDebug() << "OCR enabled."; - suffixMap.INSERT(targetPhotographTypeMap); - } + if(m_mode == WorkMode::Rebuild) { contentDb.rebuild(); if(!contentDb.open()) { @@ -211,9 +212,9 @@ void BatchIndexer::contentIndex() } } if(m_mode == WorkMode::Rebuild || m_mode == WorkMode::Add) { - for(QString path : m_cache) { + for(const QString& path : m_cache) { info.setFile(path); - if(true == suffixMap[info.suffix()] && info.isFile()) { + if(targetFileTypeMap[info.suffix()] && info.isFile()) { if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) { filesNeedIndex.append(path); } @@ -222,9 +223,9 @@ void BatchIndexer::contentIndex() } else if(m_mode == WorkMode::Update) { QMap indexTimes = contentDb.getIndexTimes(); qDebug() << indexTimes.size() << "documents recorded"; - for(QString path : m_cache) { + for(const QString& path : m_cache) { info.setFile(path); - if(true == suffixMap[info.suffix()] && info.isFile()) { + if(targetFileTypeMap[info.suffix()] && info.isFile()) { std::string uterm = FileUtils::makeDocUterm(path); if(indexTimes.value(uterm) != info.lastModified().toString("yyyyMMddHHmmsszzz").toStdString()) { if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) { @@ -238,7 +239,7 @@ void BatchIndexer::contentIndex() } if(!indexTimes.isEmpty()) { qDebug() << indexTimes.size() << "documents need remove"; - for(std::string uniqueTerm : indexTimes.keys()) { + for(const std::string& uniqueTerm : indexTimes.keys()) { contentDb.removeDocument(uniqueTerm); } contentDb.commit(); @@ -251,19 +252,12 @@ void BatchIndexer::contentIndex() uint batchSize = 0; uint finishNum = 0; - for (QString path : filesNeedIndex) { + for (const QString& path : filesNeedIndex) { if(m_contentIndexStop->LOAD) { qDebug() << "Index stopped, interrupt content index."; filesNeedIndex.clear(); - filesNeedOCRIndex.clear(); return; } - info.setFile(path); - if(true == targetPhotographTypeMap[info.suffix()]) { - filesNeedOCRIndex.append(path); - filesNeedIndex.removeOne(path); - continue; - } fileContentIndexer indexer(path); if(indexer.index()) { contentDb.addDocument(indexer.document()); @@ -274,24 +268,84 @@ void BatchIndexer::contentIndex() } if(batchSize >= 30) { contentDb.commit(); - qDebug() << "30 finished."; + qDebug() << finishNum << "of" << allSize <<"finished."; Q_EMIT progress(IndexType::Contents, allSize, finishNum); batchSize = 0; } } contentDb.commit(); Q_EMIT progress(IndexType::Contents, allSize, finishNum); - filesNeedIndex.clear(); - qDebug() << "Content index for normal files finished, now begin OCR index"; - int ocrSize = filesNeedOCRIndex.size(); - qDebug() << ocrSize << "pictures need OCR index."; + qDebug() << "Finish content index"; +} - batchSize = 0; - int ocrFinishNum = 0; - for(QString path : filesNeedOCRIndex) { - if(m_contentIndexStop->LOAD) { - qDebug() << "Index stopped, interrupt content index."; +void BatchIndexer::ocrIndex() +{ + qDebug() << "Begin ocr content index"; + if(m_contentIndexOcrStop->LOAD) { + qDebug() << "Index stopped, abort ocr content index."; + return; + } + WritableDatabase contentDb(DataBaseType::OcrContent); + if(!contentDb.open()) { + qWarning() << "Content db open failed, fail to run ocr content index!"; + return; + } + + QStringList filesNeedOCRIndex; + QFileInfo info; + + if(m_mode == WorkMode::Rebuild) { + contentDb.rebuild(); + if(!contentDb.open()) { + return; + } + } + + if(m_mode == WorkMode::Rebuild || m_mode == WorkMode::Add) { + for(const QString &path : m_cache) { + info.setFile(path); + if(targetPhotographTypeMap[info.suffix()] && info.isFile()) { + if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) { + filesNeedOCRIndex.append(path); + } + } + } + } else { + QMap indexTimes = contentDb.getIndexTimes(); + qDebug() << indexTimes.size() << "documents recorded"; + for(const QString& path : m_cache) { + info.setFile(path); + if(targetPhotographTypeMap[info.suffix()] && info.isFile()) { + std::string uterm = FileUtils::makeDocUterm(path); + if(indexTimes.value(uterm) != info.lastModified().toString("yyyyMMddHHmmsszzz").toStdString()) { + if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) { + filesNeedOCRIndex.append(path); + indexTimes.remove(uterm); + } + } else { + indexTimes.remove(uterm); + } + } + } + if(!indexTimes.isEmpty()) { + qDebug() << indexTimes.size() << "documents need remove"; + for(const std::string& uniqueTerm : indexTimes.keys()) { + contentDb.removeDocument(uniqueTerm); + } + contentDb.commit(); + } + } + + uint allSize = filesNeedOCRIndex.size(); + qDebug() << allSize << "pictures need ocr content index."; + Q_EMIT progress(IndexType::OCR, allSize, 0); + + uint batchSize = 0; + uint finishNum = 0; + for (const QString &path : filesNeedOCRIndex) { + if(m_contentIndexOcrStop->LOAD) { + qDebug() << "Index stopped, interrupt ocr content index."; filesNeedOCRIndex.clear(); return; } @@ -299,22 +353,20 @@ void BatchIndexer::contentIndex() if(indexer.index()) { contentDb.addDocument(indexer.document()); ++batchSize; - ++ocrFinishNum; + ++finishNum; } else { // qDebug() << "Extract fail===" << path; } - if(batchSize >= 30) { + if(batchSize >= 10) { contentDb.commit(); - qDebug() << "30 finished."; - Q_EMIT progress(IndexType::Contents, allSize, finishNum + ocrFinishNum); - Q_EMIT progress(IndexType::OCR, ocrSize, ocrFinishNum); + qDebug() << finishNum << "of" << allSize <<"finished."; + Q_EMIT progress(IndexType::OCR, allSize, finishNum); batchSize = 0; } } contentDb.commit(); - Q_EMIT progress(IndexType::OCR, ocrSize, ocrFinishNum); - Q_EMIT progress(IndexType::Contents, allSize, finishNum + ocrFinishNum); + Q_EMIT progress(IndexType::OCR, allSize, finishNum); filesNeedOCRIndex.clear(); - qDebug() << "Finish OCR index."; - qDebug() << "Finish content index"; + qDebug() << "Ocr content index finished,"; + } diff --git a/libsearch/index/batch-indexer.h b/libsearch/index/batch-indexer.h index eef2912..f8fe3f9 100644 --- a/libsearch/index/batch-indexer.h +++ b/libsearch/index/batch-indexer.h @@ -53,6 +53,7 @@ public: None = 0, Basic = 1u << 0, Content = 1u << 1, + Ocr = 1u << 2, All = Basic | Content }; Q_DECLARE_FLAGS(Targets, Target) @@ -61,6 +62,7 @@ public: const QStringList& blackList, QAtomicInt& indexStop, QAtomicInt& contentIndexStop, + QAtomicInt& contentIndexOcrStop, WorkMode mode = WorkMode::Update, Targets target = Target::All); void run() override; @@ -69,17 +71,20 @@ Q_SIGNALS: void progress(IndexType type, uint all, uint finished); void basicIndexDone(WorkMode); void contentIndexDone(WorkMode); + void ocrContentIndexDone(WorkMode); void done(WorkMode, Targets); private: void fetch(); void basicIndex(); void contentIndex(); + void ocrIndex(); QStringList m_folders; QStringList m_blackList; QAtomicInt *m_indexStop = nullptr; QAtomicInt *m_contentIndexStop = nullptr; + QAtomicInt *m_contentIndexOcrStop = nullptr; WorkMode m_mode; Targets m_target; QStringList m_cache; diff --git a/libsearch/index/file-indexer-config.cpp b/libsearch/index/file-indexer-config.cpp index 9cc0c1d..dd69c9e 100644 --- a/libsearch/index/file-indexer-config.cpp +++ b/libsearch/index/file-indexer-config.cpp @@ -23,11 +23,15 @@ #include #include #define INDEX_SETTINGS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-service.conf" -static const QString CONFIG_VERSION = QStringLiteral("1.0"); +/** + * changelog: 1.1 增加ocr开关 + */ +static const QString CONFIG_VERSION = QStringLiteral("1.1"); static const QByteArray UKUI_SEARCH_SCHEMAS = QByteArrayLiteral("org.ukui.search.settings"); static const QString FILE_INDEX_ENABLE_KEY = QStringLiteral("fileIndexEnable"); static const QString CONTENT_INDEX_ENABLE_KEY = QStringLiteral("contentIndexEnable"); static const QString CONTENT_FUZZY_SEARCH_KEY = QStringLiteral("contentFuzzySearch"); +static const QString CONTENT_INDEX_ENABLE_OCR_KEY = QStringLiteral("contentIndexEnableOcr"); static const QString OCR_ENABLE_KEY = QStringLiteral("ocrEnable"); static const QString META_DATA_INDEX_ENABLE_KEY = QStringLiteral("metaDataIndexEnable"); static const QString CONFIG_VERSION_KEY = QStringLiteral("version"); @@ -65,6 +69,20 @@ FileIndexerConfig::FileIndexerConfig(QObject *parent) if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_KEY)) { m_gsettings->set(CONTENT_INDEX_ENABLE_KEY, true); } + if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_OCR_KEY)) { + m_gsettings->set(CONTENT_INDEX_ENABLE_OCR_KEY, true); + } + } + m_gsettings->set(CONFIG_VERSION_KEY, CONFIG_VERSION); + } else if (oldVersion == "1.0") { + bool contentIndex = false; + if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_KEY)) { + contentIndex = m_gsettings->get(CONTENT_INDEX_ENABLE_KEY).toBool(); + } + if(contentIndex) { + if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_OCR_KEY)) { + m_gsettings->set(CONTENT_INDEX_ENABLE_OCR_KEY, true); + } } m_gsettings->set(CONFIG_VERSION_KEY, CONFIG_VERSION); } @@ -74,7 +92,8 @@ FileIndexerConfig::FileIndexerConfig(QObject *parent) Q_EMIT this->fileIndexEnableStatusChanged(m_gsettings->get(FILE_INDEX_ENABLE_KEY).toBool()); } else if(key == CONTENT_INDEX_ENABLE_KEY) { Q_EMIT this->contentIndexEnableStatusChanged(m_gsettings->get(CONTENT_INDEX_ENABLE_KEY).toBool()); - + } else if(key == CONTENT_INDEX_ENABLE_OCR_KEY) { + Q_EMIT this->contentIndexEnableOcrStatusChanged(m_gsettings->get(CONTENT_INDEX_ENABLE_OCR_KEY).toBool()); } }); } else { @@ -146,7 +165,17 @@ bool FileIndexerConfig::isFuzzySearchEnable() bool FileIndexerConfig::isOCREnable() { - return m_settings->value(OCR_ENABLE_KEY, true).toBool(); + if(m_gsettings) { + if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_OCR_KEY)) { + return m_gsettings->get(CONTENT_INDEX_ENABLE_OCR_KEY).toBool(); + } else { + qWarning() << "FileIndexerConfig: Can not find key:" << CONTENT_INDEX_ENABLE_OCR_KEY << "in" << UKUI_SEARCH_SCHEMAS; + return false; + } + } else { + qWarning() << "FileIndexerConfig:" << UKUI_SEARCH_SCHEMAS << " is not found!"; + return false; + } } bool FileIndexerConfig::isMetaDataIndexEnable() diff --git a/libsearch/index/file-indexer-config.h b/libsearch/index/file-indexer-config.h index 1613015..be32118 100644 --- a/libsearch/index/file-indexer-config.h +++ b/libsearch/index/file-indexer-config.h @@ -87,6 +87,11 @@ Q_SIGNALS: * 内容索引 */ void contentIndexEnableStatusChanged(bool); + /** + * @brief contentIndexEnableOcrStatusChanged + * ocr + */ + void contentIndexEnableOcrStatusChanged(bool); private: explicit FileIndexerConfig(QObject *parent = nullptr); diff --git a/libsearch/index/index-scheduler.cpp b/libsearch/index/index-scheduler.cpp index eba2320..ce58be6 100644 --- a/libsearch/index/index-scheduler.cpp +++ b/libsearch/index/index-scheduler.cpp @@ -23,12 +23,13 @@ using namespace UkuiSearch; IndexScheduler::IndexScheduler(QObject *parent) : - QObject(parent), - m_statusRecorder(IndexStatusRecorder::getInstance()), - m_config(FileIndexerConfig::getInstance()), - m_state(Startup), - m_indexStop(0), - m_contentIndexStop(0) + QObject(parent), + m_statusRecorder(IndexStatusRecorder::getInstance()), + m_config(FileIndexerConfig::getInstance()), + m_state(Startup), + m_indexStop(0), + m_contentIndexStop(0), + m_ocrContentIndexStop(0) { qRegisterMetaType("IndexerState"); qRegisterMetaType("BatchIndexer::WorkMode"); @@ -38,6 +39,7 @@ IndexScheduler::IndexScheduler(QObject *parent) : connect(&m_fileWatcher, &FileWatcher::filesUpdate, this, &IndexScheduler::updateIndex); connect(m_config, &FileIndexerConfig::fileIndexEnableStatusChanged, this, &IndexScheduler::fileIndexEnable); connect(m_config, &FileIndexerConfig::contentIndexEnableStatusChanged, this, &IndexScheduler::contentIndexEnable); + connect(m_config, &FileIndexerConfig::contentIndexEnableOcrStatusChanged, this, &IndexScheduler::ocrContentIndexEnable); connect(m_config, &FileIndexerConfig::appendIndexDir, this, &IndexScheduler::addNewPath); connect(m_config, &FileIndexerConfig::removeIndexDir, this, &IndexScheduler::removeIndex); @@ -50,6 +52,9 @@ IndexScheduler::IndexScheduler(QObject *parent) : } if(m_config->isContentIndexEnable()) { targets |= BatchIndexer::Target::Content; + if(m_config->isOCREnable()) { + targets |= BatchIndexer::Target::Ocr; + } } else { m_contentIndexStop.fetchAndStoreRelaxed(1); } @@ -62,8 +67,7 @@ void IndexScheduler::addNewPath(const QString &folders, const QStringList &black qDebug() << "Index Scheduler is being stopped, add operation will be executed when started up next time."; return; } - m_state = Running; - Q_EMIT stateChange(m_state); + BatchIndexer::Targets target = BatchIndexer::Target::None; if(m_config->isFileIndexEnable()) { target |= BatchIndexer::Target::Basic; @@ -73,6 +77,10 @@ void IndexScheduler::addNewPath(const QString &folders, const QStringList &black target |= BatchIndexer::Target::Content; m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating); } + if(m_config->isOCREnable()) { + target |= BatchIndexer::Target::Ocr; + m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating); + } BatchIndexer::WorkMode mode = BatchIndexer::WorkMode::Add; startIndexJob(QStringList() << folders, blackList, mode, target); if(BatchIndexer::Target::None != target) { @@ -101,7 +109,12 @@ void IndexScheduler::stop(BatchIndexer::Targets target) m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Off); qDebug() << "File content index has been stopped."; } - if(m_indexStop.LOAD && m_contentIndexStop.LOAD) { + if(target & BatchIndexer::Target::Ocr) { + m_ocrContentIndexStop.fetchAndStoreRelaxed(1); + m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Off); + qDebug() << "File ocr content index has been stopped."; + } + if(m_indexStop.LOAD && m_contentIndexStop.LOAD && m_ocrContentIndexStop.LOAD) { m_fileWatcher.removeWatch(); m_threadPool.clear(); m_threadPool.waitForDone(-1); @@ -119,24 +132,30 @@ IndexScheduler::IndexerState IndexScheduler::getIndexState() void IndexScheduler::start(BatchIndexer::Targets target) { qDebug() << "Index scheduler start." << target; + + BatchIndexer::Targets realTargets = BatchIndexer::Target::None; //检查是否有任务未完成 - BatchIndexer::Targets tmpTargets = BatchIndexer::Target::None; if(target & BatchIndexer::Basic) { - if(m_indexFirstRunFinished && m_indexRebuildFinished) { - tmpTargets |= BatchIndexer::Target::Basic; + if(m_indexPendingWorkCount == 0) { + realTargets |= BatchIndexer::Target::Basic; } } if(target & BatchIndexer::Content) { - if(m_contentIndexFirstRunFinished && m_contentIndexRebuildFinished) { - tmpTargets |= BatchIndexer::Target::Content; + if(m_contentIndexPendingWorkCount == 0) { + realTargets |= BatchIndexer::Target::Content; } } - if(tmpTargets == BatchIndexer::Target::None) { - qDebug() << "Index scheduler running, start operation ignored." - << "FirstRun finished: " << m_indexFirstRunFinished - << "Rebuild finished: " << m_indexRebuildFinished - << "Content index firstRun finished: " << m_contentIndexFirstRunFinished - << "Content index rebuild finished: " << m_contentIndexRebuildFinished; + if(target & BatchIndexer::Ocr) { + if(m_ocrContentIndexPendingWorkCount == 0) { + realTargets |= BatchIndexer::Target::Ocr; + } + } + + if(realTargets == BatchIndexer::Target::None) { + qDebug() << "Index scheduler running, start operation ignored.\n" + << "index pending work count: " << m_contentIndexPendingWorkCount << "\n" + << "Content index pending work count: " << m_contentIndexPendingWorkCount << "\n" + << "Ocr content index pending work count: " << m_ocrContentIndexPendingWorkCount << "\n"; return; } @@ -147,25 +166,29 @@ void IndexScheduler::start(BatchIndexer::Targets target) if(target & BatchIndexer::Content) { m_contentIndexStop.fetchAndStoreRelaxed(0); } - //将索引调度器状态设置为运行中 - m_state = Running; - Q_EMIT stateChange(m_state); + if(target & BatchIndexer::Ocr) { + m_ocrContentIndexStop.fetchAndStoreRelaxed(0); + } //检查是否有数据库需要重建并且执行重建 - BatchIndexer::Targets rebuiltTarget = checkAndRebuild(tmpTargets); + BatchIndexer::Targets rebuiltTarget = checkAndRebuild(realTargets); BatchIndexer::WorkMode mode = BatchIndexer::WorkMode::Update; BatchIndexer::Targets startTarget = BatchIndexer::Target::None; //如果数据库被执行过重建,那么跳过增量更新步骤。 - if((tmpTargets & BatchIndexer::Target::Basic) && !(rebuiltTarget & BatchIndexer::Target::Basic)) { + if((realTargets & BatchIndexer::Target::Basic) && !(rebuiltTarget & BatchIndexer::Target::Basic)) { startTarget |= BatchIndexer::Target::Basic; m_statusRecorder->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating); } - if((tmpTargets & BatchIndexer::Target::Content) && !(rebuiltTarget & BatchIndexer::Target::Content)) { + if((realTargets & BatchIndexer::Target::Content) && !(rebuiltTarget & BatchIndexer::Target::Content)) { startTarget |= BatchIndexer::Target::Content; m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating); } + if(realTargets & BatchIndexer::Ocr && !(rebuiltTarget & BatchIndexer::Target::Ocr)) { + startTarget |= BatchIndexer::Target::Ocr; + m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating); + } startIndexJob(m_config->currentIndexableDir(), m_config->currentBlackListOfIndex(), mode, startTarget); //启动监听 @@ -183,7 +206,6 @@ BatchIndexer::Targets IndexScheduler::checkAndRebuild(BatchIndexer::Targets targ rebuildTarget |= BatchIndexer::Target::Basic; m_statusRecorder->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Initializing); } - if((target & BatchIndexer::Target::Content) && m_config->isContentIndexEnable() && (m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE_KEY).toInt() == IndexStatusRecorder::State::Error || !m_statusRecorder->versionCheck(CONTENT_DATABASE_VERSION_KEY, CONTENT_DATABASE_VERSION))) { @@ -191,6 +213,13 @@ BatchIndexer::Targets IndexScheduler::checkAndRebuild(BatchIndexer::Targets targ rebuildTarget |= BatchIndexer::Target::Content; m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Initializing); } + if((target & BatchIndexer::Target::Ocr) && m_config->isOCREnable() && + (m_statusRecorder->getStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY).toInt() == IndexStatusRecorder::State::Error + || !m_statusRecorder->versionCheck(OCR_CONTENT_DATABASE_VERSION_KEY, OCR_CONTENT_DATABASE_VERSION))) { + qDebug() << "Ocr content database need rebuild"; + rebuildTarget |= BatchIndexer::Target::Ocr; + m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Initializing); + } startIndexJob(m_config->currentIndexableDir(), m_config->currentBlackListOfIndex(), mode, rebuildTarget); return rebuildTarget; } @@ -198,34 +227,28 @@ BatchIndexer::Targets IndexScheduler::checkAndRebuild(BatchIndexer::Targets targ void IndexScheduler::startIndexJob(const QStringList& folders,const QStringList& blackList, BatchIndexer::WorkMode mode, BatchIndexer::Targets target) { if(BatchIndexer::Target::None != target) { - switch (mode) { - case BatchIndexer::WorkMode::Add: - m_addNewPathFinished = false; - break; - case BatchIndexer::WorkMode::Rebuild: - if(target & BatchIndexer::Basic) { - m_indexRebuildFinished = false; - } - if(target & BatchIndexer::Content) { - m_contentIndexRebuildFinished = false; - } - break; - case BatchIndexer::WorkMode::Update: - if(target & BatchIndexer::Basic) { - m_indexFirstRunFinished = false; - } - if(target & BatchIndexer::Content) { - m_contentIndexFirstRunFinished = false; - } - break; - default: - break; + if(mode == BatchIndexer::WorkMode::Add) { + m_addNewPathPendingWorkCount++; } - BatchIndexer *indexer = new BatchIndexer(folders, blackList, m_indexStop, m_contentIndexStop, mode, target); - connect(indexer, &BatchIndexer::done, this, &IndexScheduler::firstRunFinished, Qt::QueuedConnection); + if(target & BatchIndexer::Basic) { + m_indexPendingWorkCount++; + } + if(target & BatchIndexer::Content) { + m_contentIndexPendingWorkCount++; + } + if(target & BatchIndexer::Ocr) { + m_ocrContentIndexPendingWorkCount++; + } + + m_state = Running; + Q_EMIT stateChange(m_state); + + BatchIndexer *indexer = new BatchIndexer(folders, blackList, m_indexStop, m_contentIndexStop, m_ocrContentIndexStop, mode, target); + connect(indexer, &BatchIndexer::done, this, &IndexScheduler::batchIndexerFinished, Qt::QueuedConnection); connect(indexer, &BatchIndexer::progress, this, &IndexScheduler::process, Qt::QueuedConnection); connect(indexer, &BatchIndexer::basicIndexDone, this, &IndexScheduler::onBasicIndexDone, Qt::QueuedConnection); connect(indexer, &BatchIndexer::contentIndexDone, this, &IndexScheduler::onContentIndexDone, Qt::QueuedConnection); + connect(indexer, &BatchIndexer::ocrContentIndexDone, this, &IndexScheduler::onOcrContentIndexDone, Qt::QueuedConnection); m_threadPool.start(indexer); } } @@ -248,18 +271,33 @@ void IndexScheduler::contentIndexEnable(bool enable) } } +void IndexScheduler::ocrContentIndexEnable(bool enable) +{ + if(enable) { + start(BatchIndexer::Ocr); + } else { + stop(BatchIndexer::Ocr); + } +} + void IndexScheduler::updateIndex(const QVector &files) { qDebug() << "updateIndex====="; - m_updateFinished = false; + m_updatePendingWorkCount++; + m_state = Running; - IndexUpdater *updateJob = new IndexUpdater(files, m_indexStop, m_contentIndexStop); + Q_EMIT stateChange(m_state); + + IndexUpdater *updateJob = new IndexUpdater(files, m_indexStop, m_contentIndexStop, m_ocrContentIndexStop); connect(updateJob, &IndexUpdater::done, this, &IndexScheduler::updateFinished, Qt::QueuedConnection); m_threadPool.start(updateJob); } -void IndexScheduler::firstRunFinished() +void IndexScheduler::batchIndexerFinished(BatchIndexer::WorkMode mode, BatchIndexer::Targets targets) { + if(mode == BatchIndexer::WorkMode::Add) { + m_addNewPathPendingWorkCount--; + } if(isIdle()) { m_state = Idle; Q_EMIT stateChange(m_state); @@ -268,7 +306,7 @@ void IndexScheduler::firstRunFinished() void IndexScheduler::updateFinished() { - m_updateFinished = true; + m_updatePendingWorkCount--; if(isIdle()) { m_state = Idle; Q_EMIT stateChange(m_state); @@ -277,30 +315,20 @@ void IndexScheduler::updateFinished() bool IndexScheduler::isIdle() { - return m_indexFirstRunFinished && m_contentIndexFirstRunFinished - && m_addNewPathFinished - && m_updateFinished - && m_indexRebuildFinished && m_contentIndexRebuildFinished; + return m_indexPendingWorkCount == 0 + && m_contentIndexPendingWorkCount == 0 + && m_ocrContentIndexPendingWorkCount == 0 + && m_updatePendingWorkCount == 0 + && m_addNewPathPendingWorkCount == 0; } void IndexScheduler::onBasicIndexDone(BatchIndexer::WorkMode mode) { - switch (mode) { - case BatchIndexer::WorkMode::Add: - m_addNewPathFinished = true; - break; - case BatchIndexer::WorkMode::Rebuild: - m_indexRebuildFinished = true; - break; - case BatchIndexer::WorkMode::Update: - m_indexFirstRunFinished = true; - break; - default: - break; - } + Q_UNUSED(mode) + m_indexPendingWorkCount--; bool success = false; - if(!(m_statusRecorder->getStatus(INDEX_DATABASE_STATE_KEY).toInt() == IndexStatusRecorder::State::Error)) { + if(m_statusRecorder->getStatus(INDEX_DATABASE_STATE_KEY).toInt() != IndexStatusRecorder::State::Error) { m_statusRecorder->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Ready); success = true; } @@ -309,23 +337,26 @@ void IndexScheduler::onBasicIndexDone(BatchIndexer::WorkMode mode) void IndexScheduler::onContentIndexDone(BatchIndexer::WorkMode mode) { - switch (mode) { - case BatchIndexer::WorkMode::Add: - m_addNewPathFinished = true; - break; - case BatchIndexer::WorkMode::Rebuild: - m_contentIndexRebuildFinished = true; - break; - case BatchIndexer::WorkMode::Update: - m_contentIndexFirstRunFinished = true; - break; - default: - break; - } + Q_UNUSED(mode) + m_contentIndexPendingWorkCount--; + bool success = false; - if(!(m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE_KEY).toInt() == IndexStatusRecorder::State::Error)) { + if(m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE_KEY).toInt() != IndexStatusRecorder::State::Error) { m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Ready); success = true; } Q_EMIT contentIndexDone(success); } + +void IndexScheduler::onOcrContentIndexDone(BatchIndexer::WorkMode mode) +{ + Q_UNUSED(mode) + m_ocrContentIndexPendingWorkCount--; + + bool success = false; + if(m_statusRecorder->getStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY).toInt() != IndexStatusRecorder::State::Error) { + m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Ready); + success = true; + } + Q_EMIT contentIndexDone(success); +} diff --git a/libsearch/index/index-scheduler.h b/libsearch/index/index-scheduler.h index 7e64400..e4e4dfe 100644 --- a/libsearch/index/index-scheduler.h +++ b/libsearch/index/index-scheduler.h @@ -43,17 +43,6 @@ public: Q_ENUM(IndexerState) explicit IndexScheduler(QObject *parent = nullptr); - /** - * @brief addNewPath - * @param folders 要添加索引的目录 - * @param blackList 要添加索引的目录下的黑名单 - */ - Q_SCRIPTABLE void addNewPath(const QString &folders, const QStringList& blackList = QStringList()); - /** - * @brief removeIndex - * @param folders 要移除索引的目录 - */ - Q_SCRIPTABLE void removeIndex(const QString& folders); Q_SCRIPTABLE IndexerState getIndexState(); @@ -65,16 +54,29 @@ Q_SIGNALS: void done(); private Q_SLOTS: + /** + * @brief addNewPath + * @param folders 要添加索引的目录 + * @param blackList 要添加索引的目录下的黑名单 + */ + Q_SCRIPTABLE void addNewPath(const QString &folders, const QStringList& blackList = QStringList()); + /** + * @brief removeIndex + * @param folders 要移除索引的目录 + */ + Q_SCRIPTABLE void removeIndex(const QString& folders); void start(BatchIndexer::Targets target); void stop(BatchIndexer::Targets target); void fileIndexEnable(bool enable); void contentIndexEnable(bool enable); + void ocrContentIndexEnable(bool enable); void updateIndex(const QVector& files); - void firstRunFinished(); + void batchIndexerFinished(BatchIndexer::WorkMode mode, BatchIndexer::Targets targets); void updateFinished(); bool isIdle(); void onBasicIndexDone(BatchIndexer::WorkMode mode); void onContentIndexDone(BatchIndexer::WorkMode mode); + void onOcrContentIndexDone(BatchIndexer::WorkMode mode); private: /** @@ -90,16 +92,15 @@ private: IndexerState m_state; QAtomicInt m_indexStop; QAtomicInt m_contentIndexStop; + QAtomicInt m_ocrContentIndexStop; QThreadPool m_threadPool; - bool m_indexFirstRunFinished = true; - bool m_contentIndexFirstRunFinished = true; + quint64 m_indexPendingWorkCount = 0; + quint64 m_contentIndexPendingWorkCount = 0; + quint64 m_ocrContentIndexPendingWorkCount= 0; - bool m_indexRebuildFinished = true; - bool m_contentIndexRebuildFinished = true; - - bool m_updateFinished = true; - bool m_addNewPathFinished = true; + quint64 m_updatePendingWorkCount = 0; + quint64 m_addNewPathPendingWorkCount = 0; }; } #endif // INDEXSCHEDULER_H diff --git a/libsearch/index/index-status-recorder.h b/libsearch/index/index-status-recorder.h index 72a83b9..1aa96a1 100644 --- a/libsearch/index/index-status-recorder.h +++ b/libsearch/index/index-status-recorder.h @@ -25,10 +25,12 @@ #include #include #define CONTENT_INDEX_DATABASE_STATE_KEY "content_index_database_state" +#define OCR_CONTENT_INDEX_DATABASE_STATE_KEY "ocr_content_index_database_state" #define INDEX_DATABASE_STATE_KEY "index_database_state" #define INDEX_STATUS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-index-status.conf" #define INDEX_DATABASE_VERSION_KEY "index_database_version" #define CONTENT_DATABASE_VERSION_KEY "content_database_version" +#define OCR_CONTENT_DATABASE_VERSION_KEY "ocr_content_database_version" namespace UkuiSearch { //fixme: we need a better way to record index status. class IndexStatusRecorder : public QObject diff --git a/libsearch/index/index-updater.cpp b/libsearch/index/index-updater.cpp index 5d73fac..b90ffe2 100644 --- a/libsearch/index/index-updater.cpp +++ b/libsearch/index/index-updater.cpp @@ -27,10 +27,11 @@ #include "file-utils.h" #include "compatible-define.h" using namespace UkuiSearch; -IndexUpdater::IndexUpdater(const QVector& files, QAtomicInt& indexstop, QAtomicInt& contentIndexstop) +IndexUpdater::IndexUpdater(const QVector& files, QAtomicInt& indexstop, QAtomicInt& contentIndexstop, QAtomicInt& contentIndexOcrStop) : m_cache(files), m_indexStop(&indexstop), - m_contentIndexStop(&contentIndexstop) + m_contentIndexStop(&contentIndexstop), + m_contentIndexOcrStop(&contentIndexOcrStop) { } void IndexUpdater::updateIndex() @@ -44,7 +45,7 @@ void IndexUpdater::updateIndex() return; } qDebug() << "===update basic index==="; - for(PendingFile file : m_cache) { + for(const PendingFile& file : m_cache) { if(file.shouldRemoveIndex()) { qDebug() << "| remove:" <isContentIndexEnable() && !m_contentIndexStop->LOAD) { WritableDatabase contentDb(DataBaseType::Content); if(!contentDb.open()) { @@ -70,23 +86,22 @@ void IndexUpdater::updateIndex() return; } - QMap suffixMap = targetFileTypeMap; - //ocr - if(FileIndexerConfig::getInstance()->isOCREnable()) { - suffixMap.INSERT(targetPhotographTypeMap); - } qDebug() << "===update content index==="; int size = 0; for(PendingFile file : m_cache) { + if(m_contentIndexStop->LOAD) { + qDebug() << "Content index update interrupted"; + return; + } QString suffix = QFileInfo(file.path()).suffix(); if(file.shouldRemoveIndex()) { qDebug() << "| remove:" <LOAD) { - qDebug() << "Index stopped, content index update interrupted"; - m_cache.clear(); - m_cache.shrink_to_fit(); - malloc_trim(0); - return; - } } contentDb.commit(); qDebug() << "===finish update content index==="; } - m_cache.clear(); - m_cache.shrink_to_fit(); - malloc_trim(0); - Q_EMIT done(); } -void IndexUpdater::run() +void IndexUpdater::updateOcrContentIndex() { - updateIndex(); + if(FileIndexerConfig::getInstance()->isOCREnable() && !m_contentIndexOcrStop->LOAD) { + WritableDatabase contentDb(DataBaseType::OcrContent); + if(!contentDb.open()) { + qWarning() << "Ocr content db open failed, fail to update index"; + return; + } + + qDebug() << "===update ocr content index==="; + int size = 0; + for(PendingFile file : m_cache) { + if(m_contentIndexOcrStop->LOAD) { + qDebug() << "Ocr content index update interrupted"; + return; + } + QString suffix = QFileInfo(file.path()).suffix(); + if(file.shouldRemoveIndex()) { + qDebug() << "| remove:" <= 10) { + contentDb.commit(); + qDebug() << "10 finished."; + size = 0; + } + } + contentDb.commit(); + qDebug() << "===finish update ocr content index==="; + } } diff --git a/libsearch/index/index-updater.h b/libsearch/index/index-updater.h index cedd1f8..4367d63 100644 --- a/libsearch/index/index-updater.h +++ b/libsearch/index/index-updater.h @@ -31,7 +31,7 @@ class IndexUpdater : public QObject, public QRunnable { Q_OBJECT public: - explicit IndexUpdater(const QVector& files, QAtomicInt& indexstop, QAtomicInt& contentIndexstop); + explicit IndexUpdater(const QVector& files, QAtomicInt& indexstop, QAtomicInt& contentIndexstop, QAtomicInt& contentIndexOcrStop); void run() override; Q_SIGNALS: @@ -39,10 +39,13 @@ Q_SIGNALS: private: void updateIndex(); + void updateContentIndex(); + void updateOcrContentIndex(); QVector m_cache; QAtomicInt *m_contentIndexStop = nullptr; QAtomicInt *m_indexStop = nullptr; + QAtomicInt *m_contentIndexOcrStop = nullptr; }; } #endif // INDEXUPDATER_H diff --git a/libsearch/index/search-manager.cpp b/libsearch/index/search-manager.cpp index 4054d37..0da7955 100644 --- a/libsearch/index/search-manager.cpp +++ b/libsearch/index/search-manager.cpp @@ -18,17 +18,17 @@ * */ #include "search-manager.h" + +#include #include "dir-watcher.h" using namespace UkuiSearch; size_t SearchManager::uniqueSymbolFile = 0; size_t SearchManager::uniqueSymbolDir = 0; size_t SearchManager::uniqueSymbolContent = 0; -size_t SearchManager::uniqueSymbolOcr = 0; QMutex SearchManager::m_mutexFile; QMutex SearchManager::m_mutexDir; QMutex SearchManager::m_mutexContent; -QMutex SearchManager::m_mutexOcr; SearchManager::SearchManager(QObject *parent) : QObject(parent) { } @@ -36,7 +36,7 @@ SearchManager::SearchManager(QObject *parent) : QObject(parent) { SearchManager::~SearchManager() { } -int SearchManager::getCurrentIndexCount() { +uint SearchManager::getCurrentIndexCount() { try { Xapian::Database db(INDEX_PATH); return db.get_doccount(); @@ -48,7 +48,7 @@ int SearchManager::getCurrentIndexCount() { bool SearchManager::isBlocked(QString &path) { QStringList blockList = DirWatcher::getDirWatcher()->getBlockDirsOfUser(); - for(QString i : blockList) { + for(const QString& i : blockList) { if(FileUtils::isOrUnder(path, i)) return true; } @@ -56,7 +56,7 @@ bool SearchManager::isBlocked(QString &path) { } -bool SearchManager::creatResultInfo(SearchPluginIface::ResultInfo &ri, QString path) +bool SearchManager::creatResultInfo(SearchPluginIface::ResultInfo &ri, const QString& path) { QFileInfo info(path); if(!info.exists()) { @@ -68,7 +68,7 @@ bool SearchManager::creatResultInfo(SearchPluginIface::ResultInfo &ri, QString p << SearchPluginIface::DescriptionInfo{tr("Path:"), path} \ << SearchPluginIface::DescriptionInfo{tr("Modified time:"), info.lastModified().toString("yyyy/MM/dd hh:mm:ss")}; ri.actionKey = path; - if (true == targetPhotographTypeMap[info.suffix()]) { + if (targetPhotographTypeMap[info.suffix()]) { ri.type = 1;//1为ocr图片文件 } else { ri.type = 0;//0为默认文本文件 @@ -80,8 +80,8 @@ FileSearch::FileSearch(DataQueue *searchResult, s this->setAutoDelete(true); m_search_result = searchResult; m_uniqueSymbol = uniqueSymbol; - m_keyword = keyword; - m_value = value; + m_keyword = std::move(keyword); + m_value = std::move(value); m_slot = slot; m_begin = begin; m_num = num; @@ -113,20 +113,19 @@ void FileSearch::run() { //可能会有更好的方法,待优化。 m_begin = 0; m_num = 100; - int resultCount = 1; - int totalCount = 0; + uint resultCount = 1; + uint totalCount = 0; while(resultCount > 0) { - resultCount = keywordSearchfile(); + resultCount = keywordSearchFile(); m_begin += m_num; totalCount += resultCount; } qDebug() << "Total count:" << m_value << totalCount; - return; } -int FileSearch::keywordSearchfile() { +uint FileSearch::keywordSearchFile() { try { - qDebug() << "--keywordSearchfile start--"; + qDebug() << "--keywordSearchFile start--"; Xapian::Database db(INDEX_PATH); Xapian::Query query = creatQueryForFileSearch(); Xapian::Enquire enquire(db); @@ -140,24 +139,24 @@ int FileSearch::keywordSearchfile() { queryFile = query; } - qDebug() << "keywordSearchfile:" << QString::fromStdString(queryFile.get_description()); + qDebug() << "keywordSearchFile:" << QString::fromStdString(queryFile.get_description()); enquire.set_query(queryFile); - enquire.set_docid_order(enquire.DONT_CARE); + enquire.set_docid_order(Xapian::Enquire::DONT_CARE); enquire.set_sort_by_relevance_then_value(2, true); - Xapian::MSet result = enquire.get_mset(m_begin, m_num, 0, m_matchDecider); - int resultCount = result.size(); - qDebug() << "keywordSearchfile results count=" << resultCount; + Xapian::MSet result = enquire.get_mset(m_begin, m_num, nullptr, m_matchDecider); + uint resultCount = result.size(); + qDebug() << "keywordSearchFile results count=" << resultCount; if(resultCount == 0) return 0; if(getResult(result) == -1) return -1; - qDebug() << "--keywordSearchfile finish--"; + qDebug() << "--keywordSearchFile finish--"; return resultCount; } catch(const Xapian::Error &e) { qWarning() << QString::fromStdString(e.get_description()); - qDebug() << "--keywordSearchfile finish--"; + qDebug() << "--keywordSearchFile finish--"; return -1; } } @@ -170,7 +169,7 @@ Xapian::Query FileSearch::creatQueryForFileSearch() { for(; bf.position() != -1; bf.toNextBoundary()) { int end = bf.position(); if(bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) { - v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.mid(start, end - start)).toStdString())); + v.emplace_back(QUrl::toPercentEncoding(userInput.mid(start, end - start)).toStdString()); } start = end; } @@ -233,7 +232,7 @@ int FileSearch::getResult(Xapian::MSet &result) { FileContentSearch::FileContentSearch(DataQueue *searchResult, size_t uniqueSymbol, QString keyword, bool fuzzy, int begin, int num) :m_search_result(searchResult), m_uniqueSymbol(uniqueSymbol), - m_keyword(keyword), + m_keyword(std::move(keyword)), m_fuzzy(fuzzy), m_begin(begin), m_num(num) @@ -258,22 +257,22 @@ void FileContentSearch::run() { //这里同文件搜索,待优化。 m_begin = 0; m_num = 100; - int resultCount = 1; - int totalCount = 0; + uint resultCount = 1; + uint totalCount = 0; while(resultCount > 0) { resultCount = keywordSearchContent(); m_begin += m_num; totalCount += resultCount; } qDebug() << "Total count:" << totalCount; - return; } -int FileContentSearch::keywordSearchContent() { +uint FileContentSearch::keywordSearchContent() { try { qDebug() << "--keywordSearchContent search start--"; Xapian::Database db(CONTENT_INDEX_PATH); + db.add_database(Xapian::Database(OCR_CONTENT_INDEX_PATH)); Xapian::Enquire enquire(db); Xapian::QueryParser qp; qp.set_default_op(Xapian::Query::OP_AND); @@ -282,15 +281,15 @@ int FileContentSearch::keywordSearchContent() { std::vector sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword); //Creat a query std::string words; - for(size_t i = 0; i < sKeyWord.size(); i++) { - words.append(sKeyWord.at(i).word).append(" "); + for(auto & i : sKeyWord) { + words.append(i.word).append(" "); } // Xapian::Query query = qp.parse_query(words); std::vector v; - for(size_t i = 0; i < sKeyWord.size(); i++) { - v.push_back(Xapian::Query(sKeyWord.at(i).word)); + for(auto & i : sKeyWord) { + v.emplace_back(i.word); // qDebug() << QString::fromStdString(sKeyWord.at(i).word); } Xapian::Query query; @@ -304,9 +303,9 @@ int FileContentSearch::keywordSearchContent() { enquire.set_query(query); - Xapian::MSet result = enquire.get_mset(m_begin, m_num, 0, m_matchDecider); - int resultCount = result.size(); - if(result.size() == 0) { + Xapian::MSet result = enquire.get_mset(m_begin, m_num, nullptr, m_matchDecider); + uint resultCount = result.size(); + if(result.empty()) { return 0; } qDebug() << "keywordSearchContent results count=" << resultCount; @@ -339,7 +338,7 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) { } // Construct snippets containing keyword. auto termIterator = doc.termlist_begin(); - QStringList words = QString::fromStdString(keyWord).split(" ", QString::SkipEmptyParts); + QStringList words = QString::fromStdString(keyWord).split(" ", Qt::SkipEmptyParts); for(const QString& wordTobeFound : words) { std::string term = wordTobeFound.toStdString(); @@ -374,127 +373,12 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) { return 0; } -OcrSearch::OcrSearch(DataQueue *searchResult, size_t uniqueSymbol, QString keyword, int begin, int num) { - this->setAutoDelete(true); - m_search_result = searchResult; - m_uniqueSymbol = uniqueSymbol; - m_keyword = keyword; - m_begin = begin; - m_num = num; - m_matchDecider = new OcrMatchDecider(); -} - -OcrSearch::~OcrSearch() { - m_search_result = nullptr; - if(m_matchDecider) - delete m_matchDecider; -} - -void OcrSearch::run() { - SearchManager::m_mutexOcr.lock(); - if(!m_search_result->isEmpty()) { - m_search_result->clear(); - } - SearchManager::m_mutexOcr.unlock(); - - //这里同文件搜索,待优化。 - m_begin = 0; - m_num = 100; - int resultCount = 1; - int totalCount = 0; - while(resultCount > 0) { - resultCount = keywordSearchOcr(); - m_begin += m_num; - totalCount += resultCount; - } - qDebug() << "Total count:" << totalCount; - return; -} - -int OcrSearch::keywordSearchOcr() { - try { - qDebug() << "--keywordSearch OCR search start--"; - Xapian::Database db(OCR_INDEX_PATH); - Xapian::Enquire enquire(db); - Xapian::QueryParser qp; - qp.set_default_op(Xapian::Query::OP_AND); - qp.set_database(db); - std::vector sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword); - //Creat a query - std::string words; - for(size_t i = 0; i < sKeyWord.size(); i++) { - words.append(sKeyWord.at(i).word).append(" "); - } - std::vector v; - for(size_t i = 0; i < sKeyWord.size(); i++) { - v.push_back(Xapian::Query(sKeyWord.at(i).word)); - qDebug() << QString::fromStdString(sKeyWord.at(i).word); - } - Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end()); - - qDebug() << "keywordSearch OCR:" << QString::fromStdString(query.get_description()); - - enquire.set_query(query); - - Xapian::MSet result = enquire.get_mset(m_begin, m_num, 0, m_matchDecider); - int resultCount = result.size(); - if(result.size() == 0) { - return 0; - } - qDebug() << "keywordSearch OCR results count=" << resultCount; - - if(getResult(result, words) == -1) { - return -1; - } - - qDebug() << "--keywordSearch OCR search finish--"; - return resultCount; - } catch(const Xapian::Error &e) { - qWarning() << QString::fromStdString(e.get_description()); - qDebug() << "--keywordSearch OCR search finish--"; - return -1; - } -} - -int OcrSearch::getResult(Xapian::MSet &result, std::string &keyWord) { - for(auto it = result.begin(); it != result.end(); ++it) { - Xapian::Document doc = it.get_document(); - std::string data = doc.get_data(); - QString path = QString::fromStdString(doc.get_value(1)); - - SearchPluginIface::ResultInfo ri; - if(!SearchManager::creatResultInfo(ri, path)) { - continue; - } - // Construct snippets containing keyword. - auto term = doc.termlist_begin(); - std::string wordTobeFound = QString::fromStdString(keyWord).section(" ", 0, 0).toStdString(); - term.skip_to(wordTobeFound); - //fix me: make a snippet without cut cjk char. - auto pos = term.positionlist_begin(); - QString snippet = FileUtils::getSnippet(data, *pos, QString::fromStdString(keyWord).remove(" ")); - - ri.description.prepend(SearchPluginIface::DescriptionInfo{"", FileUtils::getHtmlText(snippet, QString::fromStdString(keyWord).remove(" "))}); - QString().swap(snippet); - std::string().swap(data); - SearchManager::m_mutexOcr.lock(); - if(m_uniqueSymbol == SearchManager::uniqueSymbolOcr) { - m_search_result->enqueue(ri); - SearchManager::m_mutexOcr.unlock(); - } else { - SearchManager::m_mutexOcr.unlock(); - return -1; - } - } - return 0; -} - DirectSearch::DirectSearch(QString keyword, DataQueue *searchResult, QString value, size_t uniqueSymbol) { this->setAutoDelete(true); - m_keyword = keyword; + m_keyword = std::move(keyword); m_searchResult = searchResult; m_uniqueSymbol = uniqueSymbol; - m_value = value; + m_value = std::move(value); } void DirectSearch::run() { @@ -534,13 +418,13 @@ void DirectSearch::run() { for (auto i : list) { if (i.isDir() && (!(i.isSymLink()))) { bool findIndex = false; - for (QString j : blockList) { + for (const QString& j : blockList) { if (FileUtils::isOrUnder(i.absoluteFilePath(), j)) { findIndex = true; break; } } - if (findIndex == true) { + if (findIndex) { qDebug() << "path is blocked:" << i.absoluteFilePath(); continue; } @@ -586,13 +470,4 @@ bool FileContentMatchDecider::operator ()(const Xapian::Document &doc) const return false; } return true; -} - -bool OcrMatchDecider::operator ()(const Xapian::Document &doc) const -{ - QString path = QString::fromStdString(doc.get_value(1)); - if(SearchManager::isBlocked(path)) { - return false; - } - return true; -} +} \ No newline at end of file diff --git a/libsearch/index/search-manager.h b/libsearch/index/search-manager.h index 06207c7..263ad17 100644 --- a/libsearch/index/search-manager.h +++ b/libsearch/index/search-manager.h @@ -63,7 +63,7 @@ #define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString() #define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString() -#define OCR_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/ocr_index_data").toStdString() +#define OCR_CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/ocr_content_index_data").toStdString() namespace UkuiSearch { class FileMatchDecider; class FileContentMatchDecider; @@ -81,7 +81,7 @@ public: explicit SearchManager(QObject *parent = nullptr); ~SearchManager(); - static int getCurrentIndexCount(); + static uint getCurrentIndexCount(); static size_t uniqueSymbolFile; static size_t uniqueSymbolDir; @@ -94,7 +94,7 @@ public: private: static bool isBlocked(QString &path); - static bool creatResultInfo(UkuiSearch::SearchPluginIface::ResultInfo &ri, QString path); + static bool creatResultInfo(UkuiSearch::SearchPluginIface::ResultInfo &ri, const QString& path); }; class FileSearch : public QRunnable { @@ -104,7 +104,7 @@ public: protected: void run(); private: - int keywordSearchfile(); + uint keywordSearchFile(); Xapian::Query creatQueryForFileSearch(); int getResult(Xapian::MSet &result); @@ -125,7 +125,7 @@ public: protected: void run(); private: - int keywordSearchContent(); + uint keywordSearchContent(); int getResult(Xapian::MSet &result, std::string &keyWord); DataQueue *m_search_result = nullptr; @@ -137,24 +137,6 @@ private: int m_num = 20; }; -class OcrSearch : public QRunnable { -public: - explicit OcrSearch(DataQueue *searchResult, size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20); - ~OcrSearch(); -protected: - void run(); -private: - int keywordSearchOcr(); - int getResult(Xapian::MSet &result, std::string &keyWord); - - DataQueue *m_search_result = nullptr; - OcrMatchDecider *m_matchDecider; - size_t m_uniqueSymbol; - QString m_keyword; - int m_begin = 0; - int m_num = 20; -}; - class DirectSearch : public QRunnable { public: explicit DirectSearch(QString keyword, DataQueue *searchResult, QString value, size_t uniqueSymbol); @@ -176,10 +158,6 @@ class FileContentMatchDecider : public Xapian::MatchDecider { public: bool operator ()(const Xapian::Document &doc) const; }; -class OcrMatchDecider : public Xapian::MatchDecider { -public: - bool operator ()(const Xapian::Document &doc) const; -}; } #endif // SEARCHMANAGER_H diff --git a/libsearch/index/writable-database.cpp b/libsearch/index/writable-database.cpp index 4e1901d..b14247f 100644 --- a/libsearch/index/writable-database.cpp +++ b/libsearch/index/writable-database.cpp @@ -29,6 +29,7 @@ using namespace UkuiSearch; static QMutex g_basicDatabaseMutex; static QMutex g_contentDatabaseMutex; +static QMutex g_ocrContentDatabaseMutex; #define DATABASE_TRY(code) try { \ code; \ @@ -43,16 +44,19 @@ WritableDatabase::WritableDatabase(const DataBaseType &type) : m_type(type) { switch (type) { - case DataBaseType::Basic: - m_path = INDEX_PATH; - m_mutex = &g_basicDatabaseMutex; - break; - case DataBaseType::Content: - m_path = CONTENT_INDEX_PATH; - m_mutex = &g_contentDatabaseMutex; - break; - default: - break; + case DataBaseType::Basic: + m_path = INDEX_PATH; + m_mutex = &g_basicDatabaseMutex; + break; + case DataBaseType::Content: + m_path = CONTENT_INDEX_PATH; + m_mutex = &g_contentDatabaseMutex; + break; + case DataBaseType::OcrContent: + m_path = OCR_CONTENT_INDEX_PATH; + m_mutex = &g_ocrContentDatabaseMutex; + default: + break; } m_mutex->lock(); } @@ -100,14 +104,16 @@ void WritableDatabase::rebuild() //更新版本号 switch (m_type) { - case DataBaseType::Basic: - IndexStatusRecorder::getInstance()->setVersion(INDEX_DATABASE_VERSION_KEY, INDEX_DATABASE_VERSION); - break; - case DataBaseType::Content: - IndexStatusRecorder::getInstance()->setVersion(CONTENT_DATABASE_VERSION_KEY, CONTENT_DATABASE_VERSION); - break; - default: - break; + case DataBaseType::Basic: + IndexStatusRecorder::getInstance()->setVersion(INDEX_DATABASE_VERSION_KEY, INDEX_DATABASE_VERSION); + break; + case DataBaseType::Content: + IndexStatusRecorder::getInstance()->setVersion(CONTENT_DATABASE_VERSION_KEY, CONTENT_DATABASE_VERSION); + break; + case DataBaseType::OcrContent: + IndexStatusRecorder::getInstance()->setVersion(OCR_CONTENT_DATABASE_VERSION_KEY, OCR_CONTENT_DATABASE_VERSION); + default: + break; } } @@ -184,14 +190,17 @@ QMap WritableDatabase::getIndexTimes() void WritableDatabase::errorRecord() { switch (m_type) { - case DataBaseType::Basic: - IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Error); - break; - case DataBaseType::Content: - IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Error); - break; - default: - break; + case DataBaseType::Basic: + IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Error); + break; + case DataBaseType::Content: + IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Error); + break; + case DataBaseType::OcrContent: + IndexStatusRecorder::getInstance()->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Error); + break; + default: + break; } }