feat(ukui-search-service):为OCR内容索引增加单独的数据库和单独控制开关

This commit is contained in:
iaom 2023-09-28 10:54:58 +08:00 committed by Baijunjie
parent e9ce06a471
commit d6e3cd46e9
14 changed files with 448 additions and 401 deletions

View File

@ -25,5 +25,10 @@
<summary>content fuzzy search</summary> <summary>content fuzzy search</summary>
<description>Enable or disable fuzzy search for file content.</description> <description>Enable or disable fuzzy search for file content.</description>
</key> </key>
<!--
  Boolean switch for OCR-based indexing of picture content.
  Ships disabled. NOTE(review): the service appears to read it through the
  camel-case name "contentIndexEnableOcr" - confirm against FileIndexerConfig.
-->
<key name="content-index-enable-ocr" type="b">
<default>false</default>
<summary>content index enable ocr</summary>
<description>Enable or disable OCR in content index.</description>
</key>
</schema> </schema>
</schemalist> </schemalist>

View File

@ -13,6 +13,8 @@ static const int LABEL_MAX_WIDTH = 320;
static const QString HOME_PATH = QDir::homePath(); static const QString HOME_PATH = QDir::homePath();
static const QString INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/index_data"); static const QString INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/index_data");
static const QString CONTENT_INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/content_index_data"); static const QString CONTENT_INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/content_index_data");
static const QString OCR_CONTENT_INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/ocr_content_index_data");
static const QString FILE_SEARCH_VALUE = QStringLiteral("0"); static const QString FILE_SEARCH_VALUE = QStringLiteral("0");
static const QString DIR_SEARCH_VALUE = QStringLiteral("1"); static const QString DIR_SEARCH_VALUE = QStringLiteral("1");
static const QString INDEX_SEM = QStringLiteral("ukui-search-index-sem"); static const QString INDEX_SEM = QStringLiteral("ukui-search-index-sem");
@ -27,7 +29,7 @@ static const QString INDEX_DATABASE_VERSION = QStringLiteral("1.0.1");
* changelog 1.1.0 value * changelog 1.1.0 value
*/ */
static const QString CONTENT_DATABASE_VERSION = QStringLiteral("1.1.0"); static const QString CONTENT_DATABASE_VERSION = QStringLiteral("1.1.0");
static const QString OCR_CONTENT_DATABASE_VERSION = QStringLiteral("1.0.0");
static const QStringList allAppPath = { static const QStringList allAppPath = {
@ -84,7 +86,8 @@ static const QMap<QString, bool> targetPhotographTypeMap = {
*/ */
enum class DataBaseType { enum class DataBaseType {
Basic = 0, Basic = 0,
Content = 1 Content = 1,
OcrContent = 2
}; };
/** /**

View File

@ -32,11 +32,14 @@
#include "writable-database.h" #include "writable-database.h"
#include "compatible-define.h" #include "compatible-define.h"
using namespace UkuiSearch; using namespace UkuiSearch;
BatchIndexer::BatchIndexer(const QStringList &folders, const QStringList &blackList, QAtomicInt& indexStop, QAtomicInt &contentIndexStop, WorkMode mode, Targets target) BatchIndexer::BatchIndexer(const QStringList &folders, const QStringList &blackList,
QAtomicInt& indexStop, QAtomicInt &contentIndexStop, QAtomicInt &contentIndexOcrStop,
WorkMode mode, Targets target)
: m_folders(folders), : m_folders(folders),
m_blackList(blackList), m_blackList(blackList),
m_indexStop(&indexStop), m_indexStop(&indexStop),
m_contentIndexStop(&contentIndexStop), m_contentIndexStop(&contentIndexStop),
m_contentIndexOcrStop(&contentIndexOcrStop),
m_mode(mode), m_mode(mode),
m_target(target) m_target(target)
{ {
@ -60,6 +63,10 @@ void BatchIndexer::run()
contentIndex(); contentIndex();
Q_EMIT contentIndexDone(m_mode); Q_EMIT contentIndexDone(m_mode);
} }
if(m_target & Target::Ocr) {
ocrIndex();
Q_EMIT ocrContentIndexDone(m_mode);
}
m_cache.clear(); m_cache.clear();
malloc_trim(0); malloc_trim(0);
qDebug() << "FirstRunIndexer: time :" << timer.elapsed() << "milliseconds"; qDebug() << "FirstRunIndexer: time :" << timer.elapsed() << "milliseconds";
@ -71,15 +78,15 @@ void BatchIndexer::fetch()
qDebug() << "Now begin fetching files to be indexed..."; qDebug() << "Now begin fetching files to be indexed...";
qDebug() << "Index folders:" << m_folders << "blacklist :" << m_blackList; qDebug() << "Index folders:" << m_folders << "blacklist :" << m_blackList;
QQueue<QString> bfs; QQueue<QString> bfs;
for(QString blockPath : m_blackList) { for(const QString& blockPath : m_blackList) {
for(QString path : m_folders) { for(const QString& path : m_folders) {
if(FileUtils::isOrUnder(path, blockPath)) { if(FileUtils::isOrUnder(path, blockPath)) {
m_folders.removeOne(path); m_folders.removeOne(path);
} }
} }
} }
m_cache.append(m_folders); m_cache.append(m_folders);
for(QString path : m_folders) { for(const QString &path : m_folders) {
bfs.enqueue(path); bfs.enqueue(path);
} }
QFileInfoList list; QFileInfoList list;
@ -91,9 +98,9 @@ void BatchIndexer::fetch()
while(!bfs.empty()) { while(!bfs.empty()) {
dir.setPath(bfs.dequeue()); dir.setPath(bfs.dequeue());
list = dir.entryInfoList(); list = dir.entryInfoList();
for(auto i : list) { for(const auto& i : list) {
bool isBlocked = false; bool isBlocked = false;
for(QString path : tmpList) { for(const QString &path : tmpList) {
if(i.absoluteFilePath() == path) { if(i.absoluteFilePath() == path) {
isBlocked = true; isBlocked = true;
tmpList.removeOne(path); tmpList.removeOne(path);
@ -144,7 +151,7 @@ void BatchIndexer::basicIndex()
} }
if(!indexTimes.isEmpty()) { if(!indexTimes.isEmpty()) {
qDebug() << indexTimes.size() << "documents need remove."; qDebug() << indexTimes.size() << "documents need remove.";
for(std::string uniqueTerm : indexTimes.keys()) { for(const std::string& uniqueTerm : indexTimes.keys()) {
basicDb.removeDocument(uniqueTerm); basicDb.removeDocument(uniqueTerm);
} }
basicDb.commit(); basicDb.commit();
@ -163,7 +170,7 @@ void BatchIndexer::basicIndex()
++finishNum; ++finishNum;
} }
if(batchSize >= 8192) { if(batchSize >= 8192) {
qDebug() << "8192 finished."; qDebug() << finishNum << "of" << allSize <<"finished.";
basicDb.commit(); basicDb.commit();
Q_EMIT progress(IndexType::Basic, allSize, finishNum); Q_EMIT progress(IndexType::Basic, allSize, finishNum);
//文件名索引很快 //文件名索引很快
@ -194,16 +201,10 @@ void BatchIndexer::contentIndex()
qWarning() << "Content db open failed, fail to run content index!"; qWarning() << "Content db open failed, fail to run content index!";
return; return;
} }
QStringList filesNeedIndex; QStringList filesNeedIndex;
QStringList filesNeedOCRIndex;
QMap<QString, bool> suffixMap = targetFileTypeMap;
QFileInfo info; QFileInfo info;
// ocr
// bool ocrEnable = FileIndexerConfig::getInstance()->isOCREnable();
if(FileIndexerConfig::getInstance()->isOCREnable()) {
qDebug() << "OCR enabled.";
suffixMap.INSERT(targetPhotographTypeMap);
}
if(m_mode == WorkMode::Rebuild) { if(m_mode == WorkMode::Rebuild) {
contentDb.rebuild(); contentDb.rebuild();
if(!contentDb.open()) { if(!contentDb.open()) {
@ -211,9 +212,9 @@ void BatchIndexer::contentIndex()
} }
} }
if(m_mode == WorkMode::Rebuild || m_mode == WorkMode::Add) { if(m_mode == WorkMode::Rebuild || m_mode == WorkMode::Add) {
for(QString path : m_cache) { for(const QString& path : m_cache) {
info.setFile(path); info.setFile(path);
if(true == suffixMap[info.suffix()] && info.isFile()) { if(targetFileTypeMap[info.suffix()] && info.isFile()) {
if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) { if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) {
filesNeedIndex.append(path); filesNeedIndex.append(path);
} }
@ -222,9 +223,9 @@ void BatchIndexer::contentIndex()
} else if(m_mode == WorkMode::Update) { } else if(m_mode == WorkMode::Update) {
QMap<std::string, std::string> indexTimes = contentDb.getIndexTimes(); QMap<std::string, std::string> indexTimes = contentDb.getIndexTimes();
qDebug() << indexTimes.size() << "documents recorded"; qDebug() << indexTimes.size() << "documents recorded";
for(QString path : m_cache) { for(const QString& path : m_cache) {
info.setFile(path); info.setFile(path);
if(true == suffixMap[info.suffix()] && info.isFile()) { if(targetFileTypeMap[info.suffix()] && info.isFile()) {
std::string uterm = FileUtils::makeDocUterm(path); std::string uterm = FileUtils::makeDocUterm(path);
if(indexTimes.value(uterm) != info.lastModified().toString("yyyyMMddHHmmsszzz").toStdString()) { if(indexTimes.value(uterm) != info.lastModified().toString("yyyyMMddHHmmsszzz").toStdString()) {
if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) { if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) {
@ -238,7 +239,7 @@ void BatchIndexer::contentIndex()
} }
if(!indexTimes.isEmpty()) { if(!indexTimes.isEmpty()) {
qDebug() << indexTimes.size() << "documents need remove"; qDebug() << indexTimes.size() << "documents need remove";
for(std::string uniqueTerm : indexTimes.keys()) { for(const std::string& uniqueTerm : indexTimes.keys()) {
contentDb.removeDocument(uniqueTerm); contentDb.removeDocument(uniqueTerm);
} }
contentDb.commit(); contentDb.commit();
@ -251,19 +252,12 @@ void BatchIndexer::contentIndex()
uint batchSize = 0; uint batchSize = 0;
uint finishNum = 0; uint finishNum = 0;
for (QString path : filesNeedIndex) { for (const QString& path : filesNeedIndex) {
if(m_contentIndexStop->LOAD) { if(m_contentIndexStop->LOAD) {
qDebug() << "Index stopped, interrupt content index."; qDebug() << "Index stopped, interrupt content index.";
filesNeedIndex.clear(); filesNeedIndex.clear();
filesNeedOCRIndex.clear();
return; return;
} }
info.setFile(path);
if(true == targetPhotographTypeMap[info.suffix()]) {
filesNeedOCRIndex.append(path);
filesNeedIndex.removeOne(path);
continue;
}
fileContentIndexer indexer(path); fileContentIndexer indexer(path);
if(indexer.index()) { if(indexer.index()) {
contentDb.addDocument(indexer.document()); contentDb.addDocument(indexer.document());
@ -274,24 +268,84 @@ void BatchIndexer::contentIndex()
} }
if(batchSize >= 30) { if(batchSize >= 30) {
contentDb.commit(); contentDb.commit();
qDebug() << "30 finished."; qDebug() << finishNum << "of" << allSize <<"finished.";
Q_EMIT progress(IndexType::Contents, allSize, finishNum); Q_EMIT progress(IndexType::Contents, allSize, finishNum);
batchSize = 0; batchSize = 0;
} }
} }
contentDb.commit(); contentDb.commit();
Q_EMIT progress(IndexType::Contents, allSize, finishNum); Q_EMIT progress(IndexType::Contents, allSize, finishNum);
filesNeedIndex.clear(); filesNeedIndex.clear();
qDebug() << "Content index for normal files finished, now begin OCR index"; qDebug() << "Finish content index";
int ocrSize = filesNeedOCRIndex.size(); }
qDebug() << ocrSize << "pictures need OCR index.";
batchSize = 0; void BatchIndexer::ocrIndex()
int ocrFinishNum = 0; {
for(QString path : filesNeedOCRIndex) { qDebug() << "Begin ocr content index";
if(m_contentIndexStop->LOAD) { if(m_contentIndexOcrStop->LOAD) {
qDebug() << "Index stopped, interrupt content index."; qDebug() << "Index stopped, abort ocr content index.";
return;
}
WritableDatabase contentDb(DataBaseType::OcrContent);
if(!contentDb.open()) {
qWarning() << "Content db open failed, fail to run ocr content index!";
return;
}
QStringList filesNeedOCRIndex;
QFileInfo info;
if(m_mode == WorkMode::Rebuild) {
contentDb.rebuild();
if(!contentDb.open()) {
return;
}
}
if(m_mode == WorkMode::Rebuild || m_mode == WorkMode::Add) {
for(const QString &path : m_cache) {
info.setFile(path);
if(targetPhotographTypeMap[info.suffix()] && info.isFile()) {
if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) {
filesNeedOCRIndex.append(path);
}
}
}
} else {
QMap<std::string, std::string> indexTimes = contentDb.getIndexTimes();
qDebug() << indexTimes.size() << "documents recorded";
for(const QString& path : m_cache) {
info.setFile(path);
if(targetPhotographTypeMap[info.suffix()] && info.isFile()) {
std::string uterm = FileUtils::makeDocUterm(path);
if(indexTimes.value(uterm) != info.lastModified().toString("yyyyMMddHHmmsszzz").toStdString()) {
if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) {
filesNeedOCRIndex.append(path);
indexTimes.remove(uterm);
}
} else {
indexTimes.remove(uterm);
}
}
}
if(!indexTimes.isEmpty()) {
qDebug() << indexTimes.size() << "documents need remove";
for(const std::string& uniqueTerm : indexTimes.keys()) {
contentDb.removeDocument(uniqueTerm);
}
contentDb.commit();
}
}
uint allSize = filesNeedOCRIndex.size();
qDebug() << allSize << "pictures need ocr content index.";
Q_EMIT progress(IndexType::OCR, allSize, 0);
uint batchSize = 0;
uint finishNum = 0;
for (const QString &path : filesNeedOCRIndex) {
if(m_contentIndexOcrStop->LOAD) {
qDebug() << "Index stopped, interrupt ocr content index.";
filesNeedOCRIndex.clear(); filesNeedOCRIndex.clear();
return; return;
} }
@ -299,22 +353,20 @@ void BatchIndexer::contentIndex()
if(indexer.index()) { if(indexer.index()) {
contentDb.addDocument(indexer.document()); contentDb.addDocument(indexer.document());
++batchSize; ++batchSize;
++ocrFinishNum; ++finishNum;
} else { } else {
// qDebug() << "Extract fail===" << path; // qDebug() << "Extract fail===" << path;
} }
if(batchSize >= 30) { if(batchSize >= 10) {
contentDb.commit(); contentDb.commit();
qDebug() << "30 finished."; qDebug() << finishNum << "of" << allSize <<"finished.";
Q_EMIT progress(IndexType::Contents, allSize, finishNum + ocrFinishNum); Q_EMIT progress(IndexType::OCR, allSize, finishNum);
Q_EMIT progress(IndexType::OCR, ocrSize, ocrFinishNum);
batchSize = 0; batchSize = 0;
} }
} }
contentDb.commit(); contentDb.commit();
Q_EMIT progress(IndexType::OCR, ocrSize, ocrFinishNum); Q_EMIT progress(IndexType::OCR, allSize, finishNum);
Q_EMIT progress(IndexType::Contents, allSize, finishNum + ocrFinishNum);
filesNeedOCRIndex.clear(); filesNeedOCRIndex.clear();
qDebug() << "Finish OCR index."; qDebug() << "Ocr content index finished,";
qDebug() << "Finish content index";
} }

View File

@ -53,6 +53,7 @@ public:
None = 0, None = 0,
Basic = 1u << 0, Basic = 1u << 0,
Content = 1u << 1, Content = 1u << 1,
Ocr = 1u << 2,
All = Basic | Content All = Basic | Content
}; };
Q_DECLARE_FLAGS(Targets, Target) Q_DECLARE_FLAGS(Targets, Target)
@ -61,6 +62,7 @@ public:
const QStringList& blackList, const QStringList& blackList,
QAtomicInt& indexStop, QAtomicInt& indexStop,
QAtomicInt& contentIndexStop, QAtomicInt& contentIndexStop,
QAtomicInt& contentIndexOcrStop,
WorkMode mode = WorkMode::Update, WorkMode mode = WorkMode::Update,
Targets target = Target::All); Targets target = Target::All);
void run() override; void run() override;
@ -69,17 +71,20 @@ Q_SIGNALS:
void progress(IndexType type, uint all, uint finished); void progress(IndexType type, uint all, uint finished);
void basicIndexDone(WorkMode); void basicIndexDone(WorkMode);
void contentIndexDone(WorkMode); void contentIndexDone(WorkMode);
void ocrContentIndexDone(WorkMode);
void done(WorkMode, Targets); void done(WorkMode, Targets);
private: private:
void fetch(); void fetch();
void basicIndex(); void basicIndex();
void contentIndex(); void contentIndex();
void ocrIndex();
QStringList m_folders; QStringList m_folders;
QStringList m_blackList; QStringList m_blackList;
QAtomicInt *m_indexStop = nullptr; QAtomicInt *m_indexStop = nullptr;
QAtomicInt *m_contentIndexStop = nullptr; QAtomicInt *m_contentIndexStop = nullptr;
QAtomicInt *m_contentIndexOcrStop = nullptr;
WorkMode m_mode; WorkMode m_mode;
Targets m_target; Targets m_target;
QStringList m_cache; QStringList m_cache;

View File

@ -23,11 +23,15 @@
#include <QDebug> #include <QDebug>
#include <QDir> #include <QDir>
#define INDEX_SETTINGS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-service.conf" #define INDEX_SETTINGS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-service.conf"
static const QString CONFIG_VERSION = QStringLiteral("1.0"); /**
* changelog: 1.1 ocr开关
*/
static const QString CONFIG_VERSION = QStringLiteral("1.1");
static const QByteArray UKUI_SEARCH_SCHEMAS = QByteArrayLiteral("org.ukui.search.settings"); static const QByteArray UKUI_SEARCH_SCHEMAS = QByteArrayLiteral("org.ukui.search.settings");
static const QString FILE_INDEX_ENABLE_KEY = QStringLiteral("fileIndexEnable"); static const QString FILE_INDEX_ENABLE_KEY = QStringLiteral("fileIndexEnable");
static const QString CONTENT_INDEX_ENABLE_KEY = QStringLiteral("contentIndexEnable"); static const QString CONTENT_INDEX_ENABLE_KEY = QStringLiteral("contentIndexEnable");
static const QString CONTENT_FUZZY_SEARCH_KEY = QStringLiteral("contentFuzzySearch"); static const QString CONTENT_FUZZY_SEARCH_KEY = QStringLiteral("contentFuzzySearch");
static const QString CONTENT_INDEX_ENABLE_OCR_KEY = QStringLiteral("contentIndexEnableOcr");
static const QString OCR_ENABLE_KEY = QStringLiteral("ocrEnable"); static const QString OCR_ENABLE_KEY = QStringLiteral("ocrEnable");
static const QString META_DATA_INDEX_ENABLE_KEY = QStringLiteral("metaDataIndexEnable"); static const QString META_DATA_INDEX_ENABLE_KEY = QStringLiteral("metaDataIndexEnable");
static const QString CONFIG_VERSION_KEY = QStringLiteral("version"); static const QString CONFIG_VERSION_KEY = QStringLiteral("version");
@ -65,6 +69,20 @@ FileIndexerConfig::FileIndexerConfig(QObject *parent)
if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_KEY)) { if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_KEY)) {
m_gsettings->set(CONTENT_INDEX_ENABLE_KEY, true); m_gsettings->set(CONTENT_INDEX_ENABLE_KEY, true);
} }
if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_OCR_KEY)) {
m_gsettings->set(CONTENT_INDEX_ENABLE_OCR_KEY, true);
}
}
m_gsettings->set(CONFIG_VERSION_KEY, CONFIG_VERSION);
} else if (oldVersion == "1.0") {
bool contentIndex = false;
if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_KEY)) {
contentIndex = m_gsettings->get(CONTENT_INDEX_ENABLE_KEY).toBool();
}
if(contentIndex) {
if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_OCR_KEY)) {
m_gsettings->set(CONTENT_INDEX_ENABLE_OCR_KEY, true);
}
} }
m_gsettings->set(CONFIG_VERSION_KEY, CONFIG_VERSION); m_gsettings->set(CONFIG_VERSION_KEY, CONFIG_VERSION);
} }
@ -74,7 +92,8 @@ FileIndexerConfig::FileIndexerConfig(QObject *parent)
Q_EMIT this->fileIndexEnableStatusChanged(m_gsettings->get(FILE_INDEX_ENABLE_KEY).toBool()); Q_EMIT this->fileIndexEnableStatusChanged(m_gsettings->get(FILE_INDEX_ENABLE_KEY).toBool());
} else if(key == CONTENT_INDEX_ENABLE_KEY) { } else if(key == CONTENT_INDEX_ENABLE_KEY) {
Q_EMIT this->contentIndexEnableStatusChanged(m_gsettings->get(CONTENT_INDEX_ENABLE_KEY).toBool()); Q_EMIT this->contentIndexEnableStatusChanged(m_gsettings->get(CONTENT_INDEX_ENABLE_KEY).toBool());
} else if(key == CONTENT_INDEX_ENABLE_OCR_KEY) {
Q_EMIT this->contentIndexEnableOcrStatusChanged(m_gsettings->get(CONTENT_INDEX_ENABLE_OCR_KEY).toBool());
} }
}); });
} else { } else {
@ -146,7 +165,17 @@ bool FileIndexerConfig::isFuzzySearchEnable()
bool FileIndexerConfig::isOCREnable() bool FileIndexerConfig::isOCREnable()
{ {
return m_settings->value(OCR_ENABLE_KEY, true).toBool(); if(m_gsettings) {
if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_OCR_KEY)) {
return m_gsettings->get(CONTENT_INDEX_ENABLE_OCR_KEY).toBool();
} else {
qWarning() << "FileIndexerConfig: Can not find key:" << CONTENT_INDEX_ENABLE_OCR_KEY << "in" << UKUI_SEARCH_SCHEMAS;
return false;
}
} else {
qWarning() << "FileIndexerConfig:" << UKUI_SEARCH_SCHEMAS << " is not found!";
return false;
}
} }
bool FileIndexerConfig::isMetaDataIndexEnable() bool FileIndexerConfig::isMetaDataIndexEnable()

View File

@ -87,6 +87,11 @@ Q_SIGNALS:
* *
*/ */
void contentIndexEnableStatusChanged(bool); void contentIndexEnableStatusChanged(bool);
/**
 * @brief contentIndexEnableOcrStatusChanged
 * Emitted when the OCR content-index switch changes in GSettings.
 * The bool argument carries the new value of that switch
 * (true = OCR indexing enabled, false = disabled).
 */
void contentIndexEnableOcrStatusChanged(bool);
private: private:
explicit FileIndexerConfig(QObject *parent = nullptr); explicit FileIndexerConfig(QObject *parent = nullptr);

View File

@ -28,7 +28,8 @@ IndexScheduler::IndexScheduler(QObject *parent) :
m_config(FileIndexerConfig::getInstance()), m_config(FileIndexerConfig::getInstance()),
m_state(Startup), m_state(Startup),
m_indexStop(0), m_indexStop(0),
m_contentIndexStop(0) m_contentIndexStop(0),
m_ocrContentIndexStop(0)
{ {
qRegisterMetaType<IndexerState>("IndexerState"); qRegisterMetaType<IndexerState>("IndexerState");
qRegisterMetaType<BatchIndexer::WorkMode>("BatchIndexer::WorkMode"); qRegisterMetaType<BatchIndexer::WorkMode>("BatchIndexer::WorkMode");
@ -38,6 +39,7 @@ IndexScheduler::IndexScheduler(QObject *parent) :
connect(&m_fileWatcher, &FileWatcher::filesUpdate, this, &IndexScheduler::updateIndex); connect(&m_fileWatcher, &FileWatcher::filesUpdate, this, &IndexScheduler::updateIndex);
connect(m_config, &FileIndexerConfig::fileIndexEnableStatusChanged, this, &IndexScheduler::fileIndexEnable); connect(m_config, &FileIndexerConfig::fileIndexEnableStatusChanged, this, &IndexScheduler::fileIndexEnable);
connect(m_config, &FileIndexerConfig::contentIndexEnableStatusChanged, this, &IndexScheduler::contentIndexEnable); connect(m_config, &FileIndexerConfig::contentIndexEnableStatusChanged, this, &IndexScheduler::contentIndexEnable);
connect(m_config, &FileIndexerConfig::contentIndexEnableOcrStatusChanged, this, &IndexScheduler::ocrContentIndexEnable);
connect(m_config, &FileIndexerConfig::appendIndexDir, this, &IndexScheduler::addNewPath); connect(m_config, &FileIndexerConfig::appendIndexDir, this, &IndexScheduler::addNewPath);
connect(m_config, &FileIndexerConfig::removeIndexDir, this, &IndexScheduler::removeIndex); connect(m_config, &FileIndexerConfig::removeIndexDir, this, &IndexScheduler::removeIndex);
@ -50,6 +52,9 @@ IndexScheduler::IndexScheduler(QObject *parent) :
} }
if(m_config->isContentIndexEnable()) { if(m_config->isContentIndexEnable()) {
targets |= BatchIndexer::Target::Content; targets |= BatchIndexer::Target::Content;
if(m_config->isOCREnable()) {
targets |= BatchIndexer::Target::Ocr;
}
} else { } else {
m_contentIndexStop.fetchAndStoreRelaxed(1); m_contentIndexStop.fetchAndStoreRelaxed(1);
} }
@ -62,8 +67,7 @@ void IndexScheduler::addNewPath(const QString &folders, const QStringList &black
qDebug() << "Index Scheduler is being stopped, add operation will be executed when started up next time."; qDebug() << "Index Scheduler is being stopped, add operation will be executed when started up next time.";
return; return;
} }
m_state = Running;
Q_EMIT stateChange(m_state);
BatchIndexer::Targets target = BatchIndexer::Target::None; BatchIndexer::Targets target = BatchIndexer::Target::None;
if(m_config->isFileIndexEnable()) { if(m_config->isFileIndexEnable()) {
target |= BatchIndexer::Target::Basic; target |= BatchIndexer::Target::Basic;
@ -73,6 +77,10 @@ void IndexScheduler::addNewPath(const QString &folders, const QStringList &black
target |= BatchIndexer::Target::Content; target |= BatchIndexer::Target::Content;
m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating); m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating);
} }
if(m_config->isOCREnable()) {
target |= BatchIndexer::Target::Ocr;
m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating);
}
BatchIndexer::WorkMode mode = BatchIndexer::WorkMode::Add; BatchIndexer::WorkMode mode = BatchIndexer::WorkMode::Add;
startIndexJob(QStringList() << folders, blackList, mode, target); startIndexJob(QStringList() << folders, blackList, mode, target);
if(BatchIndexer::Target::None != target) { if(BatchIndexer::Target::None != target) {
@ -101,7 +109,12 @@ void IndexScheduler::stop(BatchIndexer::Targets target)
m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Off); m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Off);
qDebug() << "File content index has been stopped."; qDebug() << "File content index has been stopped.";
} }
if(m_indexStop.LOAD && m_contentIndexStop.LOAD) { if(target & BatchIndexer::Target::Ocr) {
m_ocrContentIndexStop.fetchAndStoreRelaxed(1);
m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Off);
qDebug() << "File ocr content index has been stopped.";
}
if(m_indexStop.LOAD && m_contentIndexStop.LOAD && m_ocrContentIndexStop.LOAD) {
m_fileWatcher.removeWatch(); m_fileWatcher.removeWatch();
m_threadPool.clear(); m_threadPool.clear();
m_threadPool.waitForDone(-1); m_threadPool.waitForDone(-1);
@ -119,24 +132,30 @@ IndexScheduler::IndexerState IndexScheduler::getIndexState()
void IndexScheduler::start(BatchIndexer::Targets target) void IndexScheduler::start(BatchIndexer::Targets target)
{ {
qDebug() << "Index scheduler start." << target; qDebug() << "Index scheduler start." << target;
BatchIndexer::Targets realTargets = BatchIndexer::Target::None;
//检查是否有任务未完成 //检查是否有任务未完成
BatchIndexer::Targets tmpTargets = BatchIndexer::Target::None;
if(target & BatchIndexer::Basic) { if(target & BatchIndexer::Basic) {
if(m_indexFirstRunFinished && m_indexRebuildFinished) { if(m_indexPendingWorkCount == 0) {
tmpTargets |= BatchIndexer::Target::Basic; realTargets |= BatchIndexer::Target::Basic;
} }
} }
if(target & BatchIndexer::Content) { if(target & BatchIndexer::Content) {
if(m_contentIndexFirstRunFinished && m_contentIndexRebuildFinished) { if(m_contentIndexPendingWorkCount == 0) {
tmpTargets |= BatchIndexer::Target::Content; realTargets |= BatchIndexer::Target::Content;
} }
} }
if(tmpTargets == BatchIndexer::Target::None) { if(target & BatchIndexer::Ocr) {
qDebug() << "Index scheduler running, start operation ignored." if(m_ocrContentIndexPendingWorkCount == 0) {
<< "FirstRun finished: " << m_indexFirstRunFinished realTargets |= BatchIndexer::Target::Ocr;
<< "Rebuild finished: " << m_indexRebuildFinished }
<< "Content index firstRun finished: " << m_contentIndexFirstRunFinished }
<< "Content index rebuild finished: " << m_contentIndexRebuildFinished;
if(realTargets == BatchIndexer::Target::None) {
// Nothing can be started because every requested target still has jobs in
// flight. Log one pending-job counter per index type; the first entry is the
// basic (file name) index, so it must report m_indexPendingWorkCount rather
// than repeating the content-index counter.
qDebug() << "Index scheduler running, start operation ignored.\n"
         << "index pending work count: " << m_indexPendingWorkCount << "\n"
         << "Content index pending work count: " << m_contentIndexPendingWorkCount << "\n"
         << "Ocr content index pending work count: " << m_ocrContentIndexPendingWorkCount << "\n";
return; return;
} }
@ -147,25 +166,29 @@ void IndexScheduler::start(BatchIndexer::Targets target)
if(target & BatchIndexer::Content) { if(target & BatchIndexer::Content) {
m_contentIndexStop.fetchAndStoreRelaxed(0); m_contentIndexStop.fetchAndStoreRelaxed(0);
} }
//将索引调度器状态设置为运行中 if(target & BatchIndexer::Ocr) {
m_state = Running; m_ocrContentIndexStop.fetchAndStoreRelaxed(0);
Q_EMIT stateChange(m_state); }
//检查是否有数据库需要重建并且执行重建 //检查是否有数据库需要重建并且执行重建
BatchIndexer::Targets rebuiltTarget = checkAndRebuild(tmpTargets); BatchIndexer::Targets rebuiltTarget = checkAndRebuild(realTargets);
BatchIndexer::WorkMode mode = BatchIndexer::WorkMode::Update; BatchIndexer::WorkMode mode = BatchIndexer::WorkMode::Update;
BatchIndexer::Targets startTarget = BatchIndexer::Target::None; BatchIndexer::Targets startTarget = BatchIndexer::Target::None;
//如果数据库被执行过重建,那么跳过增量更新步骤。 //如果数据库被执行过重建,那么跳过增量更新步骤。
if((tmpTargets & BatchIndexer::Target::Basic) && !(rebuiltTarget & BatchIndexer::Target::Basic)) { if((realTargets & BatchIndexer::Target::Basic) && !(rebuiltTarget & BatchIndexer::Target::Basic)) {
startTarget |= BatchIndexer::Target::Basic; startTarget |= BatchIndexer::Target::Basic;
m_statusRecorder->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating); m_statusRecorder->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating);
} }
if((tmpTargets & BatchIndexer::Target::Content) && !(rebuiltTarget & BatchIndexer::Target::Content)) { if((realTargets & BatchIndexer::Target::Content) && !(rebuiltTarget & BatchIndexer::Target::Content)) {
startTarget |= BatchIndexer::Target::Content; startTarget |= BatchIndexer::Target::Content;
m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating); m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating);
} }
if(realTargets & BatchIndexer::Ocr && !(rebuiltTarget & BatchIndexer::Target::Ocr)) {
startTarget |= BatchIndexer::Target::Ocr;
m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating);
}
startIndexJob(m_config->currentIndexableDir(), m_config->currentBlackListOfIndex(), mode, startTarget); startIndexJob(m_config->currentIndexableDir(), m_config->currentBlackListOfIndex(), mode, startTarget);
//启动监听 //启动监听
@ -183,7 +206,6 @@ BatchIndexer::Targets IndexScheduler::checkAndRebuild(BatchIndexer::Targets targ
rebuildTarget |= BatchIndexer::Target::Basic; rebuildTarget |= BatchIndexer::Target::Basic;
m_statusRecorder->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Initializing); m_statusRecorder->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Initializing);
} }
if((target & BatchIndexer::Target::Content) && m_config->isContentIndexEnable() && if((target & BatchIndexer::Target::Content) && m_config->isContentIndexEnable() &&
(m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE_KEY).toInt() == IndexStatusRecorder::State::Error (m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE_KEY).toInt() == IndexStatusRecorder::State::Error
|| !m_statusRecorder->versionCheck(CONTENT_DATABASE_VERSION_KEY, CONTENT_DATABASE_VERSION))) { || !m_statusRecorder->versionCheck(CONTENT_DATABASE_VERSION_KEY, CONTENT_DATABASE_VERSION))) {
@ -191,6 +213,13 @@ BatchIndexer::Targets IndexScheduler::checkAndRebuild(BatchIndexer::Targets targ
rebuildTarget |= BatchIndexer::Target::Content; rebuildTarget |= BatchIndexer::Target::Content;
m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Initializing); m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Initializing);
} }
if((target & BatchIndexer::Target::Ocr) && m_config->isOCREnable() &&
(m_statusRecorder->getStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY).toInt() == IndexStatusRecorder::State::Error
|| !m_statusRecorder->versionCheck(OCR_CONTENT_DATABASE_VERSION_KEY, OCR_CONTENT_DATABASE_VERSION))) {
qDebug() << "Ocr content database need rebuild";
rebuildTarget |= BatchIndexer::Target::Ocr;
m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Initializing);
}
startIndexJob(m_config->currentIndexableDir(), m_config->currentBlackListOfIndex(), mode, rebuildTarget); startIndexJob(m_config->currentIndexableDir(), m_config->currentBlackListOfIndex(), mode, rebuildTarget);
return rebuildTarget; return rebuildTarget;
} }
@ -198,34 +227,28 @@ BatchIndexer::Targets IndexScheduler::checkAndRebuild(BatchIndexer::Targets targ
void IndexScheduler::startIndexJob(const QStringList& folders,const QStringList& blackList, BatchIndexer::WorkMode mode, BatchIndexer::Targets target) void IndexScheduler::startIndexJob(const QStringList& folders,const QStringList& blackList, BatchIndexer::WorkMode mode, BatchIndexer::Targets target)
{ {
if(BatchIndexer::Target::None != target) { if(BatchIndexer::Target::None != target) {
switch (mode) { if(mode == BatchIndexer::WorkMode::Add) {
case BatchIndexer::WorkMode::Add: m_addNewPathPendingWorkCount++;
m_addNewPathFinished = false; }
break;
case BatchIndexer::WorkMode::Rebuild:
if(target & BatchIndexer::Basic) { if(target & BatchIndexer::Basic) {
m_indexRebuildFinished = false; m_indexPendingWorkCount++;
} }
if(target & BatchIndexer::Content) { if(target & BatchIndexer::Content) {
m_contentIndexRebuildFinished = false; m_contentIndexPendingWorkCount++;
} }
break; if(target & BatchIndexer::Ocr) {
case BatchIndexer::WorkMode::Update: m_ocrContentIndexPendingWorkCount++;
if(target & BatchIndexer::Basic) {
m_indexFirstRunFinished = false;
} }
if(target & BatchIndexer::Content) {
m_contentIndexFirstRunFinished = false; m_state = Running;
} Q_EMIT stateChange(m_state);
break;
default: BatchIndexer *indexer = new BatchIndexer(folders, blackList, m_indexStop, m_contentIndexStop, m_ocrContentIndexStop, mode, target);
break; connect(indexer, &BatchIndexer::done, this, &IndexScheduler::batchIndexerFinished, Qt::QueuedConnection);
}
BatchIndexer *indexer = new BatchIndexer(folders, blackList, m_indexStop, m_contentIndexStop, mode, target);
connect(indexer, &BatchIndexer::done, this, &IndexScheduler::firstRunFinished, Qt::QueuedConnection);
connect(indexer, &BatchIndexer::progress, this, &IndexScheduler::process, Qt::QueuedConnection); connect(indexer, &BatchIndexer::progress, this, &IndexScheduler::process, Qt::QueuedConnection);
connect(indexer, &BatchIndexer::basicIndexDone, this, &IndexScheduler::onBasicIndexDone, Qt::QueuedConnection); connect(indexer, &BatchIndexer::basicIndexDone, this, &IndexScheduler::onBasicIndexDone, Qt::QueuedConnection);
connect(indexer, &BatchIndexer::contentIndexDone, this, &IndexScheduler::onContentIndexDone, Qt::QueuedConnection); connect(indexer, &BatchIndexer::contentIndexDone, this, &IndexScheduler::onContentIndexDone, Qt::QueuedConnection);
connect(indexer, &BatchIndexer::ocrContentIndexDone, this, &IndexScheduler::onOcrContentIndexDone, Qt::QueuedConnection);
m_threadPool.start(indexer); m_threadPool.start(indexer);
} }
} }
@ -248,18 +271,33 @@ void IndexScheduler::contentIndexEnable(bool enable)
} }
} }
/**
 * @brief Switch indexing of the OCR target on or off.
 * @param enable true calls start() for BatchIndexer::Ocr, false calls stop() for it.
 */
void IndexScheduler::ocrContentIndexEnable(bool enable)
{
    if(!enable) {
        stop(BatchIndexer::Ocr);
        return;
    }
    start(BatchIndexer::Ocr);
}
void IndexScheduler::updateIndex(const QVector<PendingFile> &files) void IndexScheduler::updateIndex(const QVector<PendingFile> &files)
{ {
qDebug() << "updateIndex====="; qDebug() << "updateIndex=====";
m_updateFinished = false; m_updatePendingWorkCount++;
m_state = Running; m_state = Running;
IndexUpdater *updateJob = new IndexUpdater(files, m_indexStop, m_contentIndexStop); Q_EMIT stateChange(m_state);
IndexUpdater *updateJob = new IndexUpdater(files, m_indexStop, m_contentIndexStop, m_ocrContentIndexStop);
connect(updateJob, &IndexUpdater::done, this, &IndexScheduler::updateFinished, Qt::QueuedConnection); connect(updateJob, &IndexUpdater::done, this, &IndexScheduler::updateFinished, Qt::QueuedConnection);
m_threadPool.start(updateJob); m_threadPool.start(updateJob);
} }
void IndexScheduler::firstRunFinished() void IndexScheduler::batchIndexerFinished(BatchIndexer::WorkMode mode, BatchIndexer::Targets targets)
{ {
if(mode == BatchIndexer::WorkMode::Add) {
m_addNewPathPendingWorkCount--;
}
if(isIdle()) { if(isIdle()) {
m_state = Idle; m_state = Idle;
Q_EMIT stateChange(m_state); Q_EMIT stateChange(m_state);
@ -268,7 +306,7 @@ void IndexScheduler::firstRunFinished()
void IndexScheduler::updateFinished() void IndexScheduler::updateFinished()
{ {
m_updateFinished = true; m_updatePendingWorkCount--;
if(isIdle()) { if(isIdle()) {
m_state = Idle; m_state = Idle;
Q_EMIT stateChange(m_state); Q_EMIT stateChange(m_state);
@ -277,30 +315,20 @@ void IndexScheduler::updateFinished()
bool IndexScheduler::isIdle() bool IndexScheduler::isIdle()
{ {
return m_indexFirstRunFinished && m_contentIndexFirstRunFinished return m_indexPendingWorkCount == 0
&& m_addNewPathFinished && m_contentIndexPendingWorkCount == 0
&& m_updateFinished && m_ocrContentIndexPendingWorkCount == 0
&& m_indexRebuildFinished && m_contentIndexRebuildFinished; && m_updatePendingWorkCount == 0
&& m_addNewPathPendingWorkCount == 0;
} }
void IndexScheduler::onBasicIndexDone(BatchIndexer::WorkMode mode) void IndexScheduler::onBasicIndexDone(BatchIndexer::WorkMode mode)
{ {
switch (mode) { Q_UNUSED(mode)
case BatchIndexer::WorkMode::Add: m_indexPendingWorkCount--;
m_addNewPathFinished = true;
break;
case BatchIndexer::WorkMode::Rebuild:
m_indexRebuildFinished = true;
break;
case BatchIndexer::WorkMode::Update:
m_indexFirstRunFinished = true;
break;
default:
break;
}
bool success = false; bool success = false;
if(!(m_statusRecorder->getStatus(INDEX_DATABASE_STATE_KEY).toInt() == IndexStatusRecorder::State::Error)) { if(m_statusRecorder->getStatus(INDEX_DATABASE_STATE_KEY).toInt() != IndexStatusRecorder::State::Error) {
m_statusRecorder->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Ready); m_statusRecorder->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Ready);
success = true; success = true;
} }
@ -309,23 +337,26 @@ void IndexScheduler::onBasicIndexDone(BatchIndexer::WorkMode mode)
/**
 * @brief Handle completion of the content-index part of a batch job.
 *
 * Releases one unit of pending content-index work, marks the content database
 * Ready unless its recorded state is Error, and emits contentIndexDone().
 *
 * @param mode work mode of the finished job (unused).
 */
void IndexScheduler::onContentIndexDone(BatchIndexer::WorkMode mode)
{
    Q_UNUSED(mode)
    // The counter is unsigned: decrementing at zero would wrap and keep isIdle() false forever.
    if(m_contentIndexPendingWorkCount > 0) {
        m_contentIndexPendingWorkCount--;
    } else {
        qWarning() << "Content index done reported without pending work";
    }

    const bool success = m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE_KEY).toInt() != IndexStatusRecorder::State::Error;
    if(success) {
        m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Ready);
    }
    Q_EMIT contentIndexDone(success);
}
/**
 * @brief Handle completion of the OCR content-index part of a batch job.
 *
 * Releases one unit of pending OCR work, marks the OCR content database Ready
 * unless its recorded state is Error, and emits contentIndexDone().
 *
 * @param mode work mode of the finished job (unused).
 */
void IndexScheduler::onOcrContentIndexDone(BatchIndexer::WorkMode mode)
{
    Q_UNUSED(mode)
    // The counter is unsigned: decrementing at zero would wrap and keep isIdle() false forever.
    if(m_ocrContentIndexPendingWorkCount > 0) {
        m_ocrContentIndexPendingWorkCount--;
    } else {
        qWarning() << "Ocr content index done reported without pending work";
    }

    const bool success = m_statusRecorder->getStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY).toInt() != IndexStatusRecorder::State::Error;
    if(success) {
        m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Ready);
    }
    // NOTE(review): this reuses the plain content-index signal for the OCR database;
    // confirm that listeners are meant to treat both completions alike.
    Q_EMIT contentIndexDone(success);
}

View File

@ -43,17 +43,6 @@ public:
Q_ENUM(IndexerState) Q_ENUM(IndexerState)
explicit IndexScheduler(QObject *parent = nullptr); explicit IndexScheduler(QObject *parent = nullptr);
/**
* @brief addNewPath
* @param folders
* @param blackList
*/
Q_SCRIPTABLE void addNewPath(const QString &folders, const QStringList& blackList = QStringList());
/**
* @brief removeIndex
* @param folders
*/
Q_SCRIPTABLE void removeIndex(const QString& folders);
Q_SCRIPTABLE IndexerState getIndexState(); Q_SCRIPTABLE IndexerState getIndexState();
@ -65,16 +54,29 @@ Q_SIGNALS:
void done(); void done();
private Q_SLOTS: private Q_SLOTS:
/**
* @brief addNewPath
* @param folders
* @param blackList
*/
Q_SCRIPTABLE void addNewPath(const QString &folders, const QStringList& blackList = QStringList());
/**
* @brief removeIndex
* @param folders
*/
Q_SCRIPTABLE void removeIndex(const QString& folders);
void start(BatchIndexer::Targets target); void start(BatchIndexer::Targets target);
void stop(BatchIndexer::Targets target); void stop(BatchIndexer::Targets target);
void fileIndexEnable(bool enable); void fileIndexEnable(bool enable);
void contentIndexEnable(bool enable); void contentIndexEnable(bool enable);
void ocrContentIndexEnable(bool enable);
void updateIndex(const QVector<PendingFile>& files); void updateIndex(const QVector<PendingFile>& files);
void firstRunFinished(); void batchIndexerFinished(BatchIndexer::WorkMode mode, BatchIndexer::Targets targets);
void updateFinished(); void updateFinished();
bool isIdle(); bool isIdle();
void onBasicIndexDone(BatchIndexer::WorkMode mode); void onBasicIndexDone(BatchIndexer::WorkMode mode);
void onContentIndexDone(BatchIndexer::WorkMode mode); void onContentIndexDone(BatchIndexer::WorkMode mode);
void onOcrContentIndexDone(BatchIndexer::WorkMode mode);
private: private:
/** /**
@ -90,16 +92,15 @@ private:
IndexerState m_state; IndexerState m_state;
QAtomicInt m_indexStop; QAtomicInt m_indexStop;
QAtomicInt m_contentIndexStop; QAtomicInt m_contentIndexStop;
QAtomicInt m_ocrContentIndexStop;
QThreadPool m_threadPool; QThreadPool m_threadPool;
bool m_indexFirstRunFinished = true; quint64 m_indexPendingWorkCount = 0;
bool m_contentIndexFirstRunFinished = true; quint64 m_contentIndexPendingWorkCount = 0;
quint64 m_ocrContentIndexPendingWorkCount= 0;
bool m_indexRebuildFinished = true; quint64 m_updatePendingWorkCount = 0;
bool m_contentIndexRebuildFinished = true; quint64 m_addNewPathPendingWorkCount = 0;
bool m_updateFinished = true;
bool m_addNewPathFinished = true;
}; };
} }
#endif // INDEXSCHEDULER_H #endif // INDEXSCHEDULER_H

View File

@ -25,10 +25,12 @@
#include <QDir> #include <QDir>
#include <QMutex> #include <QMutex>
#define CONTENT_INDEX_DATABASE_STATE_KEY "content_index_database_state" #define CONTENT_INDEX_DATABASE_STATE_KEY "content_index_database_state"
#define OCR_CONTENT_INDEX_DATABASE_STATE_KEY "ocr_content_index_database_state"
#define INDEX_DATABASE_STATE_KEY "index_database_state" #define INDEX_DATABASE_STATE_KEY "index_database_state"
#define INDEX_STATUS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-index-status.conf" #define INDEX_STATUS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-index-status.conf"
#define INDEX_DATABASE_VERSION_KEY "index_database_version" #define INDEX_DATABASE_VERSION_KEY "index_database_version"
#define CONTENT_DATABASE_VERSION_KEY "content_database_version" #define CONTENT_DATABASE_VERSION_KEY "content_database_version"
#define OCR_CONTENT_DATABASE_VERSION_KEY "ocr_content_database_version"
namespace UkuiSearch { namespace UkuiSearch {
//fixme: we need a better way to record index status. //fixme: we need a better way to record index status.
class IndexStatusRecorder : public QObject class IndexStatusRecorder : public QObject

View File

@ -27,10 +27,11 @@
#include "file-utils.h" #include "file-utils.h"
#include "compatible-define.h" #include "compatible-define.h"
using namespace UkuiSearch; using namespace UkuiSearch;
/**
 * @brief Build an update job for a batch of pending file events.
 *
 * The job keeps its own copy of @p files and stores the addresses of the
 * three stop flags; it does not take ownership of them.
 *
 * @param files               pending file events to apply.
 * @param indexstop           stop flag for the basic index.
 * @param contentIndexstop    stop flag for the content index.
 * @param contentIndexOcrStop stop flag for the OCR content index.
 */
IndexUpdater::IndexUpdater(const QVector<PendingFile>& files, QAtomicInt& indexstop, QAtomicInt& contentIndexstop, QAtomicInt& contentIndexOcrStop)
    // Initializers follow the member declaration order to avoid -Wreorder warnings.
    : m_cache(files),
      m_contentIndexStop(&contentIndexstop),
      m_indexStop(&indexstop),
      m_contentIndexOcrStop(&contentIndexOcrStop)
{
}
void IndexUpdater::updateIndex() void IndexUpdater::updateIndex()
@ -44,7 +45,7 @@ void IndexUpdater::updateIndex()
return; return;
} }
qDebug() << "===update basic index==="; qDebug() << "===update basic index===";
for(PendingFile file : m_cache) { for(const PendingFile& file : m_cache) {
if(file.shouldRemoveIndex()) { if(file.shouldRemoveIndex()) {
qDebug() << "| remove:" <<file.path(); qDebug() << "| remove:" <<file.path();
basicDb.removeDocument(file.path()); basicDb.removeDocument(file.path());
@ -63,6 +64,21 @@ void IndexUpdater::updateIndex()
basicDb.commit(); basicDb.commit();
qDebug() << "===finish update basic index==="; qDebug() << "===finish update basic index===";
} }
}
void IndexUpdater::run()
{
updateIndex();
updateContentIndex();
updateOcrContentIndex();
m_cache.clear();
m_cache.shrink_to_fit();
malloc_trim(0);
Q_EMIT done();
}
void IndexUpdater::updateContentIndex()
{
if(FileIndexerConfig::getInstance()->isContentIndexEnable() && !m_contentIndexStop->LOAD) { if(FileIndexerConfig::getInstance()->isContentIndexEnable() && !m_contentIndexStop->LOAD) {
WritableDatabase contentDb(DataBaseType::Content); WritableDatabase contentDb(DataBaseType::Content);
if(!contentDb.open()) { if(!contentDb.open()) {
@ -70,23 +86,22 @@ void IndexUpdater::updateIndex()
return; return;
} }
QMap<QString, bool> suffixMap = targetFileTypeMap;
//ocr
if(FileIndexerConfig::getInstance()->isOCREnable()) {
suffixMap.INSERT(targetPhotographTypeMap);
}
qDebug() << "===update content index==="; qDebug() << "===update content index===";
int size = 0; int size = 0;
for(PendingFile file : m_cache) { for(PendingFile file : m_cache) {
if(m_contentIndexStop->LOAD) {
qDebug() << "Content index update interrupted";
return;
}
QString suffix = QFileInfo(file.path()).suffix(); QString suffix = QFileInfo(file.path()).suffix();
if(file.shouldRemoveIndex()) { if(file.shouldRemoveIndex()) {
qDebug() << "| remove:" <<file.path(); qDebug() << "| remove:" <<file.path();
if(file.isDir()) { if(file.isDir()) {
contentDb.removeChildrenDocument(file.path()); contentDb.removeChildrenDocument(file.path());
} else if(true == suffixMap[suffix]) { } else if(targetFileTypeMap[suffix]) {
contentDb.removeDocument(file.path()); contentDb.removeDocument(file.path());
} }
} else if(true == suffixMap[suffix] && !file.isDir()) { } else if(targetFileTypeMap[suffix] && !file.isDir()) {
if(FileUtils::isEncrypedOrUnsupport(file.path(), suffix)) { if(FileUtils::isEncrypedOrUnsupport(file.path(), suffix)) {
if(file.isModified() || file.isMoveTo()) { if(file.isModified() || file.isMoveTo()) {
contentDb.removeDocument(file.path()); contentDb.removeDocument(file.path());
@ -99,7 +114,6 @@ void IndexUpdater::updateIndex()
contentDb.addDocument(indexer.document()); contentDb.addDocument(indexer.document());
++size; ++size;
} else if(file.isModified() || file.isMoveTo()){ } else if(file.isModified() || file.isMoveTo()){
contentDb.removeDocument(file.path()); contentDb.removeDocument(file.path());
} }
} }
@ -108,24 +122,59 @@ void IndexUpdater::updateIndex()
qDebug() << "30 finished."; qDebug() << "30 finished.";
size = 0; size = 0;
} }
if(m_contentIndexStop->LOAD) {
qDebug() << "Index stopped, content index update interrupted";
m_cache.clear();
m_cache.shrink_to_fit();
malloc_trim(0);
return;
}
} }
contentDb.commit(); contentDb.commit();
qDebug() << "===finish update content index==="; qDebug() << "===finish update content index===";
} }
m_cache.clear();
m_cache.shrink_to_fit();
malloc_trim(0);
Q_EMIT done();
} }
void IndexUpdater::run() void IndexUpdater::updateOcrContentIndex()
{ {
updateIndex(); if(FileIndexerConfig::getInstance()->isOCREnable() && !m_contentIndexOcrStop->LOAD) {
WritableDatabase contentDb(DataBaseType::OcrContent);
if(!contentDb.open()) {
qWarning() << "Ocr content db open failed, fail to update index";
return;
}
qDebug() << "===update ocr content index===";
int size = 0;
for(PendingFile file : m_cache) {
if(m_contentIndexOcrStop->LOAD) {
qDebug() << "Ocr content index update interrupted";
return;
}
QString suffix = QFileInfo(file.path()).suffix();
if(file.shouldRemoveIndex()) {
qDebug() << "| remove:" <<file.path();
if(file.isDir()) {
contentDb.removeChildrenDocument(file.path());
} else if(targetPhotographTypeMap[suffix]) {
contentDb.removeDocument(file.path());
}
} else if(targetPhotographTypeMap[suffix] && !file.isDir()) {
if(FileUtils::isEncrypedOrUnsupport(file.path(), suffix)) {
if(file.isModified() || file.isMoveTo()) {
contentDb.removeDocument(file.path());
}
continue;
}
qDebug() << "| index:" <<file.path();
fileContentIndexer indexer(file.path());
if(indexer.index()) {
contentDb.addDocument(indexer.document());
++size;
} else if(file.isModified() || file.isMoveTo()){
contentDb.removeDocument(file.path());
}
}
if(size >= 10) {
contentDb.commit();
qDebug() << "10 finished.";
size = 0;
}
}
contentDb.commit();
qDebug() << "===finish update ocr content index===";
}
} }

View File

@ -31,7 +31,7 @@ class IndexUpdater : public QObject, public QRunnable
{ {
Q_OBJECT Q_OBJECT
public: public:
explicit IndexUpdater(const QVector<PendingFile>& files, QAtomicInt& indexstop, QAtomicInt& contentIndexstop); explicit IndexUpdater(const QVector<PendingFile>& files, QAtomicInt& indexstop, QAtomicInt& contentIndexstop, QAtomicInt& contentIndexOcrStop);
void run() override; void run() override;
Q_SIGNALS: Q_SIGNALS:
@ -39,10 +39,13 @@ Q_SIGNALS:
private: private:
void updateIndex(); void updateIndex();
void updateContentIndex();
void updateOcrContentIndex();
QVector<PendingFile> m_cache; QVector<PendingFile> m_cache;
QAtomicInt *m_contentIndexStop = nullptr; QAtomicInt *m_contentIndexStop = nullptr;
QAtomicInt *m_indexStop = nullptr; QAtomicInt *m_indexStop = nullptr;
QAtomicInt *m_contentIndexOcrStop = nullptr;
}; };
} }
#endif // INDEXUPDATER_H #endif // INDEXUPDATER_H

View File

@ -18,17 +18,17 @@
* *
*/ */
#include "search-manager.h" #include "search-manager.h"
#include <utility>
#include "dir-watcher.h" #include "dir-watcher.h"
using namespace UkuiSearch; using namespace UkuiSearch;
size_t SearchManager::uniqueSymbolFile = 0; size_t SearchManager::uniqueSymbolFile = 0;
size_t SearchManager::uniqueSymbolDir = 0; size_t SearchManager::uniqueSymbolDir = 0;
size_t SearchManager::uniqueSymbolContent = 0; size_t SearchManager::uniqueSymbolContent = 0;
size_t SearchManager::uniqueSymbolOcr = 0;
QMutex SearchManager::m_mutexFile; QMutex SearchManager::m_mutexFile;
QMutex SearchManager::m_mutexDir; QMutex SearchManager::m_mutexDir;
QMutex SearchManager::m_mutexContent; QMutex SearchManager::m_mutexContent;
QMutex SearchManager::m_mutexOcr;
/**
 * @brief Construct the search manager.
 * @param parent QObject parent forwarded to the base class.
 */
SearchManager::SearchManager(QObject *parent)
    : QObject(parent)
{
}
@ -36,7 +36,7 @@ SearchManager::SearchManager(QObject *parent) : QObject(parent) {
/** @brief Destructor; the class releases nothing itself. */
SearchManager::~SearchManager() = default;
int SearchManager::getCurrentIndexCount() { uint SearchManager::getCurrentIndexCount() {
try { try {
Xapian::Database db(INDEX_PATH); Xapian::Database db(INDEX_PATH);
return db.get_doccount(); return db.get_doccount();
@ -48,7 +48,7 @@ int SearchManager::getCurrentIndexCount() {
bool SearchManager::isBlocked(QString &path) { bool SearchManager::isBlocked(QString &path) {
QStringList blockList = DirWatcher::getDirWatcher()->getBlockDirsOfUser(); QStringList blockList = DirWatcher::getDirWatcher()->getBlockDirsOfUser();
for(QString i : blockList) { for(const QString& i : blockList) {
if(FileUtils::isOrUnder(path, i)) if(FileUtils::isOrUnder(path, i))
return true; return true;
} }
@ -56,7 +56,7 @@ bool SearchManager::isBlocked(QString &path) {
} }
bool SearchManager::creatResultInfo(SearchPluginIface::ResultInfo &ri, QString path) bool SearchManager::creatResultInfo(SearchPluginIface::ResultInfo &ri, const QString& path)
{ {
QFileInfo info(path); QFileInfo info(path);
if(!info.exists()) { if(!info.exists()) {
@ -68,7 +68,7 @@ bool SearchManager::creatResultInfo(SearchPluginIface::ResultInfo &ri, QString p
<< SearchPluginIface::DescriptionInfo{tr("Path:"), path} \ << SearchPluginIface::DescriptionInfo{tr("Path:"), path} \
<< SearchPluginIface::DescriptionInfo{tr("Modified time:"), info.lastModified().toString("yyyy/MM/dd hh:mm:ss")}; << SearchPluginIface::DescriptionInfo{tr("Modified time:"), info.lastModified().toString("yyyy/MM/dd hh:mm:ss")};
ri.actionKey = path; ri.actionKey = path;
if (true == targetPhotographTypeMap[info.suffix()]) { if (targetPhotographTypeMap[info.suffix()]) {
ri.type = 1;//1为ocr图片文件 ri.type = 1;//1为ocr图片文件
} else { } else {
ri.type = 0;//0为默认文本文件 ri.type = 0;//0为默认文本文件
@ -80,8 +80,8 @@ FileSearch::FileSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, s
this->setAutoDelete(true); this->setAutoDelete(true);
m_search_result = searchResult; m_search_result = searchResult;
m_uniqueSymbol = uniqueSymbol; m_uniqueSymbol = uniqueSymbol;
m_keyword = keyword; m_keyword = std::move(keyword);
m_value = value; m_value = std::move(value);
m_slot = slot; m_slot = slot;
m_begin = begin; m_begin = begin;
m_num = num; m_num = num;
@ -113,20 +113,19 @@ void FileSearch::run() {
//可能会有更好的方法,待优化。 //可能会有更好的方法,待优化。
m_begin = 0; m_begin = 0;
m_num = 100; m_num = 100;
int resultCount = 1; uint resultCount = 1;
int totalCount = 0; uint totalCount = 0;
while(resultCount > 0) { while(resultCount > 0) {
resultCount = keywordSearchfile(); resultCount = keywordSearchFile();
m_begin += m_num; m_begin += m_num;
totalCount += resultCount; totalCount += resultCount;
} }
qDebug() << "Total count:" << m_value << totalCount; qDebug() << "Total count:" << m_value << totalCount;
return;
} }
int FileSearch::keywordSearchfile() { uint FileSearch::keywordSearchFile() {
try { try {
qDebug() << "--keywordSearchfile start--"; qDebug() << "--keywordSearchFile start--";
Xapian::Database db(INDEX_PATH); Xapian::Database db(INDEX_PATH);
Xapian::Query query = creatQueryForFileSearch(); Xapian::Query query = creatQueryForFileSearch();
Xapian::Enquire enquire(db); Xapian::Enquire enquire(db);
@ -140,24 +139,24 @@ int FileSearch::keywordSearchfile() {
queryFile = query; queryFile = query;
} }
qDebug() << "keywordSearchfile:" << QString::fromStdString(queryFile.get_description()); qDebug() << "keywordSearchFile:" << QString::fromStdString(queryFile.get_description());
enquire.set_query(queryFile); enquire.set_query(queryFile);
enquire.set_docid_order(enquire.DONT_CARE); enquire.set_docid_order(Xapian::Enquire::DONT_CARE);
enquire.set_sort_by_relevance_then_value(2, true); enquire.set_sort_by_relevance_then_value(2, true);
Xapian::MSet result = enquire.get_mset(m_begin, m_num, 0, m_matchDecider); Xapian::MSet result = enquire.get_mset(m_begin, m_num, nullptr, m_matchDecider);
int resultCount = result.size(); uint resultCount = result.size();
qDebug() << "keywordSearchfile results count=" << resultCount; qDebug() << "keywordSearchFile results count=" << resultCount;
if(resultCount == 0) if(resultCount == 0)
return 0; return 0;
if(getResult(result) == -1) if(getResult(result) == -1)
return -1; return -1;
qDebug() << "--keywordSearchfile finish--"; qDebug() << "--keywordSearchFile finish--";
return resultCount; return resultCount;
} catch(const Xapian::Error &e) { } catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description()); qWarning() << QString::fromStdString(e.get_description());
qDebug() << "--keywordSearchfile finish--"; qDebug() << "--keywordSearchFile finish--";
return -1; return -1;
} }
} }
@ -170,7 +169,7 @@ Xapian::Query FileSearch::creatQueryForFileSearch() {
for(; bf.position() != -1; bf.toNextBoundary()) { for(; bf.position() != -1; bf.toNextBoundary()) {
int end = bf.position(); int end = bf.position();
if(bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) { if(bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) {
v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.mid(start, end - start)).toStdString())); v.emplace_back(QUrl::toPercentEncoding(userInput.mid(start, end - start)).toStdString());
} }
start = end; start = end;
} }
@ -233,7 +232,7 @@ int FileSearch::getResult(Xapian::MSet &result) {
FileContentSearch::FileContentSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, bool fuzzy, int begin, int num) FileContentSearch::FileContentSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, bool fuzzy, int begin, int num)
:m_search_result(searchResult), :m_search_result(searchResult),
m_uniqueSymbol(uniqueSymbol), m_uniqueSymbol(uniqueSymbol),
m_keyword(keyword), m_keyword(std::move(keyword)),
m_fuzzy(fuzzy), m_fuzzy(fuzzy),
m_begin(begin), m_begin(begin),
m_num(num) m_num(num)
@ -258,22 +257,22 @@ void FileContentSearch::run() {
//这里同文件搜索,待优化。 //这里同文件搜索,待优化。
m_begin = 0; m_begin = 0;
m_num = 100; m_num = 100;
int resultCount = 1; uint resultCount = 1;
int totalCount = 0; uint totalCount = 0;
while(resultCount > 0) { while(resultCount > 0) {
resultCount = keywordSearchContent(); resultCount = keywordSearchContent();
m_begin += m_num; m_begin += m_num;
totalCount += resultCount; totalCount += resultCount;
} }
qDebug() << "Total count:" << totalCount; qDebug() << "Total count:" << totalCount;
return;
} }
int FileContentSearch::keywordSearchContent() { uint FileContentSearch::keywordSearchContent() {
try { try {
qDebug() << "--keywordSearchContent search start--"; qDebug() << "--keywordSearchContent search start--";
Xapian::Database db(CONTENT_INDEX_PATH); Xapian::Database db(CONTENT_INDEX_PATH);
db.add_database(Xapian::Database(OCR_CONTENT_INDEX_PATH));
Xapian::Enquire enquire(db); Xapian::Enquire enquire(db);
Xapian::QueryParser qp; Xapian::QueryParser qp;
qp.set_default_op(Xapian::Query::OP_AND); qp.set_default_op(Xapian::Query::OP_AND);
@ -282,15 +281,15 @@ int FileContentSearch::keywordSearchContent() {
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword); std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword);
//Creat a query //Creat a query
std::string words; std::string words;
for(size_t i = 0; i < sKeyWord.size(); i++) { for(auto & i : sKeyWord) {
words.append(sKeyWord.at(i).word).append(" "); words.append(i.word).append(" ");
} }
// Xapian::Query query = qp.parse_query(words); // Xapian::Query query = qp.parse_query(words);
std::vector<Xapian::Query> v; std::vector<Xapian::Query> v;
for(size_t i = 0; i < sKeyWord.size(); i++) { for(auto & i : sKeyWord) {
v.push_back(Xapian::Query(sKeyWord.at(i).word)); v.emplace_back(i.word);
// qDebug() << QString::fromStdString(sKeyWord.at(i).word); // qDebug() << QString::fromStdString(sKeyWord.at(i).word);
} }
Xapian::Query query; Xapian::Query query;
@ -304,9 +303,9 @@ int FileContentSearch::keywordSearchContent() {
enquire.set_query(query); enquire.set_query(query);
Xapian::MSet result = enquire.get_mset(m_begin, m_num, 0, m_matchDecider); Xapian::MSet result = enquire.get_mset(m_begin, m_num, nullptr, m_matchDecider);
int resultCount = result.size(); uint resultCount = result.size();
if(result.size() == 0) { if(result.empty()) {
return 0; return 0;
} }
qDebug() << "keywordSearchContent results count=" << resultCount; qDebug() << "keywordSearchContent results count=" << resultCount;
@ -339,7 +338,7 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
} }
// Construct snippets containing keyword. // Construct snippets containing keyword.
auto termIterator = doc.termlist_begin(); auto termIterator = doc.termlist_begin();
QStringList words = QString::fromStdString(keyWord).split(" ", QString::SkipEmptyParts); QStringList words = QString::fromStdString(keyWord).split(" ", Qt::SkipEmptyParts);
for(const QString& wordTobeFound : words) { for(const QString& wordTobeFound : words) {
std::string term = wordTobeFound.toStdString(); std::string term = wordTobeFound.toStdString();
@ -374,127 +373,12 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
return 0; return 0;
} }
/**
 * @brief Build an OCR search job.
 * @param searchResult queue that receives the results (not owned).
 * @param uniqueSymbol id of the search this job belongs to.
 * @param keyword      text to search for.
 * @param begin        first result offset.
 * @param num          page size.
 */
OcrSearch::OcrSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, int begin, int num)
    : m_search_result(searchResult),
      m_matchDecider(new OcrMatchDecider()),
      m_uniqueSymbol(uniqueSymbol),
      m_keyword(keyword),
      m_begin(begin),
      m_num(num)
{
    this->setAutoDelete(true);
}
/** @brief Forget the result queue and free the match decider created in the constructor. */
OcrSearch::~OcrSearch() {
    m_search_result = nullptr;
    // delete on a null pointer is a no-op, so no explicit check is needed.
    delete m_matchDecider;
}
void OcrSearch::run() {
SearchManager::m_mutexOcr.lock();
if(!m_search_result->isEmpty()) {
m_search_result->clear();
}
SearchManager::m_mutexOcr.unlock();
//这里同文件搜索,待优化。
m_begin = 0;
m_num = 100;
int resultCount = 1;
int totalCount = 0;
while(resultCount > 0) {
resultCount = keywordSearchOcr();
m_begin += m_num;
totalCount += resultCount;
}
qDebug() << "Total count:" << totalCount;
return;
}
int OcrSearch::keywordSearchOcr() {
try {
qDebug() << "--keywordSearch OCR search start--";
Xapian::Database db(OCR_INDEX_PATH);
Xapian::Enquire enquire(db);
Xapian::QueryParser qp;
qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db);
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword);
//Creat a query
std::string words;
for(size_t i = 0; i < sKeyWord.size(); i++) {
words.append(sKeyWord.at(i).word).append(" ");
}
std::vector<Xapian::Query> v;
for(size_t i = 0; i < sKeyWord.size(); i++) {
v.push_back(Xapian::Query(sKeyWord.at(i).word));
qDebug() << QString::fromStdString(sKeyWord.at(i).word);
}
Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
qDebug() << "keywordSearch OCR:" << QString::fromStdString(query.get_description());
enquire.set_query(query);
Xapian::MSet result = enquire.get_mset(m_begin, m_num, 0, m_matchDecider);
int resultCount = result.size();
if(result.size() == 0) {
return 0;
}
qDebug() << "keywordSearch OCR results count=" << resultCount;
if(getResult(result, words) == -1) {
return -1;
}
qDebug() << "--keywordSearch OCR search finish--";
return resultCount;
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
qDebug() << "--keywordSearch OCR search finish--";
return -1;
}
}
/**
 * @brief Turn matched documents into result entries and enqueue them.
 *
 * For every document a ResultInfo is created from the path stored in value
 * slot 1. When the first search word is present in the document with
 * position data, a highlighted snippet is prepended to the description.
 *
 * @param result  matches of the current page.
 * @param keyWord space-separated search words.
 * @return 0 when all results were enqueued, -1 as soon as this job's id no
 *         longer equals SearchManager::uniqueSymbolOcr.
 */
int OcrSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
    const QString keywordText = QString::fromStdString(keyWord);
    const std::string wordTobeFound = keywordText.section(" ", 0, 0).toStdString();
    const QString highlightText = QString(keywordText).remove(" ");

    for(auto it = result.begin(); it != result.end(); ++it) {
        Xapian::Document doc = it.get_document();
        std::string data = doc.get_data();
        QString path = QString::fromStdString(doc.get_value(1));

        SearchPluginIface::ResultInfo ri;
        if(!SearchManager::creatResultInfo(ri, path)) {
            continue;
        }

        // Construct snippets containing keyword.
        // skip_to() may land on a later term or on the end of the list, so the
        // iterators are checked before they are dereferenced.
        auto term = doc.termlist_begin();
        term.skip_to(wordTobeFound);
        if(term != doc.termlist_end() && *term == wordTobeFound) {
            //fix me: make a snippet without cut cjk char.
            auto pos = term.positionlist_begin();
            if(pos != term.positionlist_end()) {
                QString snippet = FileUtils::getSnippet(data, *pos, highlightText);
                ri.description.prepend(SearchPluginIface::DescriptionInfo{"", FileUtils::getHtmlText(snippet, highlightText)});
            }
        }

        QMutexLocker locker(&SearchManager::m_mutexOcr);
        // A different id means this job's results are no longer wanted.
        if(m_uniqueSymbol != SearchManager::uniqueSymbolOcr) {
            return -1;
        }
        m_search_result->enqueue(ri);
    }
    return 0;
}
/**
 * @brief Build a direct (non-indexed) search job.
 * @param keyword      text to search for; taken by value and moved into the job.
 * @param searchResult queue that receives the results (not owned).
 * @param value        search-type value; taken by value and moved into the job.
 * @param uniqueSymbol id of the search this job belongs to.
 */
DirectSearch::DirectSearch(QString keyword, DataQueue<SearchPluginIface::ResultInfo> *searchResult, QString value, size_t uniqueSymbol) {
    this->setAutoDelete(true);
    // Each by-value parameter is consumed exactly once.
    m_keyword = std::move(keyword);
    m_searchResult = searchResult;
    m_uniqueSymbol = uniqueSymbol;
    m_value = std::move(value);
}
void DirectSearch::run() { void DirectSearch::run() {
@ -534,13 +418,13 @@ void DirectSearch::run() {
for (auto i : list) { for (auto i : list) {
if (i.isDir() && (!(i.isSymLink()))) { if (i.isDir() && (!(i.isSymLink()))) {
bool findIndex = false; bool findIndex = false;
for (QString j : blockList) { for (const QString& j : blockList) {
if (FileUtils::isOrUnder(i.absoluteFilePath(), j)) { if (FileUtils::isOrUnder(i.absoluteFilePath(), j)) {
findIndex = true; findIndex = true;
break; break;
} }
} }
if (findIndex == true) { if (findIndex) {
qDebug() << "path is blocked:" << i.absoluteFilePath(); qDebug() << "path is blocked:" << i.absoluteFilePath();
continue; continue;
} }
@ -587,12 +471,3 @@ bool FileContentMatchDecider::operator ()(const Xapian::Document &doc) const
} }
return true; return true;
} }
/**
 * @brief Accept a document unless its path is blocked.
 * @param doc candidate document; its path is read from value slot 1.
 * @return false when SearchManager::isBlocked() reports the path as blocked, true otherwise.
 */
bool OcrMatchDecider::operator ()(const Xapian::Document &doc) const
{
    QString path = QString::fromStdString(doc.get_value(1));
    return !SearchManager::isBlocked(path);
}

View File

@ -63,7 +63,7 @@
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString() #define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString() #define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
#define OCR_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/ocr_index_data").toStdString() #define OCR_CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/ocr_content_index_data").toStdString()
namespace UkuiSearch { namespace UkuiSearch {
class FileMatchDecider; class FileMatchDecider;
class FileContentMatchDecider; class FileContentMatchDecider;
@ -81,7 +81,7 @@ public:
explicit SearchManager(QObject *parent = nullptr); explicit SearchManager(QObject *parent = nullptr);
~SearchManager(); ~SearchManager();
static int getCurrentIndexCount(); static uint getCurrentIndexCount();
static size_t uniqueSymbolFile; static size_t uniqueSymbolFile;
static size_t uniqueSymbolDir; static size_t uniqueSymbolDir;
@ -94,7 +94,7 @@ public:
private: private:
static bool isBlocked(QString &path); static bool isBlocked(QString &path);
static bool creatResultInfo(UkuiSearch::SearchPluginIface::ResultInfo &ri, QString path); static bool creatResultInfo(UkuiSearch::SearchPluginIface::ResultInfo &ri, const QString& path);
}; };
class FileSearch : public QRunnable { class FileSearch : public QRunnable {
@ -104,7 +104,7 @@ public:
protected: protected:
void run(); void run();
private: private:
int keywordSearchfile(); uint keywordSearchFile();
Xapian::Query creatQueryForFileSearch(); Xapian::Query creatQueryForFileSearch();
int getResult(Xapian::MSet &result); int getResult(Xapian::MSet &result);
@ -125,7 +125,7 @@ public:
protected: protected:
void run(); void run();
private: private:
int keywordSearchContent(); uint keywordSearchContent();
int getResult(Xapian::MSet &result, std::string &keyWord); int getResult(Xapian::MSet &result, std::string &keyWord);
DataQueue<SearchPluginIface::ResultInfo> *m_search_result = nullptr; DataQueue<SearchPluginIface::ResultInfo> *m_search_result = nullptr;
@ -137,24 +137,6 @@ private:
int m_num = 20; int m_num = 20;
}; };
/**
 * @brief Runnable task declared for keyword search over OCR content.
 *
 * Derives from QRunnable; run() is the QRunnable entry point. The member
 * functions are only declared here, their definitions live out of line.
 *
 * Constructor arguments (meanings inferred from names, TODO confirm against
 * the implementation):
 *  - searchResult: queue the task reports SearchPluginIface::ResultInfo into.
 *  - uniqueSymbol: token identifying the search this task belongs to.
 *  - keyword:      text to search for.
 *  - begin / num:  presumably result offset and maximum result count;
 *                  they default to 0 and 20.
 */
class OcrSearch : public QRunnable {
public:
    explicit OcrSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20);
    ~OcrSearch();

protected:
    void run();

private:
    int keywordSearchOcr();
    int getResult(Xapian::MSet &result, std::string &keyWord);

    DataQueue<SearchPluginIface::ResultInfo> *m_search_result = nullptr;
    // Initialised in-class like m_search_result so the pointer is never
    // indeterminate, whatever the constructor does with it.
    OcrMatchDecider *m_matchDecider = nullptr;
    size_t m_uniqueSymbol;
    QString m_keyword;
    int m_begin = 0;   // same default as the constructor's `begin`
    int m_num = 20;    // same default as the constructor's `num`
};
class DirectSearch : public QRunnable { class DirectSearch : public QRunnable {
public: public:
explicit DirectSearch(QString keyword, DataQueue<SearchPluginIface::ResultInfo> *searchResult, QString value, size_t uniqueSymbol); explicit DirectSearch(QString keyword, DataQueue<SearchPluginIface::ResultInfo> *searchResult, QString value, size_t uniqueSymbol);
@ -176,10 +158,6 @@ class FileContentMatchDecider : public Xapian::MatchDecider {
public: public:
bool operator ()(const Xapian::Document &doc) const; bool operator ()(const Xapian::Document &doc) const;
}; };
// Xapian::MatchDecider subclass declared for the OCR search path.
// operator() takes a candidate Xapian::Document and returns a bool; it is
// only declared here, so the accept/reject criteria are not visible in this
// header.
class OcrMatchDecider : public Xapian::MatchDecider {
public:
bool operator ()(const Xapian::Document &doc) const;
};
} }
#endif // SEARCHMANAGER_H #endif // SEARCHMANAGER_H

View File

@ -29,6 +29,7 @@ using namespace UkuiSearch;
static QMutex g_basicDatabaseMutex; static QMutex g_basicDatabaseMutex;
static QMutex g_contentDatabaseMutex; static QMutex g_contentDatabaseMutex;
static QMutex g_ocrContentDatabaseMutex;
#define DATABASE_TRY(code) try { \ #define DATABASE_TRY(code) try { \
code; \ code; \
@ -51,6 +52,9 @@ WritableDatabase::WritableDatabase(const DataBaseType &type)
m_path = CONTENT_INDEX_PATH; m_path = CONTENT_INDEX_PATH;
m_mutex = &g_contentDatabaseMutex; m_mutex = &g_contentDatabaseMutex;
break; break;
case DataBaseType::OcrContent:
m_path = OCR_CONTENT_INDEX_PATH;
m_mutex = &g_ocrContentDatabaseMutex;
default: default:
break; break;
} }
@ -106,6 +110,8 @@ void WritableDatabase::rebuild()
case DataBaseType::Content: case DataBaseType::Content:
IndexStatusRecorder::getInstance()->setVersion(CONTENT_DATABASE_VERSION_KEY, CONTENT_DATABASE_VERSION); IndexStatusRecorder::getInstance()->setVersion(CONTENT_DATABASE_VERSION_KEY, CONTENT_DATABASE_VERSION);
break; break;
case DataBaseType::OcrContent:
IndexStatusRecorder::getInstance()->setVersion(OCR_CONTENT_DATABASE_VERSION_KEY, OCR_CONTENT_DATABASE_VERSION);
default: default:
break; break;
} }
@ -190,6 +196,9 @@ void WritableDatabase::errorRecord()
case DataBaseType::Content: case DataBaseType::Content:
IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Error); IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Error);
break; break;
case DataBaseType::OcrContent:
IndexStatusRecorder::getInstance()->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Error);
break;
default: default:
break; break;
} }