feat(ukui-search-service):为OCR内容索引增加单独的数据库和单独控制开关

This commit is contained in:
iaom 2023-09-28 10:54:58 +08:00 committed by Baijunjie
parent e9ce06a471
commit d6e3cd46e9
14 changed files with 448 additions and 401 deletions

View File

@ -25,5 +25,10 @@
<summary>content fuzzy search</summary>
<description>Enable or disable fuzzy search for file content.</description>
</key>
<key name="content-index-enable-ocr" type="b">
<default>false</default>
<summary>content index enable ocr</summary>
<description>Enable or disable OCR in content index.</description>
</key>
</schema>
</schemalist>

View File

@ -13,6 +13,8 @@ static const int LABEL_MAX_WIDTH = 320;
static const QString HOME_PATH = QDir::homePath();
static const QString INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/index_data");
static const QString CONTENT_INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/content_index_data");
static const QString OCR_CONTENT_INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/ocr_content_index_data");
static const QString FILE_SEARCH_VALUE = QStringLiteral("0");
static const QString DIR_SEARCH_VALUE = QStringLiteral("1");
static const QString INDEX_SEM = QStringLiteral("ukui-search-index-sem");
@ -27,7 +29,7 @@ static const QString INDEX_DATABASE_VERSION = QStringLiteral("1.0.1");
* changelog 1.1.0 value
*/
static const QString CONTENT_DATABASE_VERSION = QStringLiteral("1.1.0");
static const QString OCR_CONTENT_DATABASE_VERSION = QStringLiteral("1.0.0");
static const QStringList allAppPath = {
@ -84,7 +86,8 @@ static const QMap<QString, bool> targetPhotographTypeMap = {
*/
enum class DataBaseType {
Basic = 0,
Content = 1
Content = 1,
OcrContent = 2
};
/**

View File

@ -32,11 +32,14 @@
#include "writable-database.h"
#include "compatible-define.h"
using namespace UkuiSearch;
BatchIndexer::BatchIndexer(const QStringList &folders, const QStringList &blackList, QAtomicInt& indexStop, QAtomicInt &contentIndexStop, WorkMode mode, Targets target)
BatchIndexer::BatchIndexer(const QStringList &folders, const QStringList &blackList,
QAtomicInt& indexStop, QAtomicInt &contentIndexStop, QAtomicInt &contentIndexOcrStop,
WorkMode mode, Targets target)
: m_folders(folders),
m_blackList(blackList),
m_indexStop(&indexStop),
m_contentIndexStop(&contentIndexStop),
m_contentIndexOcrStop(&contentIndexOcrStop),
m_mode(mode),
m_target(target)
{
@ -60,6 +63,10 @@ void BatchIndexer::run()
contentIndex();
Q_EMIT contentIndexDone(m_mode);
}
if(m_target & Target::Ocr) {
ocrIndex();
Q_EMIT ocrContentIndexDone(m_mode);
}
m_cache.clear();
malloc_trim(0);
qDebug() << "FirstRunIndexer: time :" << timer.elapsed() << "milliseconds";
@ -71,15 +78,15 @@ void BatchIndexer::fetch()
qDebug() << "Now begin fetching files to be indexed...";
qDebug() << "Index folders:" << m_folders << "blacklist :" << m_blackList;
QQueue<QString> bfs;
for(QString blockPath : m_blackList) {
for(QString path : m_folders) {
// Remove every index root that FileUtils::isOrUnder() matches against a
// blacklist entry. The inner loop walks a snapshot of m_folders: calling
// removeOne() on the very container a range-for is iterating invalidates the
// loop's iterators (stale end(), skipped successor element).
for(const QString& blockPath : m_blackList) {
    const QStringList candidates = m_folders;
    for(const QString& path : candidates) {
        if(FileUtils::isOrUnder(path, blockPath)) {
            m_folders.removeOne(path);
        }
    }
}
m_cache.append(m_folders);
for(QString path : m_folders) {
for(const QString &path : m_folders) {
bfs.enqueue(path);
}
QFileInfoList list;
@ -91,9 +98,9 @@ void BatchIndexer::fetch()
while(!bfs.empty()) {
dir.setPath(bfs.dequeue());
list = dir.entryInfoList();
for(auto i : list) {
for(const auto& i : list) {
bool isBlocked = false;
for(QString path : tmpList) {
for(const QString &path : tmpList) {
if(i.absoluteFilePath() == path) {
isBlocked = true;
tmpList.removeOne(path);
@ -144,7 +151,7 @@ void BatchIndexer::basicIndex()
}
if(!indexTimes.isEmpty()) {
qDebug() << indexTimes.size() << "documents need remove.";
for(std::string uniqueTerm : indexTimes.keys()) {
for(const std::string& uniqueTerm : indexTimes.keys()) {
basicDb.removeDocument(uniqueTerm);
}
basicDb.commit();
@ -163,7 +170,7 @@ void BatchIndexer::basicIndex()
++finishNum;
}
if(batchSize >= 8192) {
qDebug() << "8192 finished.";
qDebug() << finishNum << "of" << allSize <<"finished.";
basicDb.commit();
Q_EMIT progress(IndexType::Basic, allSize, finishNum);
//文件名索引很快
@ -194,16 +201,10 @@ void BatchIndexer::contentIndex()
qWarning() << "Content db open failed, fail to run content index!";
return;
}
QStringList filesNeedIndex;
QStringList filesNeedOCRIndex;
QMap<QString, bool> suffixMap = targetFileTypeMap;
QFileInfo info;
// ocr
// bool ocrEnable = FileIndexerConfig::getInstance()->isOCREnable();
if(FileIndexerConfig::getInstance()->isOCREnable()) {
qDebug() << "OCR enabled.";
suffixMap.INSERT(targetPhotographTypeMap);
}
if(m_mode == WorkMode::Rebuild) {
contentDb.rebuild();
if(!contentDb.open()) {
@ -211,9 +212,9 @@ void BatchIndexer::contentIndex()
}
}
if(m_mode == WorkMode::Rebuild || m_mode == WorkMode::Add) {
for(QString path : m_cache) {
for(const QString& path : m_cache) {
info.setFile(path);
if(true == suffixMap[info.suffix()] && info.isFile()) {
if(targetFileTypeMap[info.suffix()] && info.isFile()) {
if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) {
filesNeedIndex.append(path);
}
@ -222,9 +223,9 @@ void BatchIndexer::contentIndex()
} else if(m_mode == WorkMode::Update) {
QMap<std::string, std::string> indexTimes = contentDb.getIndexTimes();
qDebug() << indexTimes.size() << "documents recorded";
for(QString path : m_cache) {
for(const QString& path : m_cache) {
info.setFile(path);
if(true == suffixMap[info.suffix()] && info.isFile()) {
if(targetFileTypeMap[info.suffix()] && info.isFile()) {
std::string uterm = FileUtils::makeDocUterm(path);
if(indexTimes.value(uterm) != info.lastModified().toString("yyyyMMddHHmmsszzz").toStdString()) {
if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) {
@ -238,7 +239,7 @@ void BatchIndexer::contentIndex()
}
if(!indexTimes.isEmpty()) {
qDebug() << indexTimes.size() << "documents need remove";
for(std::string uniqueTerm : indexTimes.keys()) {
for(const std::string& uniqueTerm : indexTimes.keys()) {
contentDb.removeDocument(uniqueTerm);
}
contentDb.commit();
@ -251,19 +252,12 @@ void BatchIndexer::contentIndex()
uint batchSize = 0;
uint finishNum = 0;
for (QString path : filesNeedIndex) {
for (const QString& path : filesNeedIndex) {
if(m_contentIndexStop->LOAD) {
qDebug() << "Index stopped, interrupt content index.";
filesNeedIndex.clear();
filesNeedOCRIndex.clear();
return;
}
info.setFile(path);
if(true == targetPhotographTypeMap[info.suffix()]) {
filesNeedOCRIndex.append(path);
filesNeedIndex.removeOne(path);
continue;
}
fileContentIndexer indexer(path);
if(indexer.index()) {
contentDb.addDocument(indexer.document());
@ -274,24 +268,84 @@ void BatchIndexer::contentIndex()
}
if(batchSize >= 30) {
contentDb.commit();
qDebug() << "30 finished.";
qDebug() << finishNum << "of" << allSize <<"finished.";
Q_EMIT progress(IndexType::Contents, allSize, finishNum);
batchSize = 0;
}
}
contentDb.commit();
Q_EMIT progress(IndexType::Contents, allSize, finishNum);
filesNeedIndex.clear();
qDebug() << "Content index for normal files finished, now begin OCR index";
int ocrSize = filesNeedOCRIndex.size();
qDebug() << ocrSize << "pictures need OCR index.";
qDebug() << "Finish content index";
}
batchSize = 0;
int ocrFinishNum = 0;
for(QString path : filesNeedOCRIndex) {
if(m_contentIndexStop->LOAD) {
qDebug() << "Index stopped, interrupt content index.";
void BatchIndexer::ocrIndex()
{
qDebug() << "Begin ocr content index";
if(m_contentIndexOcrStop->LOAD) {
qDebug() << "Index stopped, abort ocr content index.";
return;
}
WritableDatabase contentDb(DataBaseType::OcrContent);
if(!contentDb.open()) {
qWarning() << "Content db open failed, fail to run ocr content index!";
return;
}
QStringList filesNeedOCRIndex;
QFileInfo info;
if(m_mode == WorkMode::Rebuild) {
contentDb.rebuild();
if(!contentDb.open()) {
return;
}
}
if(m_mode == WorkMode::Rebuild || m_mode == WorkMode::Add) {
for(const QString &path : m_cache) {
info.setFile(path);
if(targetPhotographTypeMap[info.suffix()] && info.isFile()) {
if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) {
filesNeedOCRIndex.append(path);
}
}
}
} else {
QMap<std::string, std::string> indexTimes = contentDb.getIndexTimes();
qDebug() << indexTimes.size() << "documents recorded";
for(const QString& path : m_cache) {
info.setFile(path);
if(targetPhotographTypeMap[info.suffix()] && info.isFile()) {
std::string uterm = FileUtils::makeDocUterm(path);
if(indexTimes.value(uterm) != info.lastModified().toString("yyyyMMddHHmmsszzz").toStdString()) {
if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) {
filesNeedOCRIndex.append(path);
indexTimes.remove(uterm);
}
} else {
indexTimes.remove(uterm);
}
}
}
if(!indexTimes.isEmpty()) {
qDebug() << indexTimes.size() << "documents need remove";
for(const std::string& uniqueTerm : indexTimes.keys()) {
contentDb.removeDocument(uniqueTerm);
}
contentDb.commit();
}
}
uint allSize = filesNeedOCRIndex.size();
qDebug() << allSize << "pictures need ocr content index.";
Q_EMIT progress(IndexType::OCR, allSize, 0);
uint batchSize = 0;
uint finishNum = 0;
for (const QString &path : filesNeedOCRIndex) {
if(m_contentIndexOcrStop->LOAD) {
qDebug() << "Index stopped, interrupt ocr content index.";
filesNeedOCRIndex.clear();
return;
}
@ -299,22 +353,20 @@ void BatchIndexer::contentIndex()
if(indexer.index()) {
contentDb.addDocument(indexer.document());
++batchSize;
++ocrFinishNum;
++finishNum;
} else {
// qDebug() << "Extract fail===" << path;
}
if(batchSize >= 30) {
if(batchSize >= 10) {
contentDb.commit();
qDebug() << "30 finished.";
Q_EMIT progress(IndexType::Contents, allSize, finishNum + ocrFinishNum);
Q_EMIT progress(IndexType::OCR, ocrSize, ocrFinishNum);
qDebug() << finishNum << "of" << allSize <<"finished.";
Q_EMIT progress(IndexType::OCR, allSize, finishNum);
batchSize = 0;
}
}
contentDb.commit();
Q_EMIT progress(IndexType::OCR, ocrSize, ocrFinishNum);
Q_EMIT progress(IndexType::Contents, allSize, finishNum + ocrFinishNum);
Q_EMIT progress(IndexType::OCR, allSize, finishNum);
filesNeedOCRIndex.clear();
qDebug() << "Finish OCR index.";
qDebug() << "Finish content index";
qDebug() << "Ocr content index finished,";
}

View File

@ -53,6 +53,7 @@ public:
None = 0,
Basic = 1u << 0,
Content = 1u << 1,
Ocr = 1u << 2,
All = Basic | Content
};
Q_DECLARE_FLAGS(Targets, Target)
@ -61,6 +62,7 @@ public:
const QStringList& blackList,
QAtomicInt& indexStop,
QAtomicInt& contentIndexStop,
QAtomicInt& contentIndexOcrStop,
WorkMode mode = WorkMode::Update,
Targets target = Target::All);
void run() override;
@ -69,17 +71,20 @@ Q_SIGNALS:
void progress(IndexType type, uint all, uint finished);
void basicIndexDone(WorkMode);
void contentIndexDone(WorkMode);
void ocrContentIndexDone(WorkMode);
void done(WorkMode, Targets);
private:
void fetch();
void basicIndex();
void contentIndex();
void ocrIndex();
QStringList m_folders;
QStringList m_blackList;
QAtomicInt *m_indexStop = nullptr;
QAtomicInt *m_contentIndexStop = nullptr;
QAtomicInt *m_contentIndexOcrStop = nullptr;
WorkMode m_mode;
Targets m_target;
QStringList m_cache;

View File

@ -23,11 +23,15 @@
#include <QDebug>
#include <QDir>
#define INDEX_SETTINGS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-service.conf"
static const QString CONFIG_VERSION = QStringLiteral("1.0");
/**
* changelog: 1.1 ocr开关
*/
static const QString CONFIG_VERSION = QStringLiteral("1.1");
static const QByteArray UKUI_SEARCH_SCHEMAS = QByteArrayLiteral("org.ukui.search.settings");
static const QString FILE_INDEX_ENABLE_KEY = QStringLiteral("fileIndexEnable");
static const QString CONTENT_INDEX_ENABLE_KEY = QStringLiteral("contentIndexEnable");
static const QString CONTENT_FUZZY_SEARCH_KEY = QStringLiteral("contentFuzzySearch");
static const QString CONTENT_INDEX_ENABLE_OCR_KEY = QStringLiteral("contentIndexEnableOcr");
static const QString OCR_ENABLE_KEY = QStringLiteral("ocrEnable");
static const QString META_DATA_INDEX_ENABLE_KEY = QStringLiteral("metaDataIndexEnable");
static const QString CONFIG_VERSION_KEY = QStringLiteral("version");
@ -65,6 +69,20 @@ FileIndexerConfig::FileIndexerConfig(QObject *parent)
if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_KEY)) {
m_gsettings->set(CONTENT_INDEX_ENABLE_KEY, true);
}
if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_OCR_KEY)) {
m_gsettings->set(CONTENT_INDEX_ENABLE_OCR_KEY, true);
}
}
m_gsettings->set(CONFIG_VERSION_KEY, CONFIG_VERSION);
} else if (oldVersion == "1.0") {
bool contentIndex = false;
if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_KEY)) {
contentIndex = m_gsettings->get(CONTENT_INDEX_ENABLE_KEY).toBool();
}
if(contentIndex) {
if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_OCR_KEY)) {
m_gsettings->set(CONTENT_INDEX_ENABLE_OCR_KEY, true);
}
}
m_gsettings->set(CONFIG_VERSION_KEY, CONFIG_VERSION);
}
@ -74,7 +92,8 @@ FileIndexerConfig::FileIndexerConfig(QObject *parent)
Q_EMIT this->fileIndexEnableStatusChanged(m_gsettings->get(FILE_INDEX_ENABLE_KEY).toBool());
} else if(key == CONTENT_INDEX_ENABLE_KEY) {
Q_EMIT this->contentIndexEnableStatusChanged(m_gsettings->get(CONTENT_INDEX_ENABLE_KEY).toBool());
} else if(key == CONTENT_INDEX_ENABLE_OCR_KEY) {
Q_EMIT this->contentIndexEnableOcrStatusChanged(m_gsettings->get(CONTENT_INDEX_ENABLE_OCR_KEY).toBool());
}
});
} else {
@ -146,7 +165,17 @@ bool FileIndexerConfig::isFuzzySearchEnable()
bool FileIndexerConfig::isOCREnable()
{
return m_settings->value(OCR_ENABLE_KEY, true).toBool();
if(m_gsettings) {
if(m_gsettings->keys().contains(CONTENT_INDEX_ENABLE_OCR_KEY)) {
return m_gsettings->get(CONTENT_INDEX_ENABLE_OCR_KEY).toBool();
} else {
qWarning() << "FileIndexerConfig: Can not find key:" << CONTENT_INDEX_ENABLE_OCR_KEY << "in" << UKUI_SEARCH_SCHEMAS;
return false;
}
} else {
qWarning() << "FileIndexerConfig:" << UKUI_SEARCH_SCHEMAS << " is not found!";
return false;
}
}
bool FileIndexerConfig::isMetaDataIndexEnable()

View File

@ -87,6 +87,11 @@ Q_SIGNALS:
*
*/
void contentIndexEnableStatusChanged(bool);
/**
* @brief contentIndexEnableOcrStatusChanged
* Emitted when the "contentIndexEnableOcr" gsettings key changes.
* The argument is the key's new boolean value, read back from gsettings
* at the moment of the change (the switch for OCR in the content index).
*/
void contentIndexEnableOcrStatusChanged(bool);
private:
explicit FileIndexerConfig(QObject *parent = nullptr);

View File

@ -23,12 +23,13 @@
using namespace UkuiSearch;
IndexScheduler::IndexScheduler(QObject *parent) :
QObject(parent),
m_statusRecorder(IndexStatusRecorder::getInstance()),
m_config(FileIndexerConfig::getInstance()),
m_state(Startup),
m_indexStop(0),
m_contentIndexStop(0)
QObject(parent),
m_statusRecorder(IndexStatusRecorder::getInstance()),
m_config(FileIndexerConfig::getInstance()),
m_state(Startup),
m_indexStop(0),
m_contentIndexStop(0),
m_ocrContentIndexStop(0)
{
qRegisterMetaType<IndexerState>("IndexerState");
qRegisterMetaType<BatchIndexer::WorkMode>("BatchIndexer::WorkMode");
@ -38,6 +39,7 @@ IndexScheduler::IndexScheduler(QObject *parent) :
connect(&m_fileWatcher, &FileWatcher::filesUpdate, this, &IndexScheduler::updateIndex);
connect(m_config, &FileIndexerConfig::fileIndexEnableStatusChanged, this, &IndexScheduler::fileIndexEnable);
connect(m_config, &FileIndexerConfig::contentIndexEnableStatusChanged, this, &IndexScheduler::contentIndexEnable);
connect(m_config, &FileIndexerConfig::contentIndexEnableOcrStatusChanged, this, &IndexScheduler::ocrContentIndexEnable);
connect(m_config, &FileIndexerConfig::appendIndexDir, this, &IndexScheduler::addNewPath);
connect(m_config, &FileIndexerConfig::removeIndexDir, this, &IndexScheduler::removeIndex);
@ -50,6 +52,9 @@ IndexScheduler::IndexScheduler(QObject *parent) :
}
if(m_config->isContentIndexEnable()) {
targets |= BatchIndexer::Target::Content;
if(m_config->isOCREnable()) {
targets |= BatchIndexer::Target::Ocr;
}
} else {
m_contentIndexStop.fetchAndStoreRelaxed(1);
}
@ -62,8 +67,7 @@ void IndexScheduler::addNewPath(const QString &folders, const QStringList &black
qDebug() << "Index Scheduler is being stopped, add operation will be executed when started up next time.";
return;
}
m_state = Running;
Q_EMIT stateChange(m_state);
BatchIndexer::Targets target = BatchIndexer::Target::None;
if(m_config->isFileIndexEnable()) {
target |= BatchIndexer::Target::Basic;
@ -73,6 +77,10 @@ void IndexScheduler::addNewPath(const QString &folders, const QStringList &black
target |= BatchIndexer::Target::Content;
m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating);
}
if(m_config->isOCREnable()) {
target |= BatchIndexer::Target::Ocr;
m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating);
}
BatchIndexer::WorkMode mode = BatchIndexer::WorkMode::Add;
startIndexJob(QStringList() << folders, blackList, mode, target);
if(BatchIndexer::Target::None != target) {
@ -101,7 +109,12 @@ void IndexScheduler::stop(BatchIndexer::Targets target)
m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Off);
qDebug() << "File content index has been stopped.";
}
if(m_indexStop.LOAD && m_contentIndexStop.LOAD) {
if(target & BatchIndexer::Target::Ocr) {
m_ocrContentIndexStop.fetchAndStoreRelaxed(1);
m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Off);
qDebug() << "File ocr content index has been stopped.";
}
if(m_indexStop.LOAD && m_contentIndexStop.LOAD && m_ocrContentIndexStop.LOAD) {
m_fileWatcher.removeWatch();
m_threadPool.clear();
m_threadPool.waitForDone(-1);
@ -119,24 +132,30 @@ IndexScheduler::IndexerState IndexScheduler::getIndexState()
void IndexScheduler::start(BatchIndexer::Targets target)
{
qDebug() << "Index scheduler start." << target;
BatchIndexer::Targets realTargets = BatchIndexer::Target::None;
//检查是否有任务未完成
BatchIndexer::Targets tmpTargets = BatchIndexer::Target::None;
if(target & BatchIndexer::Basic) {
if(m_indexFirstRunFinished && m_indexRebuildFinished) {
tmpTargets |= BatchIndexer::Target::Basic;
if(m_indexPendingWorkCount == 0) {
realTargets |= BatchIndexer::Target::Basic;
}
}
if(target & BatchIndexer::Content) {
if(m_contentIndexFirstRunFinished && m_contentIndexRebuildFinished) {
tmpTargets |= BatchIndexer::Target::Content;
if(m_contentIndexPendingWorkCount == 0) {
realTargets |= BatchIndexer::Target::Content;
}
}
if(tmpTargets == BatchIndexer::Target::None) {
qDebug() << "Index scheduler running, start operation ignored."
<< "FirstRun finished: " << m_indexFirstRunFinished
<< "Rebuild finished: " << m_indexRebuildFinished
<< "Content index firstRun finished: " << m_contentIndexFirstRunFinished
<< "Content index rebuild finished: " << m_contentIndexRebuildFinished;
if(target & BatchIndexer::Ocr) {
if(m_ocrContentIndexPendingWorkCount == 0) {
realTargets |= BatchIndexer::Target::Ocr;
}
}
// No target can be started: each one was either not requested or still has
// pending jobs. Log all three counters and ignore the request.
if(realTargets == BatchIndexer::Target::None) {
    qDebug() << "Index scheduler running, start operation ignored.\n"
             // The basic-index field must report the basic counter, not the content one.
             << "index pending work count: " << m_indexPendingWorkCount << "\n"
             << "Content index pending work count: " << m_contentIndexPendingWorkCount << "\n"
             << "Ocr content index pending work count: " << m_ocrContentIndexPendingWorkCount << "\n";
    return;
}
@ -147,25 +166,29 @@ void IndexScheduler::start(BatchIndexer::Targets target)
if(target & BatchIndexer::Content) {
m_contentIndexStop.fetchAndStoreRelaxed(0);
}
//将索引调度器状态设置为运行中
m_state = Running;
Q_EMIT stateChange(m_state);
if(target & BatchIndexer::Ocr) {
m_ocrContentIndexStop.fetchAndStoreRelaxed(0);
}
//检查是否有数据库需要重建并且执行重建
BatchIndexer::Targets rebuiltTarget = checkAndRebuild(tmpTargets);
BatchIndexer::Targets rebuiltTarget = checkAndRebuild(realTargets);
BatchIndexer::WorkMode mode = BatchIndexer::WorkMode::Update;
BatchIndexer::Targets startTarget = BatchIndexer::Target::None;
//如果数据库被执行过重建,那么跳过增量更新步骤。
if((tmpTargets & BatchIndexer::Target::Basic) && !(rebuiltTarget & BatchIndexer::Target::Basic)) {
if((realTargets & BatchIndexer::Target::Basic) && !(rebuiltTarget & BatchIndexer::Target::Basic)) {
startTarget |= BatchIndexer::Target::Basic;
m_statusRecorder->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating);
}
if((tmpTargets & BatchIndexer::Target::Content) && !(rebuiltTarget & BatchIndexer::Target::Content)) {
if((realTargets & BatchIndexer::Target::Content) && !(rebuiltTarget & BatchIndexer::Target::Content)) {
startTarget |= BatchIndexer::Target::Content;
m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating);
}
if(realTargets & BatchIndexer::Ocr && !(rebuiltTarget & BatchIndexer::Target::Ocr)) {
startTarget |= BatchIndexer::Target::Ocr;
m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Updating);
}
startIndexJob(m_config->currentIndexableDir(), m_config->currentBlackListOfIndex(), mode, startTarget);
//启动监听
@ -183,7 +206,6 @@ BatchIndexer::Targets IndexScheduler::checkAndRebuild(BatchIndexer::Targets targ
rebuildTarget |= BatchIndexer::Target::Basic;
m_statusRecorder->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Initializing);
}
if((target & BatchIndexer::Target::Content) && m_config->isContentIndexEnable() &&
(m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE_KEY).toInt() == IndexStatusRecorder::State::Error
|| !m_statusRecorder->versionCheck(CONTENT_DATABASE_VERSION_KEY, CONTENT_DATABASE_VERSION))) {
@ -191,6 +213,13 @@ BatchIndexer::Targets IndexScheduler::checkAndRebuild(BatchIndexer::Targets targ
rebuildTarget |= BatchIndexer::Target::Content;
m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Initializing);
}
if((target & BatchIndexer::Target::Ocr) && m_config->isOCREnable() &&
(m_statusRecorder->getStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY).toInt() == IndexStatusRecorder::State::Error
|| !m_statusRecorder->versionCheck(OCR_CONTENT_DATABASE_VERSION_KEY, OCR_CONTENT_DATABASE_VERSION))) {
qDebug() << "Ocr content database need rebuild";
rebuildTarget |= BatchIndexer::Target::Ocr;
m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Initializing);
}
startIndexJob(m_config->currentIndexableDir(), m_config->currentBlackListOfIndex(), mode, rebuildTarget);
return rebuildTarget;
}
@ -198,34 +227,28 @@ BatchIndexer::Targets IndexScheduler::checkAndRebuild(BatchIndexer::Targets targ
void IndexScheduler::startIndexJob(const QStringList& folders,const QStringList& blackList, BatchIndexer::WorkMode mode, BatchIndexer::Targets target)
{
if(BatchIndexer::Target::None != target) {
switch (mode) {
case BatchIndexer::WorkMode::Add:
m_addNewPathFinished = false;
break;
case BatchIndexer::WorkMode::Rebuild:
if(target & BatchIndexer::Basic) {
m_indexRebuildFinished = false;
}
if(target & BatchIndexer::Content) {
m_contentIndexRebuildFinished = false;
}
break;
case BatchIndexer::WorkMode::Update:
if(target & BatchIndexer::Basic) {
m_indexFirstRunFinished = false;
}
if(target & BatchIndexer::Content) {
m_contentIndexFirstRunFinished = false;
}
break;
default:
break;
if(mode == BatchIndexer::WorkMode::Add) {
m_addNewPathPendingWorkCount++;
}
BatchIndexer *indexer = new BatchIndexer(folders, blackList, m_indexStop, m_contentIndexStop, mode, target);
connect(indexer, &BatchIndexer::done, this, &IndexScheduler::firstRunFinished, Qt::QueuedConnection);
if(target & BatchIndexer::Basic) {
m_indexPendingWorkCount++;
}
if(target & BatchIndexer::Content) {
m_contentIndexPendingWorkCount++;
}
if(target & BatchIndexer::Ocr) {
m_ocrContentIndexPendingWorkCount++;
}
m_state = Running;
Q_EMIT stateChange(m_state);
BatchIndexer *indexer = new BatchIndexer(folders, blackList, m_indexStop, m_contentIndexStop, m_ocrContentIndexStop, mode, target);
connect(indexer, &BatchIndexer::done, this, &IndexScheduler::batchIndexerFinished, Qt::QueuedConnection);
connect(indexer, &BatchIndexer::progress, this, &IndexScheduler::process, Qt::QueuedConnection);
connect(indexer, &BatchIndexer::basicIndexDone, this, &IndexScheduler::onBasicIndexDone, Qt::QueuedConnection);
connect(indexer, &BatchIndexer::contentIndexDone, this, &IndexScheduler::onContentIndexDone, Qt::QueuedConnection);
connect(indexer, &BatchIndexer::ocrContentIndexDone, this, &IndexScheduler::onOcrContentIndexDone, Qt::QueuedConnection);
m_threadPool.start(indexer);
}
}
@ -248,18 +271,33 @@ void IndexScheduler::contentIndexEnable(bool enable)
}
}
/**
 * Slot connected to FileIndexerConfig::contentIndexEnableOcrStatusChanged.
 * Starts the OCR indexing target when the switch is turned on and stops it
 * when the switch is turned off.
 * @param enable new value of the OCR switch
 */
void IndexScheduler::ocrContentIndexEnable(bool enable)
{
    if(!enable) {
        stop(BatchIndexer::Ocr);
        return;
    }
    start(BatchIndexer::Ocr);
}
void IndexScheduler::updateIndex(const QVector<PendingFile> &files)
{
qDebug() << "updateIndex=====";
m_updateFinished = false;
m_updatePendingWorkCount++;
m_state = Running;
IndexUpdater *updateJob = new IndexUpdater(files, m_indexStop, m_contentIndexStop);
Q_EMIT stateChange(m_state);
IndexUpdater *updateJob = new IndexUpdater(files, m_indexStop, m_contentIndexStop, m_ocrContentIndexStop);
connect(updateJob, &IndexUpdater::done, this, &IndexScheduler::updateFinished, Qt::QueuedConnection);
m_threadPool.start(updateJob);
}
void IndexScheduler::firstRunFinished()
void IndexScheduler::batchIndexerFinished(BatchIndexer::WorkMode mode, BatchIndexer::Targets targets)
{
if(mode == BatchIndexer::WorkMode::Add) {
m_addNewPathPendingWorkCount--;
}
if(isIdle()) {
m_state = Idle;
Q_EMIT stateChange(m_state);
@ -268,7 +306,7 @@ void IndexScheduler::firstRunFinished()
void IndexScheduler::updateFinished()
{
m_updateFinished = true;
m_updatePendingWorkCount--;
if(isIdle()) {
m_state = Idle;
Q_EMIT stateChange(m_state);
@ -277,30 +315,20 @@ void IndexScheduler::updateFinished()
bool IndexScheduler::isIdle()
{
return m_indexFirstRunFinished && m_contentIndexFirstRunFinished
&& m_addNewPathFinished
&& m_updateFinished
&& m_indexRebuildFinished && m_contentIndexRebuildFinished;
return m_indexPendingWorkCount == 0
&& m_contentIndexPendingWorkCount == 0
&& m_ocrContentIndexPendingWorkCount == 0
&& m_updatePendingWorkCount == 0
&& m_addNewPathPendingWorkCount == 0;
}
void IndexScheduler::onBasicIndexDone(BatchIndexer::WorkMode mode)
{
switch (mode) {
case BatchIndexer::WorkMode::Add:
m_addNewPathFinished = true;
break;
case BatchIndexer::WorkMode::Rebuild:
m_indexRebuildFinished = true;
break;
case BatchIndexer::WorkMode::Update:
m_indexFirstRunFinished = true;
break;
default:
break;
}
Q_UNUSED(mode)
m_indexPendingWorkCount--;
bool success = false;
if(!(m_statusRecorder->getStatus(INDEX_DATABASE_STATE_KEY).toInt() == IndexStatusRecorder::State::Error)) {
if(m_statusRecorder->getStatus(INDEX_DATABASE_STATE_KEY).toInt() != IndexStatusRecorder::State::Error) {
m_statusRecorder->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Ready);
success = true;
}
@ -309,23 +337,26 @@ void IndexScheduler::onBasicIndexDone(BatchIndexer::WorkMode mode)
void IndexScheduler::onContentIndexDone(BatchIndexer::WorkMode mode)
{
switch (mode) {
case BatchIndexer::WorkMode::Add:
m_addNewPathFinished = true;
break;
case BatchIndexer::WorkMode::Rebuild:
m_contentIndexRebuildFinished = true;
break;
case BatchIndexer::WorkMode::Update:
m_contentIndexFirstRunFinished = true;
break;
default:
break;
}
Q_UNUSED(mode)
m_contentIndexPendingWorkCount--;
bool success = false;
if(!(m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE_KEY).toInt() == IndexStatusRecorder::State::Error)) {
if(m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE_KEY).toInt() != IndexStatusRecorder::State::Error) {
m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Ready);
success = true;
}
Q_EMIT contentIndexDone(success);
}
/**
 * Completion handler connected to BatchIndexer::ocrContentIndexDone.
 * Decrements the pending OCR job counter and, unless the recorded state of
 * the OCR content database is Error, marks that database as Ready.
 * @param mode work mode of the finished job (unused)
 */
void IndexScheduler::onOcrContentIndexDone(BatchIndexer::WorkMode mode)
{
    Q_UNUSED(mode)
    m_ocrContentIndexPendingWorkCount--;

    const bool inErrorState =
        m_statusRecorder->getStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY).toInt() == IndexStatusRecorder::State::Error;
    if(!inErrorState) {
        m_statusRecorder->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Ready);
    }

    // NOTE(review): OCR completion is reported through contentIndexDone(), the
    // same signal onContentIndexDone() emits — confirm this is intended and not
    // a copy-paste of the content handler.
    Q_EMIT contentIndexDone(!inErrorState);
}

View File

@ -43,17 +43,6 @@ public:
Q_ENUM(IndexerState)
explicit IndexScheduler(QObject *parent = nullptr);
/**
* @brief addNewPath
* @param folders
* @param blackList
*/
Q_SCRIPTABLE void addNewPath(const QString &folders, const QStringList& blackList = QStringList());
/**
* @brief removeIndex
* @param folders
*/
Q_SCRIPTABLE void removeIndex(const QString& folders);
Q_SCRIPTABLE IndexerState getIndexState();
@ -65,16 +54,29 @@ Q_SIGNALS:
void done();
private Q_SLOTS:
/**
* @brief addNewPath
* @param folders
* @param blackList
*/
Q_SCRIPTABLE void addNewPath(const QString &folders, const QStringList& blackList = QStringList());
/**
* @brief removeIndex
* @param folders
*/
Q_SCRIPTABLE void removeIndex(const QString& folders);
void start(BatchIndexer::Targets target);
void stop(BatchIndexer::Targets target);
void fileIndexEnable(bool enable);
void contentIndexEnable(bool enable);
void ocrContentIndexEnable(bool enable);
void updateIndex(const QVector<PendingFile>& files);
void firstRunFinished();
void batchIndexerFinished(BatchIndexer::WorkMode mode, BatchIndexer::Targets targets);
void updateFinished();
bool isIdle();
void onBasicIndexDone(BatchIndexer::WorkMode mode);
void onContentIndexDone(BatchIndexer::WorkMode mode);
void onOcrContentIndexDone(BatchIndexer::WorkMode mode);
private:
/**
@ -90,16 +92,15 @@ private:
IndexerState m_state;
QAtomicInt m_indexStop;
QAtomicInt m_contentIndexStop;
QAtomicInt m_ocrContentIndexStop;
QThreadPool m_threadPool;
bool m_indexFirstRunFinished = true;
bool m_contentIndexFirstRunFinished = true;
quint64 m_indexPendingWorkCount = 0;
quint64 m_contentIndexPendingWorkCount = 0;
quint64 m_ocrContentIndexPendingWorkCount= 0;
bool m_indexRebuildFinished = true;
bool m_contentIndexRebuildFinished = true;
bool m_updateFinished = true;
bool m_addNewPathFinished = true;
quint64 m_updatePendingWorkCount = 0;
quint64 m_addNewPathPendingWorkCount = 0;
};
}
#endif // INDEXSCHEDULER_H

View File

@ -25,10 +25,12 @@
#include <QDir>
#include <QMutex>
#define CONTENT_INDEX_DATABASE_STATE_KEY "content_index_database_state"
#define OCR_CONTENT_INDEX_DATABASE_STATE_KEY "ocr_content_index_database_state"
#define INDEX_DATABASE_STATE_KEY "index_database_state"
#define INDEX_STATUS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-index-status.conf"
#define INDEX_DATABASE_VERSION_KEY "index_database_version"
#define CONTENT_DATABASE_VERSION_KEY "content_database_version"
#define OCR_CONTENT_DATABASE_VERSION_KEY "ocr_content_database_version"
namespace UkuiSearch {
//fixme: we need a better way to record index status.
class IndexStatusRecorder : public QObject

View File

@ -27,10 +27,11 @@
#include "file-utils.h"
#include "compatible-define.h"
using namespace UkuiSearch;
IndexUpdater::IndexUpdater(const QVector<PendingFile>& files, QAtomicInt& indexstop, QAtomicInt& contentIndexstop)
IndexUpdater::IndexUpdater(const QVector<PendingFile>& files, QAtomicInt& indexstop, QAtomicInt& contentIndexstop, QAtomicInt& contentIndexOcrStop)
: m_cache(files),
m_indexStop(&indexstop),
m_contentIndexStop(&contentIndexstop)
m_contentIndexStop(&contentIndexstop),
m_contentIndexOcrStop(&contentIndexOcrStop)
{
}
void IndexUpdater::updateIndex()
@ -44,7 +45,7 @@ void IndexUpdater::updateIndex()
return;
}
qDebug() << "===update basic index===";
for(PendingFile file : m_cache) {
for(const PendingFile& file : m_cache) {
if(file.shouldRemoveIndex()) {
qDebug() << "| remove:" <<file.path();
basicDb.removeDocument(file.path());
@ -63,6 +64,21 @@ void IndexUpdater::updateIndex()
basicDb.commit();
qDebug() << "===finish update basic index===";
}
}
// Runs the three update passes in order: basic index, content index, OCR
// content index. The pending-file cache is released and done() is emitted
// here, after all passes, so they still happen when a pass returns early
// (e.g. its database failed to open or its stop flag was set).
void IndexUpdater::run()
{
updateIndex();
updateContentIndex();
updateOcrContentIndex();
// Drop the cached pending files and give their storage back.
m_cache.clear();
m_cache.shrink_to_fit();
// Ask the allocator to return freed heap memory to the system.
malloc_trim(0);
Q_EMIT done();
}
void IndexUpdater::updateContentIndex()
{
if(FileIndexerConfig::getInstance()->isContentIndexEnable() && !m_contentIndexStop->LOAD) {
WritableDatabase contentDb(DataBaseType::Content);
if(!contentDb.open()) {
@ -70,23 +86,22 @@ void IndexUpdater::updateIndex()
return;
}
QMap<QString, bool> suffixMap = targetFileTypeMap;
//ocr
if(FileIndexerConfig::getInstance()->isOCREnable()) {
suffixMap.INSERT(targetPhotographTypeMap);
}
qDebug() << "===update content index===";
int size = 0;
for(PendingFile file : m_cache) {
if(m_contentIndexStop->LOAD) {
qDebug() << "Content index update interrupted";
return;
}
QString suffix = QFileInfo(file.path()).suffix();
if(file.shouldRemoveIndex()) {
qDebug() << "| remove:" <<file.path();
if(file.isDir()) {
contentDb.removeChildrenDocument(file.path());
} else if(true == suffixMap[suffix]) {
} else if(targetFileTypeMap[suffix]) {
contentDb.removeDocument(file.path());
}
} else if(true == suffixMap[suffix] && !file.isDir()) {
} else if(targetFileTypeMap[suffix] && !file.isDir()) {
if(FileUtils::isEncrypedOrUnsupport(file.path(), suffix)) {
if(file.isModified() || file.isMoveTo()) {
contentDb.removeDocument(file.path());
@ -99,7 +114,6 @@ void IndexUpdater::updateIndex()
contentDb.addDocument(indexer.document());
++size;
} else if(file.isModified() || file.isMoveTo()){
contentDb.removeDocument(file.path());
}
}
@ -108,24 +122,59 @@ void IndexUpdater::updateIndex()
qDebug() << "30 finished.";
size = 0;
}
if(m_contentIndexStop->LOAD) {
qDebug() << "Index stopped, content index update interrupted";
m_cache.clear();
m_cache.shrink_to_fit();
malloc_trim(0);
return;
}
}
contentDb.commit();
qDebug() << "===finish update content index===";
}
m_cache.clear();
m_cache.shrink_to_fit();
malloc_trim(0);
Q_EMIT done();
}
void IndexUpdater::run()
/**
 * Applies the cached file changes to the OCR content database.
 *
 * Runs only when OCR indexing is enabled in FileIndexerConfig and the OCR stop
 * flag is clear. Files are selected by suffix through targetPhotographTypeMap.
 * The basic index is not touched here: run() already calls updateIndex()
 * before this step.
 */
void IndexUpdater::updateOcrContentIndex()
{
    if(!FileIndexerConfig::getInstance()->isOCREnable() || m_contentIndexOcrStop->LOAD) {
        return;
    }

    WritableDatabase contentDb(DataBaseType::OcrContent);
    if(!contentDb.open()) {
        qWarning() << "Ocr content db open failed, fail to update index";
        return;
    }

    qDebug() << "===update ocr content index===";
    int size = 0; // documents added since the last commit
    for(PendingFile file : m_cache) {
        // The stop flag is re-checked for every file so a batch can be abandoned midway.
        if(m_contentIndexOcrStop->LOAD) {
            qDebug() << "Ocr content index update interrupted";
            return;
        }

        const QString suffix = QFileInfo(file.path()).suffix();
        const bool isPhotograph = targetPhotographTypeMap[suffix];

        if(file.shouldRemoveIndex()) {
            qDebug() << "| remove:" <<file.path();
            if(file.isDir()) {
                // A removed directory takes every document below it with it.
                contentDb.removeChildrenDocument(file.path());
            } else if(isPhotograph) {
                contentDb.removeDocument(file.path());
            }
        } else if(isPhotograph && !file.isDir()) {
            if(FileUtils::isEncrypedOrUnsupport(file.path(), suffix)) {
                // The file can no longer be read: drop any stale entry for it.
                if(file.isModified() || file.isMoveTo()) {
                    contentDb.removeDocument(file.path());
                }
                continue;
            }
            qDebug() << "| index:" <<file.path();
            fileContentIndexer indexer(file.path());
            if(indexer.index()) {
                contentDb.addDocument(indexer.document());
                ++size;
            } else if(file.isModified() || file.isMoveTo()){
                // Indexing failed for a changed file: remove the outdated document.
                contentDb.removeDocument(file.path());
            }
        }

        // Commit in small batches of 10 added documents.
        if(size >= 10) {
            contentDb.commit();
            qDebug() << "10 finished.";
            size = 0;
        }
    }
    contentDb.commit();
    qDebug() << "===finish update ocr content index===";
}

View File

@ -31,7 +31,7 @@ class IndexUpdater : public QObject, public QRunnable
{
Q_OBJECT
public:
explicit IndexUpdater(const QVector<PendingFile>& files, QAtomicInt& indexstop, QAtomicInt& contentIndexstop);
explicit IndexUpdater(const QVector<PendingFile>& files, QAtomicInt& indexstop, QAtomicInt& contentIndexstop, QAtomicInt& contentIndexOcrStop);
void run() override;
Q_SIGNALS:
@ -39,10 +39,13 @@ Q_SIGNALS:
private:
void updateIndex();
void updateContentIndex();
void updateOcrContentIndex();
QVector<PendingFile> m_cache;
QAtomicInt *m_contentIndexStop = nullptr;
QAtomicInt *m_indexStop = nullptr;
QAtomicInt *m_contentIndexOcrStop = nullptr;
};
}
#endif // INDEXUPDATER_H

View File

@ -18,17 +18,17 @@
*
*/
#include "search-manager.h"
#include <utility>
#include "dir-watcher.h"
using namespace UkuiSearch;
// Definitions of SearchManager's static members, one symbol/mutex pair per
// search category (file, dir, content, ocr).
// OcrSearch::getResult compares its own m_uniqueSymbol with uniqueSymbolOcr
// while holding m_mutexOcr and stops enqueueing results when they differ.
// NOTE(review): the other three pairs presumably play the same role for their
// search classes — confirm against the code that uses them.
size_t SearchManager::uniqueSymbolFile = 0;
size_t SearchManager::uniqueSymbolDir = 0;
size_t SearchManager::uniqueSymbolContent = 0;
size_t SearchManager::uniqueSymbolOcr = 0;
QMutex SearchManager::m_mutexFile;
QMutex SearchManager::m_mutexDir;
QMutex SearchManager::m_mutexContent;
QMutex SearchManager::m_mutexOcr;
// Forwards the parent to QObject; the manager has no other construction work.
SearchManager::SearchManager(QObject *parent)
    : QObject(parent)
{
}
@ -36,7 +36,7 @@ SearchManager::SearchManager(QObject *parent) : QObject(parent) {
// Nothing to release explicitly.
SearchManager::~SearchManager() = default;
int SearchManager::getCurrentIndexCount() {
uint SearchManager::getCurrentIndexCount() {
try {
Xapian::Database db(INDEX_PATH);
return db.get_doccount();
@ -48,7 +48,7 @@ int SearchManager::getCurrentIndexCount() {
bool SearchManager::isBlocked(QString &path) {
QStringList blockList = DirWatcher::getDirWatcher()->getBlockDirsOfUser();
for(QString i : blockList) {
for(const QString& i : blockList) {
if(FileUtils::isOrUnder(path, i))
return true;
}
@ -56,7 +56,7 @@ bool SearchManager::isBlocked(QString &path) {
}
bool SearchManager::creatResultInfo(SearchPluginIface::ResultInfo &ri, QString path)
bool SearchManager::creatResultInfo(SearchPluginIface::ResultInfo &ri, const QString& path)
{
QFileInfo info(path);
if(!info.exists()) {
@ -68,7 +68,7 @@ bool SearchManager::creatResultInfo(SearchPluginIface::ResultInfo &ri, QString p
<< SearchPluginIface::DescriptionInfo{tr("Path:"), path} \
<< SearchPluginIface::DescriptionInfo{tr("Modified time:"), info.lastModified().toString("yyyy/MM/dd hh:mm:ss")};
ri.actionKey = path;
if (true == targetPhotographTypeMap[info.suffix()]) {
if (targetPhotographTypeMap[info.suffix()]) {
ri.type = 1;//1为ocr图片文件
} else {
ri.type = 0;//0为默认文本文件
@ -80,8 +80,8 @@ FileSearch::FileSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, s
this->setAutoDelete(true);
m_search_result = searchResult;
m_uniqueSymbol = uniqueSymbol;
m_keyword = keyword;
m_value = value;
m_keyword = std::move(keyword);
m_value = std::move(value);
m_slot = slot;
m_begin = begin;
m_num = num;
@ -113,20 +113,19 @@ void FileSearch::run() {
//可能会有更好的方法,待优化。
m_begin = 0;
m_num = 100;
int resultCount = 1;
int totalCount = 0;
uint resultCount = 1;
uint totalCount = 0;
while(resultCount > 0) {
resultCount = keywordSearchfile();
resultCount = keywordSearchFile();
m_begin += m_num;
totalCount += resultCount;
}
qDebug() << "Total count:" << m_value << totalCount;
return;
}
int FileSearch::keywordSearchfile() {
uint FileSearch::keywordSearchFile() {
try {
qDebug() << "--keywordSearchfile start--";
qDebug() << "--keywordSearchFile start--";
Xapian::Database db(INDEX_PATH);
Xapian::Query query = creatQueryForFileSearch();
Xapian::Enquire enquire(db);
@ -140,24 +139,24 @@ int FileSearch::keywordSearchfile() {
queryFile = query;
}
qDebug() << "keywordSearchfile:" << QString::fromStdString(queryFile.get_description());
qDebug() << "keywordSearchFile:" << QString::fromStdString(queryFile.get_description());
enquire.set_query(queryFile);
enquire.set_docid_order(enquire.DONT_CARE);
enquire.set_docid_order(Xapian::Enquire::DONT_CARE);
enquire.set_sort_by_relevance_then_value(2, true);
Xapian::MSet result = enquire.get_mset(m_begin, m_num, 0, m_matchDecider);
int resultCount = result.size();
qDebug() << "keywordSearchfile results count=" << resultCount;
Xapian::MSet result = enquire.get_mset(m_begin, m_num, nullptr, m_matchDecider);
uint resultCount = result.size();
qDebug() << "keywordSearchFile results count=" << resultCount;
if(resultCount == 0)
return 0;
if(getResult(result) == -1)
return -1;
qDebug() << "--keywordSearchfile finish--";
qDebug() << "--keywordSearchFile finish--";
return resultCount;
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
qDebug() << "--keywordSearchfile finish--";
qDebug() << "--keywordSearchFile finish--";
return -1;
}
}
@ -170,7 +169,7 @@ Xapian::Query FileSearch::creatQueryForFileSearch() {
for(; bf.position() != -1; bf.toNextBoundary()) {
int end = bf.position();
if(bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) {
v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.mid(start, end - start)).toStdString()));
v.emplace_back(QUrl::toPercentEncoding(userInput.mid(start, end - start)).toStdString());
}
start = end;
}
@ -233,7 +232,7 @@ int FileSearch::getResult(Xapian::MSet &result) {
FileContentSearch::FileContentSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, bool fuzzy, int begin, int num)
:m_search_result(searchResult),
m_uniqueSymbol(uniqueSymbol),
m_keyword(keyword),
m_keyword(std::move(keyword)),
m_fuzzy(fuzzy),
m_begin(begin),
m_num(num)
@ -258,22 +257,22 @@ void FileContentSearch::run() {
//这里同文件搜索,待优化。
m_begin = 0;
m_num = 100;
int resultCount = 1;
int totalCount = 0;
uint resultCount = 1;
uint totalCount = 0;
while(resultCount > 0) {
resultCount = keywordSearchContent();
m_begin += m_num;
totalCount += resultCount;
}
qDebug() << "Total count:" << totalCount;
return;
}
int FileContentSearch::keywordSearchContent() {
uint FileContentSearch::keywordSearchContent() {
try {
qDebug() << "--keywordSearchContent search start--";
Xapian::Database db(CONTENT_INDEX_PATH);
db.add_database(Xapian::Database(OCR_CONTENT_INDEX_PATH));
Xapian::Enquire enquire(db);
Xapian::QueryParser qp;
qp.set_default_op(Xapian::Query::OP_AND);
@ -282,15 +281,15 @@ int FileContentSearch::keywordSearchContent() {
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword);
//Creat a query
std::string words;
for(size_t i = 0; i < sKeyWord.size(); i++) {
words.append(sKeyWord.at(i).word).append(" ");
for(auto & i : sKeyWord) {
words.append(i.word).append(" ");
}
// Xapian::Query query = qp.parse_query(words);
std::vector<Xapian::Query> v;
for(size_t i = 0; i < sKeyWord.size(); i++) {
v.push_back(Xapian::Query(sKeyWord.at(i).word));
for(auto & i : sKeyWord) {
v.emplace_back(i.word);
// qDebug() << QString::fromStdString(sKeyWord.at(i).word);
}
Xapian::Query query;
@ -304,9 +303,9 @@ int FileContentSearch::keywordSearchContent() {
enquire.set_query(query);
Xapian::MSet result = enquire.get_mset(m_begin, m_num, 0, m_matchDecider);
int resultCount = result.size();
if(result.size() == 0) {
Xapian::MSet result = enquire.get_mset(m_begin, m_num, nullptr, m_matchDecider);
uint resultCount = result.size();
if(result.empty()) {
return 0;
}
qDebug() << "keywordSearchContent results count=" << resultCount;
@ -339,7 +338,7 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
}
// Construct snippets containing keyword.
auto termIterator = doc.termlist_begin();
QStringList words = QString::fromStdString(keyWord).split(" ", QString::SkipEmptyParts);
QStringList words = QString::fromStdString(keyWord).split(" ", Qt::SkipEmptyParts);
for(const QString& wordTobeFound : words) {
std::string term = wordTobeFound.toStdString();
@ -374,127 +373,12 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
return 0;
}
/**
 * Creates a self-deleting OCR search task.
 *
 * @param searchResult queue that receives the results (not owned).
 * @param uniqueSymbol symbol later compared with SearchManager::uniqueSymbolOcr
 *                     before each result is enqueued.
 * @param keyword      text to search for; taken by value and moved into place.
 * @param begin        initial result offset (run() resets it to 0).
 * @param num          initial page size (run() resets it to 100).
 */
OcrSearch::OcrSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, int begin, int num) {
    this->setAutoDelete(true);
    m_search_result = searchResult;
    m_uniqueSymbol = uniqueSymbol;
    // The parameter is already a private copy, so move it instead of copying again.
    m_keyword = std::move(keyword);
    m_begin = begin;
    m_num = num;
    // Owned by this task; released in the destructor.
    m_matchDecider = new OcrMatchDecider();
}
// Releases the match decider created in the constructor; the result queue is
// not owned, so its pointer is only cleared.
OcrSearch::~OcrSearch() {
    // delete on a null pointer is a no-op, so no explicit check is needed.
    delete m_matchDecider;
    m_search_result = nullptr;
}
void OcrSearch::run() {
SearchManager::m_mutexOcr.lock();
if(!m_search_result->isEmpty()) {
m_search_result->clear();
}
SearchManager::m_mutexOcr.unlock();
//这里同文件搜索,待优化。
m_begin = 0;
m_num = 100;
int resultCount = 1;
int totalCount = 0;
while(resultCount > 0) {
resultCount = keywordSearchOcr();
m_begin += m_num;
totalCount += resultCount;
}
qDebug() << "Total count:" << totalCount;
return;
}
int OcrSearch::keywordSearchOcr() {
try {
qDebug() << "--keywordSearch OCR search start--";
Xapian::Database db(OCR_INDEX_PATH);
Xapian::Enquire enquire(db);
Xapian::QueryParser qp;
qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db);
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword);
//Creat a query
std::string words;
for(size_t i = 0; i < sKeyWord.size(); i++) {
words.append(sKeyWord.at(i).word).append(" ");
}
std::vector<Xapian::Query> v;
for(size_t i = 0; i < sKeyWord.size(); i++) {
v.push_back(Xapian::Query(sKeyWord.at(i).word));
qDebug() << QString::fromStdString(sKeyWord.at(i).word);
}
Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
qDebug() << "keywordSearch OCR:" << QString::fromStdString(query.get_description());
enquire.set_query(query);
Xapian::MSet result = enquire.get_mset(m_begin, m_num, 0, m_matchDecider);
int resultCount = result.size();
if(result.size() == 0) {
return 0;
}
qDebug() << "keywordSearch OCR results count=" << resultCount;
if(getResult(result, words) == -1) {
return -1;
}
qDebug() << "--keywordSearch OCR search finish--";
return resultCount;
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
qDebug() << "--keywordSearch OCR search finish--";
return -1;
}
}
/**
 * Turns every match in @p result into a ResultInfo and enqueues it.
 *
 * @param result  page of matches to convert.
 * @param keyWord space-separated segmented keywords; the first word locates
 *                the snippet, and the words with spaces removed are what gets
 *                highlighted.
 * @return 0 when the whole page was enqueued, -1 as soon as this task's symbol
 *         no longer equals SearchManager::uniqueSymbolOcr.
 */
int OcrSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
    // These depend only on the keyword, so compute them once for the whole page.
    const QString keywordText = QString::fromStdString(keyWord);
    const std::string wordTobeFound = keywordText.section(" ", 0, 0).toStdString();
    const QString highlight = QString(keywordText).remove(" ");

    for(auto it = result.begin(); it != result.end(); ++it) {
        Xapian::Document doc = it.get_document();
        std::string data = doc.get_data();
        QString path = QString::fromStdString(doc.get_value(1));

        SearchPluginIface::ResultInfo ri;
        if(!SearchManager::creatResultInfo(ri, path)) {
            continue;
        }

        // Construct snippets containing keyword.
        auto term = doc.termlist_begin();
        term.skip_to(wordTobeFound);
        // skip_to() may land on a later term or on the end of the list; only
        // dereference when the iterator really points at the wanted term and
        // that term has positional data. Otherwise no snippet is attached.
        if(term != doc.termlist_end() && *term == wordTobeFound) {
            auto pos = term.positionlist_begin();
            if(pos != term.positionlist_end()) {
                //fix me: make a snippet without cut cjk char.
                QString snippet = FileUtils::getSnippet(data, *pos, highlight);
                ri.description.prepend(SearchPluginIface::DescriptionInfo{"", FileUtils::getHtmlText(snippet, highlight)});
            }
        }

        {
            // Enqueue only while this task's symbol still matches the shared one.
            QMutexLocker locker(&SearchManager::m_mutexOcr);
            if(m_uniqueSymbol != SearchManager::uniqueSymbolOcr) {
                return -1;
            }
            m_search_result->enqueue(ri);
        }
    }
    return 0;
}
/**
 * Creates a self-deleting direct search task.
 *
 * @param keyword      text to look for; taken by value and moved into place.
 * @param searchResult queue that receives the results (not owned).
 * @param value        stored in m_value; taken by value and moved into place.
 * @param uniqueSymbol symbol stored in m_uniqueSymbol.
 */
DirectSearch::DirectSearch(QString keyword, DataQueue<SearchPluginIface::ResultInfo> *searchResult, QString value, size_t uniqueSymbol) {
    this->setAutoDelete(true);
    // Each by-value string is moved exactly once; no copy is made first.
    m_keyword = std::move(keyword);
    m_searchResult = searchResult;
    m_uniqueSymbol = uniqueSymbol;
    m_value = std::move(value);
}
void DirectSearch::run() {
@ -534,13 +418,13 @@ void DirectSearch::run() {
for (auto i : list) {
if (i.isDir() && (!(i.isSymLink()))) {
bool findIndex = false;
for (QString j : blockList) {
for (const QString& j : blockList) {
if (FileUtils::isOrUnder(i.absoluteFilePath(), j)) {
findIndex = true;
break;
}
}
if (findIndex == true) {
if (findIndex) {
qDebug() << "path is blocked:" << i.absoluteFilePath();
continue;
}
@ -586,13 +470,4 @@ bool FileContentMatchDecider::operator ()(const Xapian::Document &doc) const
return false;
}
return true;
}
// Accepts a document unless the path stored in its value slot 1 is blocked.
bool OcrMatchDecider::operator ()(const Xapian::Document &doc) const
{
    // isBlocked() takes a non-const reference, so keep a named local.
    QString path = QString::fromStdString(doc.get_value(1));
    return !SearchManager::isBlocked(path);
}
}

View File

@ -63,7 +63,7 @@
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
#define OCR_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/ocr_index_data").toStdString()
#define OCR_CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/ocr_content_index_data").toStdString()
namespace UkuiSearch {
class FileMatchDecider;
class FileContentMatchDecider;
@ -81,7 +81,7 @@ public:
explicit SearchManager(QObject *parent = nullptr);
~SearchManager();
static int getCurrentIndexCount();
static uint getCurrentIndexCount();
static size_t uniqueSymbolFile;
static size_t uniqueSymbolDir;
@ -94,7 +94,7 @@ public:
private:
static bool isBlocked(QString &path);
static bool creatResultInfo(UkuiSearch::SearchPluginIface::ResultInfo &ri, QString path);
static bool creatResultInfo(UkuiSearch::SearchPluginIface::ResultInfo &ri, const QString& path);
};
class FileSearch : public QRunnable {
@ -104,7 +104,7 @@ public:
protected:
void run();
private:
int keywordSearchfile();
uint keywordSearchFile();
Xapian::Query creatQueryForFileSearch();
int getResult(Xapian::MSet &result);
@ -125,7 +125,7 @@ public:
protected:
void run();
private:
int keywordSearchContent();
uint keywordSearchContent();
int getResult(Xapian::MSet &result, std::string &keyWord);
DataQueue<SearchPluginIface::ResultInfo> *m_search_result = nullptr;
@ -137,24 +137,6 @@ private:
int m_num = 20;
};
class OcrSearch : public QRunnable {
public:
explicit OcrSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20);
~OcrSearch();
protected:
void run();
private:
int keywordSearchOcr();
int getResult(Xapian::MSet &result, std::string &keyWord);
DataQueue<SearchPluginIface::ResultInfo> *m_search_result = nullptr;
OcrMatchDecider *m_matchDecider;
size_t m_uniqueSymbol;
QString m_keyword;
int m_begin = 0;
int m_num = 20;
};
class DirectSearch : public QRunnable {
public:
explicit DirectSearch(QString keyword, DataQueue<SearchPluginIface::ResultInfo> *searchResult, QString value, size_t uniqueSymbol);
@ -176,10 +158,6 @@ class FileContentMatchDecider : public Xapian::MatchDecider {
public:
bool operator ()(const Xapian::Document &doc) const;
};
// Match decider handed to Xapian::Enquire::get_mset() by OcrSearch; its call
// operator decides per document whether the match is kept.
class OcrMatchDecider : public Xapian::MatchDecider {
public:
    bool operator ()(const Xapian::Document &doc) const override;
};
}
#endif // SEARCHMANAGER_H

View File

@ -29,6 +29,7 @@ using namespace UkuiSearch;
static QMutex g_basicDatabaseMutex;
static QMutex g_contentDatabaseMutex;
static QMutex g_ocrContentDatabaseMutex;
#define DATABASE_TRY(code) try { \
code; \
@ -43,16 +44,19 @@ WritableDatabase::WritableDatabase(const DataBaseType &type)
: m_type(type)
{
    // Pick the on-disk location and the mutex that belong to the requested
    // database. Each known type has its own case and its own break, so a case
    // added later cannot silently run into its neighbour.
    switch (type) {
        case DataBaseType::Basic:
            m_path = INDEX_PATH;
            m_mutex = &g_basicDatabaseMutex;
            break;
        case DataBaseType::Content:
            m_path = CONTENT_INDEX_PATH;
            m_mutex = &g_contentDatabaseMutex;
            break;
        case DataBaseType::OcrContent:
            m_path = OCR_CONTENT_INDEX_PATH;
            m_mutex = &g_ocrContentDatabaseMutex;
            break;
        default:
            break;
    }
    // The mutex is taken for the lifetime of this object.
    // NOTE(review): presumably released in the destructor — confirm; also note
    // that an unknown type leaves m_mutex at whatever its default is.
    m_mutex->lock();
}
@ -100,14 +104,16 @@ void WritableDatabase::rebuild()
//更新版本号
switch (m_type) {
case DataBaseType::Basic:
IndexStatusRecorder::getInstance()->setVersion(INDEX_DATABASE_VERSION_KEY, INDEX_DATABASE_VERSION);
break;
case DataBaseType::Content:
IndexStatusRecorder::getInstance()->setVersion(CONTENT_DATABASE_VERSION_KEY, CONTENT_DATABASE_VERSION);
break;
default:
break;
case DataBaseType::Basic:
IndexStatusRecorder::getInstance()->setVersion(INDEX_DATABASE_VERSION_KEY, INDEX_DATABASE_VERSION);
break;
case DataBaseType::Content:
IndexStatusRecorder::getInstance()->setVersion(CONTENT_DATABASE_VERSION_KEY, CONTENT_DATABASE_VERSION);
break;
case DataBaseType::OcrContent:
IndexStatusRecorder::getInstance()->setVersion(OCR_CONTENT_DATABASE_VERSION_KEY, OCR_CONTENT_DATABASE_VERSION);
default:
break;
}
}
@ -184,14 +190,17 @@ QMap<std::string, std::string> WritableDatabase::getIndexTimes()
/**
 * Marks the database handled by this object as being in the Error state in
 * IndexStatusRecorder, using the state key that matches m_type. Unknown types
 * are ignored.
 */
void WritableDatabase::errorRecord()
{
    switch (m_type) {
        case DataBaseType::Basic:
            IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Error);
            break;
        case DataBaseType::Content:
            IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Error);
            break;
        case DataBaseType::OcrContent:
            IndexStatusRecorder::getInstance()->setStatus(OCR_CONTENT_INDEX_DATABASE_STATE_KEY, IndexStatusRecorder::State::Error);
            break;
        default:
            break;
    }
}